In [1]:
!pip install torch transformers bitsandbytes -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from transformers import pipeline, BitsAndBytesConfig
from IPython.display import clear_output
from google.colab import output

In [3]:
# Shared quantization settings handed to every pipeline that ChatBot loads:
# 4-bit loading with the "nf4" quant type, double quantization switched on,
# and float16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

In [4]:
class ChatBot:
    """Chat session wrapper around a quantized text-generation pipeline.

    The loaded pipeline is kept at class level so that it is loaded once per
    model slug and shared by every instance created afterwards. Each instance
    keeps its own conversation history and sampling parameters.
    """

    _instance = None        # instance handed out by get_instance()
    _current_model = None   # slug of the model most recently loaded
    _pipeline = None        # pipeline for _current_model, shared across instances

    def __init__(self, model_slug=None):
        """Create a chat session, loading ``model_slug`` if it is not loaded yet.

        Args:
            model_slug: Model identifier passed to ``pipeline(model=...)``.
                When ``None`` or equal to the currently loaded model, the
                already-loaded pipeline (if any) is reused.
        """
        # Reuse the shared pipeline so an instance created for an
        # already-loaded model is usable without reloading it.
        self.pipeline = ChatBot._pipeline
        if model_slug and model_slug != ChatBot._current_model:
            self.load_model(model_slug)

        self.messages = []
        self.max_tokens = 2048
        self.temperature = 0.5
        self.top_k = 100
        self.top_p = 0.95

    @classmethod
    def get_instance(cls, model_slug=None):
        """Return the shared instance, replacing it when a different model is requested.

        Args:
            model_slug: Optional model identifier. A new instance is created
                when none exists yet or when this differs from the model
                currently loaded.

        Returns:
            The shared ``ChatBot`` instance.
        """
        if not cls._instance or (model_slug and model_slug != cls._current_model):
            cls._instance = cls(model_slug)
        return cls._instance

    def load_model(self, model_slug):
        """Load ``model_slug`` as a text-generation pipeline and make it current.

        The class-level state is only updated after the pipeline was created
        successfully, so a failed load leaves the previous model registered.
        """
        print(f"Loading model {model_slug}...")
        loaded = pipeline(
            "text-generation",
            model=model_slug,
            model_kwargs={"quantization_config": quantization_config},
            device_map="auto",
        )
        self.pipeline = loaded
        ChatBot._pipeline = loaded
        ChatBot._current_model = model_slug
        clear_output()
        print("Model loaded successfully!")

    def reset_conversation(self, system_message):
        """Reset the conversation with a new system message"""
        self.messages = [{"role": "system", "content": system_message}]

    def get_response(self, user_input):
        """Generate the assistant reply to ``user_input`` with the current parameters.

        On success the user turn and the assistant turn are both appended to
        ``self.messages``. If generation fails or is interrupted, the history
        is restored to its previous state so that a retry does not send
        duplicated user turns.

        Args:
            user_input: Text of the user's message.

        Returns:
            The assistant reply text.

        Raises:
            RuntimeError: If no model has been loaded.
        """
        if self.pipeline is None:
            raise RuntimeError(
                "No model loaded. Create the ChatBot with a model slug "
                "or call load_model() first."
            )

        history_len = len(self.messages)
        self.messages.append({"role": "user", "content": user_input})
        try:
            outputs = self.pipeline(
                self.messages,
                max_new_tokens=self.max_tokens,
                do_sample=True,
                temperature=self.temperature,
                top_k=self.top_k,
                top_p=self.top_p
            )
            # The last entry of the returned conversation is the new reply.
            response = outputs[0]["generated_text"][-1]
            content = response.get('content', 'No content available')
        except BaseException:
            # Also covers KeyboardInterrupt (stopping the cell mid-generation):
            # drop the unanswered user turn before propagating.
            del self.messages[history_len:]
            raise
        self.messages.append({"role": "assistant", "content": content})
        return content

    def update_params(self, max_tokens=None, temperature=None, top_k=None, top_p=None):
        """Update generation parameters.

        Only arguments that are not ``None`` are applied; the others keep
        their current value.
        """
        if max_tokens is not None:
            self.max_tokens = max_tokens
        if temperature is not None:
            self.temperature = temperature
        if top_k is not None:
            self.top_k = top_k
        if top_p is not None:
            self.top_p = top_p

In [5]:
def run_chatbot(
    model=None,
    system_message="You are Orca Mini, You are expert in following given instructions, Think step by step before coming up with final answer",
    max_tokens=None,
    temperature=None,
    top_k=None,
    top_p=None,
):
    """Run an interactive chat loop on stdin/stdout until the user types 'quit'.

    Args:
        model: Model slug to load. ``None`` reuses the model that is already
            loaded by a previous call.
        system_message: System prompt the conversation is (re)started with.
        max_tokens, temperature, top_k, top_p: Optional overrides for the
            chatbot's generation parameters; ``None`` keeps the current value.

    Errors are reported by printing a message rather than raising.
    """
    try:
        # Get or create chatbot instance
        chatbot = ChatBot.get_instance(model)

        # Fail once, up front, instead of erroring on every turn of the loop.
        if getattr(chatbot, "pipeline", None) is None:
            raise ValueError("No model is loaded; pass model=<model slug> on the first call.")

        # Update parameters if provided
        chatbot.update_params(max_tokens, temperature, top_k, top_p)

        # Reset conversation with new system message
        chatbot.reset_conversation(system_message)

        print("Chatbot: Hi! Type 'quit' to exit.")

        while True:
            try:
                user_input = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Interrupting the cell or a closed stdin ends the chat cleanly.
                print()
                break
            if not user_input:
                # Nothing typed: prompt again instead of sending an empty turn.
                continue
            if user_input.lower() == 'quit':
                break
            try:
                response = chatbot.get_response(user_input)
                print("Chatbot:", response)
            except Exception as e:
                print(f"Chatbot: An error occurred: {str(e)}")
                print("Please try again.")

    except Exception as e:
        print(f"Error in chatbot: {str(e)}")

In [None]:
# Load the model (first call only) and start an interactive chat session.
run_chatbot(
    model="pankajmathur/orca_mini_phi-4",
)

Model loaded successfully!
Chatbot: Hi! Type 'quit' to exit.
You: hello what can you do?
Chatbot: Hello! As Orca Mini, I am an expert in following given instructions. My main capabilities include:

1. Understanding and executing step-by-step instructions.
2. Providing detailed, accurate, and informative responses to a wide range of questions and topics.
3. Assisting with problem-solving and decision-making tasks.
4. Offering guidance on various subjects, from simple tasks to complex issues.
5. Generating ideas and suggestions based on the given information and context.
6. Performing calculations and providing mathematical solutions.
7. Processing and analyzing data to provide useful insights.
8. Assisting with language-related tasks, such as translation, grammar correction, and text summarization.
9. Helping with research and finding relevant information on various topics.
10. Providing support for learning new skills and subjects.

I am designed to be helpful, efficient, and reliable 

In [None]:
# Example: reuse the already-loaded model with a different system message and sampling settings.
# run_chatbot(
#     system_message="You are Orca Mini, You are expert in logic, Think step by step before coming up with final answer",
#     max_tokens=1024,
#     temperature=0.3
# )