from ctransformers import AutoModelForCausalLM
import gradio as gr

def generate_prompt(history):
    """Build a ChatML prompt from the chat history (a list of [user, assistant] pairs)."""
    prompt = start_message
    # Replay every completed exchange as closed user/assistant turns.
    for user_msg, assistant_msg in history[:-1]:
        prompt += (
            f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
        )
    # Add the newest user message and leave the assistant turn open so the
    # model completes it.
    prompt += (
        f"<|im_start|>user\n{history[-1][0]}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    return prompt
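
# For an illustrative history, generate_prompt([["Hi", "Hello!"], ["How are you?", ""]])
# produces:
#
#   <|im_start|>system
#   You are a helpful assistant chatbot.<|im_end|>
#   <|im_start|>user
#   Hi<|im_end|>
#   <|im_start|>assistant
#   Hello!<|im_end|>
#   <|im_start|>user
#   How are you?<|im_end|>
#   <|im_start|>assistant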


def generate(history):
    prompt = generate_prompt(history)

    # stream=True makes ctransformers return a generator that yields text
    # fragments as they are decoded, rather than one final string.
    streamer = llm(prompt, stop=stop_tokens, stream=True, threads=2)
    return streamer
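
# Illustrative use outside the UI (bot() below does the same thing):
#
#   for fragment in generate([["Hello", ""]]):
#       print(fragment, end="", flush=True)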


# Load the quantized GGML weights from the local path; model_type tells
# ctransformers which architecture (here MPT) the file contains.
llm = AutoModelForCausalLM.from_pretrained("model/ggml-model-q8_0.bin", model_type='mpt')

# Generation stops as soon as either the ChatML end-of-turn or EOS marker appears.
stop_tokens = ["<|im_end|>", "<|endoftext|>"]

# ChatML system turn that every prompt begins with.
start_message = """<|im_start|>system
You are a helpful assistant chatbot.<|im_end|>
"""


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the new message with an empty
        # assistant slot for bot() to fill in.
        return "", history + [[user_message, ""]]

    def bot(history):
        streamer = generate(history)

        # Append each streamed fragment to the open assistant message and
        # yield, so the Chatbot component re-renders incrementally.
        for token in streamer:
            history[-1][1] += token
            yield history

    # On submit: record the user message immediately, then stream the reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

# queue() is required for generator callbacks like bot() to stream updates.
demo.queue()
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
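
# Assumed setup (not pinned by this script): `pip install ctransformers gradio`,
# with the quantized weights placed at model/ggml-model-q8_0.bin. Running the
# file serves the chat UI on port 7860 of every interface
# (http://localhost:7860 locally).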