from ctransformers import AutoModelForCausalLM
import gradio as gr


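# Build a single prompt string from the chat history, formatting each turn as
# "<human>: ... <bot>: ..." and closing finished turns with the end token.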
def generate_prompt(history):
    prompt = "<s> "
    for chain in history[:-1]:
        prompt += f"<human>: {chain[0]}\n<bot>: {chain[1]}{end_token}\n"
    # The newest turn has no bot reply yet, so leave the prompt open for the model.
    prompt += f"<human>: {history[-1][0]}\n<bot>: "
    return prompt


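# Run the model on the assembled prompt and return a token stream.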
def generate(history):
    prompt = generate_prompt(history)

    # stream=True returns a generator that yields tokens as they are produced
    # instead of waiting for the full completion.
    streamer = llm(prompt, temperature=0, stream=True)
    return streamer


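# Load the locally quantized (q4_0) Llama-family model with ctransformers.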
llm = AutoModelForCausalLM.from_pretrained("model/model_q4_0.bin", model_type='llama')
end_token = "</s>"


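# Gradio UI: a chat window, a textbox for user input, and a clear button.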
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

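    # Clear the textbox and append the user message with an empty bot reply.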
    def user(user_message, history):
        return "", history + [[user_message, ""]]

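    # Stream the reply, filling in the last chat entry token by token.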
    def bot(history):
        streamer = generate(history)

        for token in streamer:
            history[-1][1] += token
            yield history

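    # Submitting the textbox first records the user turn, then streams the bot reply.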
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)


# Enable queueing so the generator-based (streaming) bot callback works.
demo.queue()

if __name__ == "__main__":
    demo.launch()