"""Gradio demo for chatting with MiniChat-3B through a pyllamacpp backend."""

import logging
from pathlib import Path
from threading import Lock
from typing import cast
from urllib import error, parse, request, response, robotparser

# `urlopen` is defined in `urllib.request`; importing it from the `urllib`
# package itself raises ImportError.
from urllib.request import urlopen

import gradio as gr
import torch
from pyllamacpp.model import Model
from transformers import AutoModelForCausalLM, AutoTokenizer

from conversation import get_default_conv_template

# Reference snippet (never executed): a minimal console chat loop on top of
# pyllamacpp. NOTE(review): the builtin `input()` accepts no `flush` keyword,
# so this snippet would need that argument removed before it could run.
"""
model = Model(model_path='/path/to/model.bin')
while True:
    try:
        prompt = input("You: ", flush=True)
        if prompt == '':
            continue
        print(f"AI:", end='')
        for token in model.generate(prompt):
            print(f"{token}", end='', flush=True)
        print()
    except KeyboardInterrupt:
        break
"""

# Hugging Face tokenizer/model/conversation-template bundle, keyed by a short
# model alias. NOTE(review): nothing in this file reads `talkers` after it is
# built; it is kept because other modules may import it.
talkers = {
    "m3b": {
        "tokenizer": AutoTokenizer.from_pretrained(
            "GeneZC/MiniChat-3B", use_fast=False
        ),
        "model": AutoModelForCausalLM.from_pretrained(
            "GeneZC/MiniChat-3B", device_map="auto", low_cpu_mem_usage=True
        ),
        "conv": get_default_conv_template("minichat"),
    }
}

LCPP_MODEL_URL = (
    "https://huggingface.co/GGUF/MiniChat-3B/resolve/main/ggml-model-q8_0.bin"
)
LCPP_MODEL_PATH = "minichat-3b-q8_0.gguf"


def _ensure_model_file(url: str, dest: str) -> str:
    """Download *url* to *dest* unless *dest* already exists, and return *dest*.

    The payload is first written to a sibling ``<dest>.part`` file and renamed
    only after the transfer finishes, so an interrupted download is never
    mistaken for a complete model on the next start.

    Raises:
        urllib.error.URLError: If the download fails.
    """
    target = Path(dest)
    if not target.exists():
        partial = target.with_name(target.name + ".part")
        request.urlretrieve(url, str(partial))
        partial.replace(target)
    return dest


lcpp_model = Model(
    model_path=_ensure_model_file(LCPP_MODEL_URL, LCPP_MODEL_PATH)
)


def m3b_talk(text):
    """Return the full completion generated by ``lcpp_model`` for *text*.

    Tokens yielded by ``lcpp_model.generate`` are concatenated in order into a
    single string; nothing is streamed back to the caller.
    """
    return "".join(lcpp_model.generate(text))


def main():
    """Build the Gradio UI and launch it with a single-worker queue."""
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        with gr.Row(variant="panel"):
            gr.Markdown("## Talk to MiniChat-3B\n\nTalk to MiniChat-3B.")
        with gr.Row(variant="panel"):
            with gr.Column(variant="panel"):
                m3b_talk_input = gr.Textbox(
                    label="Message", placeholder="Type something here..."
                )
            with gr.Column(variant="panel"):
                m3b_talk_output = gr.Textbox()
                m3b_talk_btn = gr.Button("Send")
        # Event wiring has to happen while the Blocks context is still open.
        m3b_talk_btn.click(
            m3b_talk,
            inputs=m3b_talk_input,
            outputs=m3b_talk_output,
            api_name="talk_m3b",
        )

    # One request at a time: every call shares the single `lcpp_model`.
    demo.queue(concurrency_count=1).launch()


if __name__ == "__main__":
    main()