import gradio as gr
from transformers import AutoTokenizer, AutoModel

# Load the ChatGLM-6B tokenizer and the INT4-quantized weights for CPU inference.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).cpu().float()
model = AutoModel.from_pretrained(
    "THUDM/chatglm-6b-int4", trust_remote_code=True
).quantize(bits=4, compile_parallel_kernel=True, parallel_num=2).float()
model = model.eval()


def chat(query, history=None):
    """Generate a reply and return the updated history twice:
    once for the chatbot display and once for the session state."""
    if history is None:
        history = []
    response, history = model.chat(tokenizer, query, history)
    return history, history


description = "This is an unofficial chatbot application based on the open-source ChatGLM-6B model.\n"
title = "ChatGLM-6B Chatbot"
# Example prompts in English and Chinese ("你好。" = "Hello.", "介绍清华" = "Introduce Tsinghua").
examples = [["Hello?"], ["你好。"], ["介绍清华"]]

chatbot_interface = gr.Interface(
    fn=chat,
    title=title,
    description=description,
    examples=examples,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
)

chatbot_interface.launch()