from gradio_client import Client
from time import sleep
from ctransformers import AutoModelForCausalLM
from transformers import AutoTokenizer
from huggingface_hub import snapshot_download, hf_hub_download
import torch
import gradio as gr


def clear_session():
    return []


# Load the quantized GGUF model on CPU via ctransformers.
model = AutoModelForCausalLM.from_pretrained(
    "Cran-May/OpenSLIDE",
    model_file="SLIDE.0.1.gguf",
    model_type="mistral",
    gpu_layers=0,
)
# Fetch the SentencePiece tokenizer file for the base checkpoint;
# hf_hub_download returns the local path to the downloaded file.
model_dir = hf_hub_download(
    repo_id="OpenBuddy/openbuddy-mistral-7b-v13.1",
    local_dir="./",
    filename="tokenizer.model",
)
tokenizer = AutoTokenizer.from_pretrained("OpenBuddy/openbuddy-mistral-7b-v13.1")

DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful and honest INTP-T AI Assistant named "Shi-Ci" in English or "兮辞" in Chinese.
You are good at speaking English and Chinese.
You are talking to a human User. If the question is meaningless, please explain the reason and don't share false information.
You are based on the SEA model, trained by the "SSFW NLPark" team, not related to GPT, LLaMA, Meta, Mistral or OpenAI.
Let's work this out in a step by step way to be sure we have the right answer.\n\n"""


def generate_chat(user_input: str, chatbot: list):
    user_input = user_input.strip()
    if chatbot is None:
        chatbot = []
    # Rebuild the full prompt from the system prompt plus the chat history.
    history = DEFAULT_SYSTEM_PROMPT
    for chat in chatbot:
        history += f"User: {chat[0]}\nAssistant: {chat[1]}\n"
    history += f"User: {user_input}\nAssistant:"
    # A ctransformers model is called directly with the prompt string and
    # returns only the newly generated text (transformers-style
    # tokenizer.encode / model.generate tensors are not supported here).
    reply = model(history, max_new_tokens=512, stop=["User:"])
    # Trim anything past a stray "User:" turn the stop sequence may have missed.
    if "User:" in reply:
        reply = reply[: reply.find("User:")]
    chatbot.append((user_input, reply.strip()))
    # Keep only the last three turns to bound the prompt length.
    chatbot = chatbot[-3:]
    return "", chatbot


block = gr.Blocks()
with block as demo:
    gr.Markdown("""