# Hugging Face Space (status: Running)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Checkpoint identifier passed to both ``from_pretrained`` calls below.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the tokenizer and the causal-LM weights once, at import time.
# ``device_map="auto"`` leaves device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Shared text-generation pipeline built from the objects loaded above.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def chat_with_expert(message, history):
    """Generate the assistant's reply to a single user message.

    This function is registered as the ``fn`` of ``gr.ChatInterface``, which
    calls it with ``(message, history)`` and expects the reply itself as the
    return value; the interface keeps the conversation history on its own.

    Args:
        message: The user's latest message text.
        history: Prior conversation as supplied by the chat interface. It is
            accepted to satisfy the callback signature but is neither read
            nor modified, so every reply is generated from ``message`` alone.

    Returns:
        The generated answer with surrounding whitespace stripped.
    """
    # Single-turn prompt: a fixed system-style instruction followed by the
    # user's message inside one [INST] ... [/INST] pair.
    # NOTE(review): the tokenizer may add its own BOS token in addition to
    # the literal "<s>" here - confirm against the model card.
    prompt = f"<s>[INST] You are an expert assistant. Answer with clarity and depth.\n{message} [/INST]"

    # return_full_text=False makes the pipeline return only the newly
    # generated text, so the answer does not have to be recovered by
    # splitting on "[/INST]" (which would truncate any answer that happens
    # to contain that marker itself).
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
# Wrap the chat handler in Gradio's chat UI and start serving it.
chatbot = gr.ChatInterface(fn=chat_with_expert, title="Expert Chat Assistant")
chatbot.launch()