import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

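# OpenBioLLM-8B is a biomedical fine-tune of Llama 3, published on the
# Hugging Face Hub under this model ID.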
model_id = "aaditya/Llama3-OpenBioLLM-8B"

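# Load the tokenizer and weights once at startup: device_map="auto" places
# layers on the available GPU(s), and float16 halves the memory footprint
# (the 8B weights alone come to roughly 16 GB in fp16).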
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

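# Wrap the model and tokenizer in a text-generation pipeline so inference
# is a single call.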
chat_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

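# gr.ChatInterface calls fn(message, history). The history argument is the
# running transcript; it is not folded into the prompt here, so each answer
# is independent of earlier turns.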
def chatbot(message, history):
    # OpenBioLLM is Llama-3-based, so build the prompt with the tokenizer's
    # own chat template rather than Llama-2-style [INST] ... [/INST] tags,
    # which this model was not trained on.
    messages = [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # return_full_text=False makes the pipeline return only the completion,
    # so the prompt no longer has to be stripped back out of the response.
    outputs = chat_pipeline(prompt, max_new_tokens=256, do_sample=True,
                            temperature=0.7, return_full_text=False)
    return outputs[0]["generated_text"].strip()

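# Build the chat UI and start a local server; Gradio prints the URL to open.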
gr.ChatInterface(
    fn=chatbot,
    title="🩺 OpenBioLLM Chatbot",
    description="Ask me anything biomedical!",
).launch()