"""Gradio chat UI for the OpenBioLLM biomedical chatbot.

Loads aaditya/Llama3-OpenBioLLM-8B once at module import and serves it
through a gradio.ChatInterface.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "aaditya/Llama3-OpenBioLLM-8B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",          # shard layers across available GPUs/CPU
    torch_dtype=torch.float16,  # half precision: fine for inference, halves memory
)
chat_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chatbot(message, history=None):
    """Generate one chatbot reply for *message*.

    Args:
        message: The user's latest input string.
        history: Chat history supplied by gradio.ChatInterface. Currently
            unused — each turn is answered independently. (Default is None,
            not a mutable [] literal, which would be shared across calls.)

    Returns:
        The model's generated reply with surrounding whitespace stripped.
    """
    # NOTE(review): "[INST] ... [/INST]" is the Mistral/Llama-2 chat template;
    # this model is Llama-3-based, which normally uses header-token formatting.
    # Confirm against the model card / tokenizer.apply_chat_template.
    prompt = f"[INST] {message} [/INST]"

    # return_full_text=False makes the pipeline return only the newly
    # generated tokens. This replaces the old response.replace(prompt, ""),
    # which could corrupt the answer if the model echoed any prompt
    # fragment mid-response.
    outputs = chat_pipeline(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()


if __name__ == "__main__":
    # Guarded so importing this module (e.g. for tests) does not start a server.
    gr.ChatInterface(
        fn=chatbot,
        title="🩺 OpenBioLLM Chatbot",
        description="Ask me anything biomedical!",
    ).launch()