import gradio as gr
from transformers import pipeline

# Load the instruct model once; device_map="auto" places it on a GPU when one is available.
chat = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto")
def respond(message, history):
    # With type="messages" below, history arrives as a list of
    # {"role": ..., "content": ...} dicts, which the pipeline accepts directly.
    messages = history + [{"role": "user", "content": message}]
    out = chat(messages, max_new_tokens=256)
    # The pipeline returns the full conversation; the last entry is the new assistant turn.
    # ChatInterface expects only the reply string, not the updated history.
    return out[0]["generated_text"][-1]["content"]
iface = gr.ChatInterface(respond, type="messages")
iface.launch()