Spaces: 🤖 TinyLlama Chatbot (running demo)
from transformers import AutoTokenizer, AutoModelForCausalLM, TextGenerationPipeline
import gradio as gr

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v0.3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Text-generation pipeline: sample up to 200 new tokens per reply
pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer, max_new_tokens=200, do_sample=True)

def chat(user_input):
    res = pipe(user_input)[0]["generated_text"]
    return res[len(user_input):]  # return only the reply, without repeating the question

gr.Interface(fn=chat, inputs="text", outputs="text", title="🤖 TinyLlama Chatbot").launch()
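The chat() function above passes the raw user text straight to the model. Chat-tuned checkpoints such as TinyLlama-1.1B-Chat-v0.3 usually answer more reliably when the prompt follows their chat template. Below is a minimal sketch of that variant, assuming a recent transformers version that provides tokenizer.apply_chat_template and that the checkpoint ships a chat template; it is not part of the original app and reuses the tokenizer and pipe objects defined above.

# Sketch: wrap the user message in the model's chat template before generating.
# Assumes transformers provides apply_chat_template and the checkpoint defines
# a chat template; tokenizer and pipe come from the snippet above.
def chat_templated(user_input):
    messages = [{"role": "user", "content": user_input}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    res = pipe(prompt)[0]["generated_text"]
    return res[len(prompt):]  # strip the echoed prompt, keep only the reply

To try it, point gr.Interface at chat_templated instead of chat. When deploying on Spaces, transformers and torch typically also need to be listed in the Space's requirements.txt.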