"""Minimal Gradio chat UI around a TinyLlama causal-LM text-generation pipeline."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline

MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v0.3"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# max_new_tokens bounds each reply; do_sample=True gives varied (non-greedy) output.
pipe = TextGenerationPipeline(
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    do_sample=True,
)


def chat(user_input: str) -> str:
    """Generate a reply for *user_input*, returning only the model's continuation.

    Uses ``return_full_text=False`` so the pipeline itself drops the prompt.
    This replaces the original length-based slice (``res[len(user_input):]``),
    which silently misaligns whenever the pipeline normalizes or re-tokenizes
    the prompt before generation.
    """
    outputs = pipe(user_input, return_full_text=False)
    return outputs[0]["generated_text"]


if __name__ == "__main__":
    # Guarded so importing this module does not start a web server.
    gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="🤖 TinyLlama Chatbot",
    ).launch()