import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Llama-2 tokenizer and model. The Llama tokenizer ships without a
# pad token, so reuse the EOS token for padding.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")


def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []

    # chat_history holds alternating user/assistant turns; rebuild the
    # conversation, then append the new user message and an "Assistant:" cue
    # so the model continues as the assistant.
    turns = [f"{'User' if i % 2 == 0 else 'Assistant'}: {t}" for i, t in enumerate(chat_history)]
    turns += [f"User: {user_input}", "Assistant:"]
    input_text = "\n".join(turns)

    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, not the prompt we fed in.
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    ).strip()

    chat_history.append(user_input)
    chat_history.append(response)

    return response, chat_history


def respond(user_input, chat_history=None):
    response, chat_history = generate_response(user_input, chat_history)
    return response, chat_history
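
# Optional sanity check (assumes the model above loaded successfully): call the
# generation function directly from a Python shell before wiring up the UI.
# The prompts below are illustrative only.
#
#   reply, history = generate_response("Hello! Who are you?")
#   print(reply)
#   reply, history = generate_response("What can you help me with?", history)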


# The chat history is threaded through Gradio state, so "state" must appear in
# both the inputs and the outputs. The function runs only when the user
# submits, rather than live on every keystroke.
iface = gr.Interface(fn=respond, inputs=["text", "state"], outputs=["text", "state"])

iface.launch()