import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Initialize the model and tokenizer (the Llama 2 weights are gated and
# require accepting Meta's license on the Hugging Face Hub)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

# Llama 2 ships without a pad token, so reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token

# Define the response generation function
def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []
    # Rebuild the prompt from the alternating user/assistant turns already
    # in the history, then append the new message and cue the reply
    turns = [
        f"{'User' if i % 2 == 0 else 'Assistant'}: {msg}"
        for i, msg in enumerate(chat_history)
    ]
    input_text = " ".join(turns + [f"User: {user_input}", "Assistant:"])
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    # Generate the model's response; max_new_tokens bounds only the reply,
    # not the prompt, so long histories are not silently cut off
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (the prompt is echoed back in
    # the output otherwise) and record both turns in the history
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    chat_history.append(user_input)
    chat_history.append(response)
    return response, chat_history

# Create a Gradio interface; "state" must appear in both inputs and outputs
# so the chat history persists across calls
iface = gr.Interface(
    fn=generate_response,
    inputs=["text", "state"],
    outputs=["text", "state"],
)

# Launch the interface
iface.launch()
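
# Once the app is running, it can also be exercised programmatically.
# A minimal sketch using gradio_client, assuming the default local address
# and gr.Interface's default /predict route (run this from a separate
# process, since iface.launch() blocks; the "state" component is managed
# per client session, so only the text input is passed):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict("Hello, who are you?", api_name="/predict")
#   print(reply)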