import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Initialize the model and tokenizer (the Llama 2 weights are gated and
# require accepting Meta's license on the Hugging Face Hub)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

# Llama 2 ships without a pad token, so reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token

# Define the response generation function
def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []
    # Rebuild the prompt from the alternating user/assistant turns already
    # in the history, then append the new message and cue the reply
    turns = [
        f"{'User' if i % 2 == 0 else 'Assistant'}: {msg}"
        for i, msg in enumerate(chat_history)
    ]
    input_text = " ".join(turns + [f"User: {user_input}", "Assistant:"])
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    # Generate the model's response; max_new_tokens bounds only the reply,
    # not the prompt, so long histories are not silently cut off
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens (the prompt is echoed back in
    # the output otherwise) and record both turns in the history
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    chat_history.append(user_input)
    chat_history.append(response)
    return response, chat_history

# Create a Gradio interface; "state" must appear in both inputs and outputs
# so the chat history persists across calls
iface = gr.Interface(
    fn=generate_response,
    inputs=["text", "state"],
    outputs=["text", "state"],
)

# Launch the interface
iface.launch()
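
# Once the app is running, it can also be exercised programmatically.
# A minimal sketch using gradio_client, assuming the default local address
# and gr.Interface's default /predict route (run this from a separate
# process, since iface.launch() blocks; the "state" component is managed
# per client session, so only the text input is passed):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   reply = client.predict("Hello, who are you?", api_name="/predict")
#   print(reply)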