Spaces:

thelip
/

demm

Runtime error

File size: 1,700 Bytes

c4868a7
b4e88f2
d91393a
b4e88f2
b4682e6
b4e88f2
 
 
d2aa15d
 
 
b4682e6
d2aa15d
 
 
b4682e6
d2aa15d
 
d91393a
d2aa15d
c4868a7
d91393a
 
 
 
 
 
 
 
 
 
 
 
b4e88f2
 
 
 
d91393a
b4e88f2
 
d2aa15d

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Model checkpoint to serve, plus the access token read from the
# HUGGINGFACE_TOKEN environment variable (None when it is not set).
model_name = "meta-llama/Llama-2-7b-hf"
token = os.environ.get("HUGGINGFACE_TOKEN")

# Progress messages around each slow step make the startup log easy to follow.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
print("Tokenizer loaded.")

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=token,
    torch_dtype=torch.float16,
)
print("Model loaded.")

# Prefer the GPU when one is available; otherwise keep the model on the CPU.
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")
print("Model moved to device.")

# Function to generate responses
def generate_response(user_input, chat_history):
    """Generate the assistant's reply to ``user_input`` and record the turn.

    The user message is appended to ``chat_history``, the whole history is
    flattened into a ``"role: content"`` transcript (one turn per line), and
    the module-level ``model`` is sampled to continue that transcript.

    Args:
        user_input: The latest message typed by the user.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts for the
            conversation so far. It is mutated in place: the user turn and
            the generated assistant turn are both appended.

    Returns:
        A ``(response, chat_history)`` tuple, where ``response`` is only the
        newly generated text and ``chat_history`` is the same list object
        that was passed in.
    """
    chat_history.append({"role": "user", "content": user_input})
    conversation = "".join(
        f"{turn['role']}: {turn['content']}\n" for turn in chat_history
    )
    inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
    prompt_length = inputs.input_ids.shape[1]
    # Pass the full encoding so the attention mask reaches generate(), and
    # bound the number of *new* tokens: max_length would also count the
    # prompt, leaving no room to answer once the transcript grows.
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        do_sample=True,
        temperature=0.7,
    )
    # The output sequence starts with the prompt tokens; decode only what
    # follows them so the reply does not echo the whole conversation back
    # (and so the echoed text is not stored in the history again).
    new_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    chat_history.append({"role": "assistant", "content": response})
    return response, chat_history

# Define Gradio chat interface
def chat_interface():
    """Build the Gradio text-in/text-out UI and launch it.

    A single ``chat_history`` list is created per call and captured by the
    ``respond`` callback, so every request served by the launched interface
    appends to the same conversation.
    """
    chat_history = []

    def respond(user_input):
        """Return the model's reply to ``user_input``."""
        # generate_response appends to chat_history in place, so the list it
        # returns is discarded. Assigning it back to `chat_history` here would
        # make that name local to respond() and raise UnboundLocalError when
        # it is read as an argument in the same statement.
        response, _ = generate_response(user_input, chat_history)
        return response

    gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()

# Start the app: chat_interface() builds the Gradio UI and launches it.
# There is no `if __name__ == "__main__":` guard, so this also runs whenever
# the module is imported, not only when it is executed as a script.
print("Launching Gradio interface...")
chat_interface()