thelip committed on
Commit 5eb6970 · verified · 1 Parent(s): d2aa15d

Update app.py

Files changed (1)
  1. app.py +30 -39
app.py CHANGED
@@ -1,44 +1,35 @@
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
-
- # Load model and tokenizer with the token from environment variables
- model_name = "meta-llama/Llama-2-7b-hf"
- token = os.getenv("HUGGINGFACE_TOKEN")  # Get token from environment
-
- # Add print statements for debugging
- print("Loading tokenizer...")
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
- print("Tokenizer loaded.")
-
- print("Loading model...")
- model = AutoModelForCausalLM.from_pretrained(model_name, token=token, torch_dtype=torch.float16)
- print("Model loaded.")
-
- model = model.to("cuda" if torch.cuda.is_available() else "cpu")
- print("Model moved to device.")
-
- # Function to generate responses
- def generate_response(user_input, chat_history):
-     chat_history.append({"role": "user", "content": user_input})
-     conversation = ""
-     for turn in chat_history:
-         conversation += f"{turn['role']}: {turn['content']}\n"
-     inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
-     outputs = model.generate(inputs.input_ids, max_length=500, do_sample=True, temperature=0.7)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     chat_history.append({"role": "assistant", "content": response})
      return response, chat_history

- # Define Gradio chat interface
- def chat_interface():
-     chat_history = []
-     def respond(user_input):
-         response, chat_history = generate_response(user_input, chat_history)
-         return response
-     gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()
-
- # Call the interface function to start the app
- print("Launching Gradio interface...")
- chat_interface()
 
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Initialize the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
+
+ # Define the response generation function
+ def generate_response(user_input, chat_history=None):
+     if chat_history is None:
+         chat_history = []
+
+     # Tokenize the user input and chat history
+     input_text = " ".join([f"User: {user_input}"] + [f"Assistant: {x}" for x in chat_history])
+     inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
+
+     # Generate the model's response
+     outputs = model.generate(inputs['input_ids'], max_length=200, pad_token_id=tokenizer.eos_token_id)
+
+     # Decode the response and append to chat history
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     chat_history.append(user_input)
+     chat_history.append(response)
+
+     return response, chat_history
+
+ # Create a Gradio interface
+ def respond(user_input, chat_history=None):
+     response, chat_history = generate_response(user_input, chat_history)
      return response, chat_history

+ # Launch the interface
+ iface = gr.Interface(fn=respond, inputs="text", outputs=["text", "state"], live=True)
+
+ iface.launch()
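
The new respond function both accepts and returns chat_history, so the history has to be carried between calls by a state component on the input side as well as the output side. Below is a minimal sketch of that wiring, assuming a Gradio 3.x-style Interface; the stub generate_response and the variable names are illustrative, not part of this commit.

import gradio as gr

# Hypothetical stand-in for the tokenizer/model call, so the state wiring
# can be tried without loading Llama-2
def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []
    response = f"echo: {user_input}"  # placeholder for model.generate + decode
    chat_history.extend([user_input, response])
    return response, chat_history

def respond(user_input, chat_history=None):
    response, chat_history = generate_response(user_input, chat_history)
    return response, chat_history

# "state" on both inputs and outputs lets Gradio feed the returned
# chat_history back into the next call
demo = gr.Interface(fn=respond, inputs=["text", "state"], outputs=["text", "state"])
demo.launch()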