Spaces:

thelip
/

demm

Runtime error

App Files Files Community

thelip committed on Nov 10, 2024

Commit

07afc51

verified ·

1 Parent(s): 5eb6970

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -17

app.py CHANGED Viewed

@@ -1,35 +1,59 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# Initialize the model and tokenizer
-# Both are loaded once, at import time, from the same
-# "meta-llama/Llama-2-7b-hf" checkpoint and are used as module-level globals
-# by generate_response().
-# NOTE(review): no access token is passed here - presumably this checkpoint
-# requires authentication; confirm.
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
-# Define the response generation function
 def generate_response(user_input, chat_history=None):
-    """Generate a model reply to ``user_input`` and record the exchange.
-
-    Args:
-        user_input: Text of the latest user message.
-        chat_history: List of earlier entries, or None to start an empty one.
-
-    Returns:
-        A ``(response, chat_history)`` tuple. ``chat_history`` is the list
-        that was passed in (or the newly created one) with ``user_input`` and
-        ``response`` appended as two separate entries.
-    """
     if chat_history is None:
         chat_history = []
-    # Tokenize the user input and chat history
-    # NOTE(review): the prompt puts the current message first and labels every
-    # history entry "Assistant:", although the history also holds earlier user
-    # inputs (see the appends below) - confirm this ordering is intended.
-    input_text = " ".join([f"User: {user_input}"] + [f"Assistant: {x}" for x in chat_history])
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
-    # Generate the model's response
-    # NOTE(review): max_length presumably caps prompt plus generated tokens - confirm.
-    outputs = model.generate(inputs['input_ids'], max_length=200, pad_token_id=tokenizer.eos_token_id)
-    # Decode the response and append to chat history
-    # NOTE(review): outputs[0] presumably still contains the prompt tokens, so
-    # the decoded response would echo the prompt - confirm.
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    chat_history.append(user_input)
-    chat_history.append(response)
     return response, chat_history
-# Callback passed to the Gradio interface as ``fn``
 def respond(user_input, chat_history=None):
-    """Forward the call to generate_response() and return its result unchanged.
-
-    Args:
-        user_input: Text of the latest user message.
-        chat_history: History list handed on to generate_response(), or None.
-
-    Returns:
-        The ``(response, chat_history)`` pair produced by generate_response().
-    """
     response, chat_history = generate_response(user_input, chat_history)
     return response, chat_history
-# Build and launch the interface
-# One text input; the outputs are a text component plus a "state" component.
-# NOTE(review): respond() accepts a chat_history argument, but "state" is
-# declared only as an output here, so history is presumably never passed back
-# in - confirm.
-iface = gr.Interface(fn=respond, inputs="text", outputs=["text", "state"], live=True)
 iface.launch()

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load model and tokenizer
+model_name = "meta-llama/Llama-2-7b-hf"
+# Use Hugging Face authentication token
+token = 'HUGGINGFACE_TOKEN'  # Replace this with your Hugging Face token
+# Load model and tokenizer from Hugging Face
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
+model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token)
+# Function to generate response from the model
 def generate_response(user_input, chat_history=None):
     if chat_history is None:
         chat_history = []
+    # Format the input for the model
+    input_text = user_input + ' '  # Add a space for separation between user input and the response
+    # Encode the input
+    inputs = tokenizer.encode(input_text, return_tensors="pt")
+    # Generate a response from the model
+    outputs = model.generate(inputs, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2, pad_token_id=tokenizer.eos_token_id)
+    # Decode the response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Append the response to the chat history
+    chat_history.append((user_input, response))
+    # Return the response and updated chat history
     return response, chat_history
+# Gradio Interface
 def respond(user_input, chat_history=None):
     response, chat_history = generate_response(user_input, chat_history)
     return response, chat_history
+# Set up Gradio interface
+iface = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Textbox(label="Your Message", placeholder="Ask me anything!", lines=2),
+        gr.State()
+    ],
+    outputs=[
+        gr.Textbox(label="Response", lines=3),
+        gr.State()
+    ],
+    title="Llama-2 Chatbot",
+    description="Ask me anything, and I'll respond using Llama-2 model.",
+    live=True
+)
+# Launch the Gradio interface
 iface.launch()