Spaces:

thelip
/

demm

Runtime error

App Files Files Community

thelip committed on Nov 10, 2024

Commit

b4e88f2

verified ·

1 Parent(s): fe52cbc

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -33

app.py CHANGED Viewed

@@ -1,50 +1,31 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load the LLaMA-2 model and tokenizer from Hugging Face
-model_name = "meta-llama/Llama-2-7b-hf"  # Change to the desired LLaMA model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 # Function to generate responses
 def generate_response(user_input, chat_history):
-    # Add the user's input to the conversation history
     chat_history.append({"role": "user", "content": user_input})
-    # Prepare input for the model
     conversation = ""
     for turn in chat_history:
         conversation += f"{turn['role']}: {turn['content']}\n"
     inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
-    # Generate model response
     outputs = model.generate(inputs.input_ids, max_length=500, do_sample=True, temperature=0.7)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Add the model's response to the chat history
     chat_history.append({"role": "assistant", "content": response})
-    # Only return the model's response for display
     return response, chat_history
-# Initialize the chat history
-chat_history = []
-# Define Gradio interface
-with gr.Blocks() as chat_interface:
-    gr.Markdown("## LLaMA-2 Chatbot")
-    chat_input = gr.Textbox(label="Your Message")
-    chat_output = gr.Chatbot()
-    # Update chat on button click
-    def handle_input(user_input):
         response, chat_history = generate_response(user_input, chat_history)
-        chat_output.update(chat_history)
-        return "", chat_history  # Clear input box and update chat history
-    chat_input.submit(handle_input, inputs=chat_input, outputs=[chat_input, chat_output])
-# Launch Gradio app
-chat_interface.launch()

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+import os
+# Load the tokenizer and model, authenticating with the token read from the HUGGINGFACE_TOKEN environment variable
+model_name = "meta-llama/Llama-2-7b-hf"
+token = os.getenv("HUGGINGFACE_TOKEN")  # None when the variable is unset
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)  # NOTE(review): newer transformers releases prefer `token=` over `use_auth_token=` - confirm against the installed version
+model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token, torch_dtype=torch.float16)  # weights are loaded in half precision
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when one is available, otherwise fall back to the CPU
 # Function to generate responses
 def generate_response(user_input, chat_history):
     """Generate the assistant's reply to ``user_input`` and record the turn.

     Args:
         user_input: The text of the user's new message.
         chat_history: List of ``{"role": ..., "content": ...}`` dicts holding
             the earlier turns. It is mutated in place: the user's message and
             the generated reply are both appended.

     Returns:
         A ``(response, chat_history)`` tuple, where ``response`` is the newly
         generated text only and ``chat_history`` is the same list object that
         was passed in.
     """
     chat_history.append({"role": "user", "content": user_input})
     # Flatten the history into a "role: content" transcript, one turn per line.
     conversation = "".join(
         f"{turn['role']}: {turn['content']}\n" for turn in chat_history
     )
     inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
     prompt_length = inputs.input_ids.shape[1]
     # max_new_tokens bounds only the completion; max_length would also count
     # the prompt, leaving less and less room to generate as the history grows.
     # Unpacking the encoding forwards the attention mask together with the ids.
     outputs = model.generate(**inputs, max_new_tokens=500, do_sample=True, temperature=0.7)
     # The generated sequence starts with the prompt tokens; drop them so the
     # reply (and the stored history) does not echo the whole conversation.
     response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
     chat_history.append({"role": "assistant", "content": response})
     return response, chat_history
+# Define Gradio chat interface
+def chat_interface():
+    chat_history = []
+    def respond(user_input):
         response, chat_history = generate_response(user_input, chat_history)
+        return response
+    gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()