thelip committed on
Commit
d2aa15d
·
verified ·
1 Parent(s): b4682e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -0
app.py CHANGED
@@ -6,9 +6,18 @@ import os
6
  # Load model and tokenizer with the token from environment variables
7
  model_name = "meta-llama/Llama-2-7b-hf"
8
  token = os.getenv("HUGGINGFACE_TOKEN") # Get token from environment
 
 
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 
 
 
10
  model = AutoModelForCausalLM.from_pretrained(model_name, token=token, torch_dtype=torch.float16)
 
 
11
  model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 
12
 
13
  # Function to generate responses
14
  def generate_response(user_input, chat_history):
@@ -29,3 +38,7 @@ def chat_interface():
29
  response, chat_history = generate_response(user_input, chat_history)
30
  return response
31
  gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()
 
 
 
 
 
6
# Load the LLaMA-2 model and tokenizer, authenticating with a token read
# from the environment (never hard-code credentials in source).
model_name = "meta-llama/Llama-2-7b-hf"
token = os.getenv("HUGGINGFACE_TOKEN")  # Get token from environment

# Add print statements for debugging
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
print("Tokenizer loaded.")

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name, token=token, torch_dtype=torch.float16
)
print("Model loaded.")

# Prefer the GPU when one is visible; otherwise fall back to CPU.
# NOTE(review): float16 weights on CPU are slow — confirm the CPU
# fallback path is actually intended for deployment.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
print("Model moved to device.")
21
 
22
  # Function to generate responses
23
  def generate_response(user_input, chat_history):
 
38
  response, chat_history = generate_response(user_input, chat_history)
39
  return response
40
  gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()
41
# Entry point: build the Gradio UI and start serving requests.
print("Launching Gradio interface...")
chat_interface()