thelip committed on
Commit 5eb6970 · verified · 1 Parent(s): d2aa15d

Update app.py

Files changed (1)
  1. app.py +30 -39
app.py CHANGED
@@ -1,44 +1,35 @@
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
- import torch
- import os
-
- # Load model and tokenizer with the token from environment variables
- model_name = "meta-llama/Llama-2-7b-hf"
- token = os.getenv("HUGGINGFACE_TOKEN")  # Get token from environment
-
- # Add print statements for debugging
- print("Loading tokenizer...")
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
- print("Tokenizer loaded.")
-
- print("Loading model...")
- model = AutoModelForCausalLM.from_pretrained(model_name, token=token, torch_dtype=torch.float16)
- print("Model loaded.")
-
- model = model.to("cuda" if torch.cuda.is_available() else "cpu")
- print("Model moved to device.")
-
- # Function to generate responses
- def generate_response(user_input, chat_history):
-     chat_history.append({"role": "user", "content": user_input})
-     conversation = ""
-     for turn in chat_history:
-         conversation += f"{turn['role']}: {turn['content']}\n"
-     inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
-     outputs = model.generate(inputs.input_ids, max_length=500, do_sample=True, temperature=0.7)
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     chat_history.append({"role": "assistant", "content": response})
      return response, chat_history

- # Define Gradio chat interface
- def chat_interface():
-     chat_history = []
-     def respond(user_input):
-         response, chat_history = generate_response(user_input, chat_history)
-         return response
-     gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()
-
- # Call the interface function to start the app
- print("Launching Gradio interface...")
- chat_interface()
 
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Initialize the model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
+
+ # Define the response generation function
+ def generate_response(user_input, chat_history=None):
+     if chat_history is None:
+         chat_history = []
+
+     # Tokenize the user input and chat history
+     input_text = " ".join([f"User: {user_input}"] + [f"Assistant: {x}" for x in chat_history])
+     inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
+
+     # Generate the model's response
+     outputs = model.generate(inputs['input_ids'], max_length=200, pad_token_id=tokenizer.eos_token_id)
+
+     # Decode the response and append to chat history
      response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     chat_history.append(user_input)
+     chat_history.append(response)
+
+     return response, chat_history
+
+ # Create a Gradio interface
+ def respond(user_input, chat_history=None):
+     response, chat_history = generate_response(user_input, chat_history)
      return response, chat_history

+ # Launch the interface
+ iface = gr.Interface(fn=respond, inputs="text", outputs=["text", "state"], live=True)
+
+ iface.launch()
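
The new respond function both accepts and returns chat_history, so the history has to be carried between calls by a state component on the input side as well as the output side. Below is a minimal sketch of that wiring, assuming a Gradio 3.x-style Interface; the stub generate_response and the variable names are illustrative, not part of this commit.

import gradio as gr

# Hypothetical stand-in for the tokenizer/model call, so the state wiring
# can be tried without loading Llama-2
def generate_response(user_input, chat_history=None):
    if chat_history is None:
        chat_history = []
    response = f"echo: {user_input}"  # placeholder for model.generate + decode
    chat_history.extend([user_input, response])
    return response, chat_history

def respond(user_input, chat_history=None):
    response, chat_history = generate_response(user_input, chat_history)
    return response, chat_history

# "state" on both inputs and outputs lets Gradio feed the returned
# chat_history back into the next call
demo = gr.Interface(fn=respond, inputs=["text", "state"], outputs=["text", "state"])
demo.launch()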