thelip committed on
Commit b4e88f2 · verified · 1 Parent(s): fe52cbc

Update app.py

Files changed (1)
  1. app.py +14 -33
app.py CHANGED
@@ -1,50 +1,31 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-
-# Load the LLaMA-2 model and tokenizer from Hugging Face
-model_name = "meta-llama/Llama-2-7b-hf"  # Change to the desired LLaMA model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
+import os
+
+# Load model and tokenizer with the token from environment variables
+model_name = "meta-llama/Llama-2-7b-hf"
+token = os.getenv("HUGGINGFACE_TOKEN")  # Get token from environment
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
+model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token, torch_dtype=torch.float16)
 model = model.to("cuda" if torch.cuda.is_available() else "cpu")
 
 # Function to generate responses
 def generate_response(user_input, chat_history):
-    # Add the user's input to the conversation history
     chat_history.append({"role": "user", "content": user_input})
-
-    # Prepare input for the model
     conversation = ""
     for turn in chat_history:
         conversation += f"{turn['role']}: {turn['content']}\n"
     inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
-
-    # Generate model response
     outputs = model.generate(inputs.input_ids, max_length=500, do_sample=True, temperature=0.7)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Add the model's response to the chat history
     chat_history.append({"role": "assistant", "content": response})
-
-    # Only return the model's response for display
     return response, chat_history
 
-# Initialize the chat history
-chat_history = []
-
-# Define Gradio interface
-with gr.Blocks() as chat_interface:
-    gr.Markdown("## LLaMA-2 Chatbot")
-    chat_input = gr.Textbox(label="Your Message")
-    chat_output = gr.Chatbot()
-
-    # Update chat on button click
-    def handle_input(user_input):
+# Define Gradio chat interface
+def chat_interface():
+    chat_history = []
+    def respond(user_input):
         response, chat_history = generate_response(user_input, chat_history)
-        chat_output.update(chat_history)
-        return "", chat_history  # Clear input box and update chat history
-
-    chat_input.submit(handle_input, inputs=chat_input, outputs=[chat_input, chat_output])
-
-# Launch Gradio app
-chat_interface.launch()
+        return response
+    gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()
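
A note on the new auth handling: `use_auth_token` is deprecated in recent `transformers` releases in favor of a `token` keyword argument. A minimal sketch of the same load step under that assumption, with the same model and environment variable as the commit:

    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    model_name = "meta-llama/Llama-2-7b-hf"
    token = os.getenv("HUGGINGFACE_TOKEN")  # read the access token from the environment

    # `token=` replaces the deprecated `use_auth_token=` on newer transformers versions
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=token, torch_dtype=torch.float16)

Either spelling still requires that the Space actually has `HUGGINGFACE_TOKEN` set as a secret, since the gated meta-llama weights cannot be downloaded anonymously.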
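
Two generation details in `generate_response` are worth flagging: `model.generate` returns the prompt tokens followed by the continuation, so decoding `outputs[0]` wholesale stores the entire conversation as the assistant's reply, and `max_length=500` counts prompt tokens too, so a long history leaves little or no room for new text. A hedged rewrite of just that step (the 256-token budget is an arbitrary choice, not from the commit):

    inputs = tokenizer(conversation, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs.input_ids,
        max_new_tokens=256,  # budget for the reply only, independent of prompt length
        do_sample=True,
        temperature=0.7,
    )
    # Slice off the prompt so only the newly generated text is decoded as the reply
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)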
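
The new `chat_interface` also has a scoping pitfall: `respond` assigns to `chat_history`, so Python treats that name as local to `respond`, and the first call raises `UnboundLocalError` instead of reading the enclosing list. The file also defines `chat_interface()` without ever calling it, so the Space never launches. A minimal sketch of a working closure; the `nonlocal` line and the `__main__` guard are additions, not part of the commit:

    import gradio as gr

    def chat_interface():
        chat_history = []

        def respond(user_input):
            nonlocal chat_history  # rebind the enclosing list instead of shadowing it
            response, chat_history = generate_response(user_input, chat_history)
            return response

        # One text box in, one text box out; conversation state lives in the closure
        gr.Interface(fn=respond, inputs="text", outputs="text", title="LLaMA-2 Chatbot").launch()

    if __name__ == "__main__":
        chat_interface()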