thelip committed
Commit 07afc51 · verified · 1 Parent(s): 5eb6970

Update app.py

Files changed (1)
  1. app.py +41 -17
app.py CHANGED
@@ -1,35 +1,59 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-# Initialize the model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
+# Load model and tokenizer
+model_name = "meta-llama/Llama-2-7b-hf"
 
-# Define the response generation function
+# Use Hugging Face authentication token
+token = 'HUGGINGFACE_TOKEN'  # Replace this with your Hugging Face token
+
+# Load model and tokenizer from Hugging Face
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token)
+model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token)
+
+# Function to generate response from the model
 def generate_response(user_input, chat_history=None):
     if chat_history is None:
         chat_history = []
 
-    # Tokenize the user input and chat history
-    input_text = " ".join([f"User: {user_input}"] + [f"Assistant: {x}" for x in chat_history])
-    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
+    # Format the input for the model
+    input_text = user_input + ' '  # Add a space for separation between user input and the response
 
-    # Generate the model's response
-    outputs = model.generate(inputs['input_ids'], max_length=200, pad_token_id=tokenizer.eos_token_id)
+    # Encode the input
+    inputs = tokenizer.encode(input_text, return_tensors="pt")
 
-    # Decode the response and append to chat history
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    chat_history.append(user_input)
-    chat_history.append(response)
+    # Generate a response from the model
+    outputs = model.generate(inputs, max_length=150, num_return_sequences=1, no_repeat_ngram_size=2, pad_token_id=tokenizer.eos_token_id)
+
+    # Decode the response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Append the response to the chat history
+    chat_history.append((user_input, response))
 
+    # Return the response and updated chat history
     return response, chat_history
 
-# Create a Gradio interface
+# Gradio Interface
 def respond(user_input, chat_history=None):
     response, chat_history = generate_response(user_input, chat_history)
     return response, chat_history
 
-# Launch the interface
-iface = gr.Interface(fn=respond, inputs="text", outputs=["text", "state"], live=True)
-
+# Set up Gradio interface
+iface = gr.Interface(
+    fn=respond,
+    inputs=[
+        gr.Textbox(label="Your Message", placeholder="Ask me anything!", lines=2),
+        gr.State()
+    ],
+    outputs=[
+        gr.Textbox(label="Response", lines=3),
+        gr.State()
+    ],
+    title="Llama-2 Chatbot",
+    description="Ask me anything, and I'll respond using Llama-2 model.",
+    live=True
+)
+
+# Launch the Gradio interface
 iface.launch()
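
A note on the new authentication step: committing a literal token to app.py is easy to leak, and recent transformers releases deprecate the use_auth_token argument in favor of token. A minimal sketch of the same loading step that reads the token from the environment instead (the HF_TOKEN variable name follows the huggingface_hub convention; this is a suggested alternative, not part of the commit):

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "meta-llama/Llama-2-7b-hf"

# Read the token from the environment instead of hardcoding it in app.py.
# HF_TOKEN is the variable huggingface_hub reads by default (assumes it is set).
hf_token = os.environ.get("HF_TOKEN")

# Recent transformers versions accept token=; use_auth_token= still works
# but emits a deprecation warning.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)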
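
Worth flagging for a follow-up commit: the updated generate_response stores (user_input, response) pairs in chat_history but never feeds them to the model; input_text is only the latest message, so every turn is answered without context. If multi-turn context is intended, one way to fold the stored pairs into the prompt (a sketch; the plain User:/Assistant: labels are an assumed format, not the official Llama-2 chat template):

def build_prompt(user_input, chat_history):
    # Replay prior (user, assistant) turns, then append the new message.
    turns = []
    for user_msg, assistant_msg in chat_history:
        turns.append(f"User: {user_msg}")
        turns.append(f"Assistant: {assistant_msg}")
    turns.append(f"User: {user_input}")
    turns.append("Assistant:")
    return "\n".join(turns)

# Inside generate_response, input_text = user_input + ' ' would become:
# input_text = build_prompt(user_input, chat_history)

Note also that meta-llama/Llama-2-7b-hf is the base checkpoint; for a chatbot, the instruction-tuned meta-llama/Llama-2-7b-chat-hf variant is usually the better fit.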
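
Finally, live=True makes Gradio re-run respond on every input change, which with a 7B model means a full generate pass per keystroke; dropping live=True so the app runs on submit is the cheaper default. Recent Gradio releases also ship gr.ChatInterface, which handles the textbox/state plumbing itself (a sketch, assuming generate_response from above is in scope):

import gradio as gr

def respond(message, history):
    # gr.ChatInterface passes the running history in as (user, assistant) pairs;
    # generate_response currently ignores it, so only the reply is returned.
    response, _ = generate_response(message, list(history))
    return response

demo = gr.ChatInterface(fn=respond, title="Llama-2 Chatbot")
demo.launch()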