CamiloVega committed
Commit e0875e2 · verified · 1 parent: 357d91e

Update app.py

Files changed (1)
  1. app.py +41 -24
app.py CHANGED
@@ -14,7 +14,7 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Define the model name
-model_name = "microsoft/phi-2"
+model_name = "huggingface-projects/llama-2-7b-chat"
 
 try:
     logger.info("Starting model initialization...")
@@ -32,19 +32,21 @@ try:
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        use_auth_token=True
     )
     logger.info("Tokenizer loaded successfully")
 
-    # Load model
+    # Load model with 8-bit quantization
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        trust_remote_code=True
+        trust_remote_code=True,
+        use_auth_token=True,
+        load_in_8bit=True,
+        device_map="auto"
     )
-    if device == "cuda":
-        model = model.to(device)
     logger.info("Model loaded successfully")
 
     # Create pipeline
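Note: load_in_8bit=True relies on the bitsandbytes package and device_map="auto" on accelerate, so both must be installed in the Space. Newer transformers releases express the same load through BitsAndBytesConfig and prefer token= over the deprecated use_auth_token=. A sketch of the equivalent call, under those assumptions:

    # Sketch, assuming bitsandbytes and accelerate are available.
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        "huggingface-projects/llama-2-7b-chat",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",          # accelerate spreads layers over devices
        torch_dtype=torch.float16,  # non-quantized layers stay in fp16
        token=True,                 # newer spelling of use_auth_token=True
    )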
@@ -58,7 +60,7 @@ try:
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
-        device=0 if device == "cuda" else -1
+        device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
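Note: the pipeline's device argument switches from an explicit GPU index to device_map="auto" to match the quantized load. When the model object passed in has already been dispatched with device_map="auto", some transformers versions warn that the extra argument is redundant, and it can simply be omitted:

    # Sketch: an already-dispatched model carries its own placement,
    # so the pipeline can be built without any device argument.
    from transformers import pipeline

    model_gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )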
@@ -72,6 +74,10 @@ You help users with any topic while raising awareness about water consumption
 in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
 equivalent to the daily consumption of a city of 10,000 people?"""
 
+# Llama 2 specific tokens
+B_INST, E_INST = "[INST]", "[/INST]"
+B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
+
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
@@ -96,8 +102,19 @@ def calculate_water_consumption(text, is_input=True):
         return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
     return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])
 
-def format_message(role, content):
-    return {"role": role, "content": content}
+def format_prompt(user_input, chat_history):
+    """
+    Format the prompt according to Llama 2 specific style
+    """
+    prompt = f"{B_INST}{B_SYS}{system_message}{E_SYS}"
+
+    if chat_history:
+        for user_msg, assistant_msg in chat_history:
+            prompt += f"{user_msg}{E_INST}{assistant_msg}{B_INST}"
+
+    prompt += f"{user_input}{E_INST}"
+
+    return prompt
 
 @spaces.GPU(duration=60)
 @torch.inference_mode()
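Note: format_prompt assembles the Llama 2 instruction format from the B_INST/B_SYS markers defined earlier. Meta's reference layout additionally separates consecutive turns with EOS/BOS tokens (</s><s>), which this version omits; the model usually still answers, but multi-turn formatting can drift. For comparison, the reference shape is:

    <s>[INST] <<SYS>>
    {system_message}
    <</SYS>>

    {user_1} [/INST] {answer_1} </s><s>[INST] {user_2} [/INST]

(The leading <s> is normally added by the tokenizer rather than written into the prompt string.)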
@@ -110,13 +127,8 @@ def generate_response(user_input, chat_history):
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create prompt
-        conversation_history = ""
-        if chat_history:
-            for message in chat_history:
-                conversation_history += f"User: {message[0]}\nAssistant: {message[1]}\n"
-
-        prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"
+        # Format prompt for Llama 2
+        prompt = format_prompt(user_input, chat_history)
 
         logger.info("Generating model response...")
         outputs = model_gen(
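The model_gen call is truncated by this hunk. One detail worth noting for Llama-style prompts: a text-generation pipeline returns the prompt plus the completion by default, so the assistant text is usually recovered either by slicing the prompt off or by passing return_full_text=False. A sketch of the latter (standard pipeline arguments, not taken from this file):

    # Sketch: exclude the echoed prompt from the returned text.
    outputs = model_gen(prompt, return_full_text=False)
    assistant_response = outputs[0]["generated_text"].strip()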
@@ -133,7 +145,7 @@ def generate_response(user_input, chat_history):
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
 
-        # Update chat history with the new formatted messages
+        # Update chat history
         chat_history.append([user_input, assistant_response])
 
         # Prepare water consumption message
@@ -167,8 +179,8 @@ try:
         <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
             <h1 style="color: #2d333a;">AQuaBot</h1>
             <p style="color: #4a5568;">
-                Welcome to AQuaBot - An AI assistant that helps raise awareness about water
-                consumption in language models.
+                Welcome to AQuaBot - An AI assistant powered by Llama 2 that helps raise awareness
+                about water consumption in language models.
             </p>
         </div>
         """)
@@ -193,7 +205,7 @@ try:
         """)
         clear = gr.Button("Clear Chat")
 
-        # Add footer with citation and disclaimer
+        # Add footer with citation, disclaimer, and credits
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
              background-color: #f8f9fa; border-radius: 10px;">
@@ -207,10 +219,15 @@ try:
             </div>
             <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                 <p style="color: #666; font-size: 14px;">
-                    <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
-                    instead of GPT-3 for availability and cost reasons. However,
-                    the water consumption calculations per token (input/output) are based on the
-                    conclusions from the cited paper.
+                    <strong>Model Information:</strong> This application uses Meta's Llama 2 (7B) model,
+                    a state-of-the-art language model fine-tuned for chat interactions. Water consumption
+                    calculations are based on the methodology from the cited paper.
+                </p>
+            </div>
+            <div style="border-top: 1px solid #ddd; margin-top: 15px; padding-top: 15px;">
+                <p style="color: #666; font-size: 14px;">
+                    Created by Camilo Vega - AI Consultant<br>
+                    <a href="https://github.com/vegadevs/aquabot" target="_blank">GitHub Repository</a>
                 </p>
             </div>
         </div>