EYEDOL committed
Commit e6292a4 · verified · 1 Parent(s): 6fefd54

Update app.py

Files changed (1): app.py (+22, -19)
app.py CHANGED
@@ -14,11 +14,11 @@ from scipy.io.wavfile import write as write_wav
  import os
  import re
  from huggingface_hub import login
- import threading # <-- FIX: Added threading import
+ import threading
 
  # --- Login to Hugging Face using secret ---
  # Make sure HF_TOKEN is set in your Hugging Face Space > Settings > Repository secrets
- hf_token = os.environ.get("hugface") #
+ hf_token = os.environ.get("hugface") # Using "HF_TOKEN" is the standard on Spaces
  if not hf_token:
      raise ValueError("HF_TOKEN not found. Please set it in Hugging Face Space repository secrets.")
  login(token=hf_token)
@@ -63,13 +63,12 @@ class WeeboAssistant:
 
          # LLM
          print(f"Loading LLM: {LLM_MODEL_ID}")
-         # <-- FIX: Initialize tokenizer separately to use it with the streamer
          self.llm_tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_ID)
          self.llm_pipeline = pipeline(
              "text-generation",
              model=LLM_MODEL_ID,
              model_kwargs={"torch_dtype": self.torch_dtype},
-             tokenizer=self.llm_tokenizer, # Pass the tokenizer here
+             tokenizer=self.llm_tokenizer,
              device=self.device,
          )
          print("LLM pipeline loaded successfully.")
@@ -117,11 +116,20 @@ class WeeboAssistant:
          return output_path
 
      def get_llm_response(self, chat_history):
-         messages = [{'role': 'system', 'content': self.SYSTEM_PROMPT}]
-         for turn in chat_history:
-             messages.append({'role': 'user', 'content': turn[0]})
-             if turn[1] is not None:
-                 messages.append({'role': 'assistant', 'content': turn[1]})
+         # <-- START OF FIX: Rebuild message history without a 'system' role -->
+         messages = []
+         for user_msg, assistant_msg in chat_history:
+             # Add the user's message
+             messages.append({"role": "user", "content": user_msg})
+             # Add the assistant's message if it exists
+             if assistant_msg:
+                 messages.append({"role": "assistant", "content": assistant_msg})
+
+         # Prepend the system prompt to the content of the very first user message.
+         # This is the correct way to use a system prompt with Gemma models.
+         if messages:
+             messages[0]["content"] = f"{self.SYSTEM_PROMPT}\n\n{messages[0]['content']}"
+         # <-- END OF FIX -->
 
          prompt = self.llm_pipeline.tokenizer.apply_chat_template(
              messages, tokenize=False, add_generation_prompt=True
@@ -131,7 +139,6 @@ class WeeboAssistant:
              self.llm_pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
          ]
 
-         # <-- START OF FIX: Use TextIteratorStreamer instead of gr.TextIterator -->
          streamer = TextIteratorStreamer(
              self.llm_pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True
          )
@@ -145,12 +152,10 @@ class WeeboAssistant:
              top_p=0.9,
          )
 
-         # Run the pipeline in a separate thread to enable streaming
          thread = threading.Thread(target=self.llm_pipeline, args=[prompt], kwargs=generation_kwargs)
          thread.start()
 
          return streamer
-         # <-- END OF FIX -->
 
  assistant = WeeboAssistant()
 
@@ -163,12 +168,12 @@ def s2s_pipeline(audio_input, chat_history):
          return
 
      chat_history.append((user_text, ""))
-     yield chat_history, None, "..." # Show thinking indicator
+     yield chat_history, None, "..."
 
      response_stream = assistant.get_llm_response(chat_history)
      llm_response_text = ""
      for text_chunk in response_stream:
-         llm_response_text += text_chunk # <-- FIX: Append chunk to full response
+         llm_response_text += text_chunk
          chat_history[-1] = (user_text, llm_response_text)
          yield chat_history, None, llm_response_text
 
@@ -183,7 +188,7 @@ def t2t_pipeline(text_input, chat_history):
      response_stream = assistant.get_llm_response(chat_history)
      llm_response_text = ""
      for text_chunk in response_stream:
-         llm_response_text += text_chunk # <-- FIX: Append chunk to full response
+         llm_response_text += text_chunk
          chat_history[-1] = (text_input, llm_response_text)
          yield chat_history
 
@@ -232,7 +237,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Msaidizi wa Kiswahili") as demo:
          outputs=[s2s_chatbot, s2s_audio_out, s2s_text_out],
          queue=True
      ).then(
-         fn=lambda: gr.Audio(value=None), # Clear audio input after submit
+         fn=lambda: gr.Audio(value=None),
          inputs=None,
          outputs=s2s_audio_in
      )
@@ -240,7 +245,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Msaidizi wa Kiswahili") as demo:
      t2t_submit_btn.click(
          fn=t2t_pipeline,
          inputs=[t2t_text_in, t2t_chatbot],
-         outputs=[t2t_chatbot], # <-- FIX: Only output to the chatbot
+         outputs=[t2t_chatbot],
          queue=True
      ).then(
          fn=clear_textbox,
@@ -248,7 +253,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Msaidizi wa Kiswahili") as demo:
          outputs=t2t_text_in
      )
 
-     # Also allow Enter key to submit text
      t2t_text_in.submit(
          fn=t2t_pipeline,
          inputs=[t2t_text_in, t2t_chatbot],
@@ -260,7 +264,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Msaidizi wa Kiswahili") as demo:
          outputs=t2t_text_in
      )
 
-
      tool_s2t_btn.click(
          fn=assistant.transcribe_audio,
          inputs=tool_s2t_audio_in,
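
The pattern this commit lands on can be exercised outside the Space as a small standalone script: the chat history is rebuilt without a 'system' role (the system prompt is folded into the first user turn, since Gemma chat templates reject a separate system message), and generation runs in a background thread while TextIteratorStreamer yields decoded chunks to the caller. A minimal sketch follows; MODEL_ID, SYSTEM_PROMPT, and the chat history are illustrative placeholders rather than values from app.py, and only the transformers/threading calls mirror the committed code.

# Sketch only: mirrors the streaming + chat-template flow from this commit.
# MODEL_ID, SYSTEM_PROMPT, and chat_history are placeholder values.
import threading
from transformers import AutoTokenizer, pipeline, TextIteratorStreamer

MODEL_ID = "google/gemma-2b-it"          # placeholder model id
SYSTEM_PROMPT = "You are a helpful Swahili assistant."
chat_history = [("Habari yako?", None)]  # (user, assistant) tuples, as in the Gradio chatbot

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
generator = pipeline("text-generation", model=MODEL_ID, tokenizer=tokenizer)

# Rebuild the history without a 'system' role and fold the system prompt
# into the first user message, as get_llm_response now does.
messages = []
for user_msg, assistant_msg in chat_history:
    messages.append({"role": "user", "content": user_msg})
    if assistant_msg:
        messages.append({"role": "assistant", "content": assistant_msg})
if messages:
    messages[0]["content"] = f"{SYSTEM_PROMPT}\n\n{messages[0]['content']}"

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Run generation in a background thread; the streamer yields decoded text
# chunks, which is what lets the Gradio handlers re-yield partial responses.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(max_new_tokens=256, do_sample=True, top_p=0.9, streamer=streamer)
thread = threading.Thread(target=generator, args=[prompt], kwargs=generation_kwargs)
thread.start()

response = ""
for chunk in streamer:
    response += chunk
print(response)

Running the pipeline in a worker thread is what makes the streamer usable: the pipeline call blocks until generation finishes, so the caller can only iterate over partial chunks if generation happens on another thread.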