Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -275,7 +275,7 @@ async def download_voice_file(media_url: str, filename: str) -> str:
|
|
275 |
return None
|
276 |
|
277 |
async def transcribe_voice_with_openai(file_path: str) -> str:
|
278 |
-
"""Transcribe voice file using OpenAI Whisper with
|
279 |
try:
|
280 |
# Check if file exists and has content
|
281 |
if not os.path.exists(file_path):
|
@@ -289,19 +289,11 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
289 |
|
290 |
logger.info(f"[Transcribe] Transcribing file: {file_path} (size: {file_size} bytes)")
|
291 |
|
292 |
-
#
|
293 |
system_prompt = """
|
294 |
-
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant.
|
295 |
|
296 |
-
|
297 |
-
|
298 |
-
IMPORTANT RULES:
|
299 |
-
1. ONLY transcribe English or Urdu speech
|
300 |
-
2. If you hear any other language, transcribe as "unclear audio"
|
301 |
-
3. If you hear unclear audio, transcribe as "unclear audio"
|
302 |
-
4. Never transcribe gibberish or random characters
|
303 |
-
5. Keep transcriptions simple and clean
|
304 |
-
6. Reject non-English/Urdu languages completely
|
305 |
|
306 |
PRODUCT NAMES (exact spelling required):
|
307 |
- Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox
|
@@ -310,22 +302,25 @@ PRODUCT NAMES (exact spelling required):
|
|
310 |
- Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex
|
311 |
- Respira Aid, Adek Gold, Immuno DX
|
312 |
|
|
|
|
|
|
|
|
|
313 |
MENU COMMANDS:
|
314 |
-
-
|
315 |
-
-
|
316 |
-
- Options: option, number, choice, select
|
317 |
|
318 |
GREETINGS:
|
319 |
- English: hi, hello, hey, good morning, good afternoon, good evening
|
320 |
- Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
|
321 |
|
322 |
TRANSCRIPTION RULES:
|
323 |
-
1.
|
324 |
-
2.
|
325 |
-
3.
|
326 |
-
4.
|
327 |
-
5.
|
328 |
-
6.
|
329 |
|
330 |
EXAMPLES:
|
331 |
- "hydropex" -> "hydropex"
|
@@ -334,11 +329,12 @@ EXAMPLES:
|
|
334 |
- "main menu" -> "main"
|
335 |
- "salam" -> "salam"
|
336 |
- "search products" -> "search products"
|
337 |
-
-
|
338 |
-
-
|
|
|
339 |
"""
|
340 |
|
341 |
-
# First attempt with
|
342 |
with open(file_path, 'rb') as audio_file:
|
343 |
transcript = openai.Audio.transcribe(
|
344 |
model="whisper-1",
|
@@ -357,6 +353,8 @@ EXAMPLES:
|
|
357 |
urdu_system_prompt = """
|
358 |
You are transcribing Urdu voice messages for Apex Biotical Veterinary WhatsApp Assistant.
|
359 |
|
|
|
|
|
360 |
PRODUCT NAMES (Urdu/English):
|
361 |
- ہائیڈروپیکس (Hydropex)
|
362 |
- ریسپیرا ایڈ پلس (Respira Aid Plus)
|
@@ -369,12 +367,11 @@ PRODUCT NAMES (Urdu/English):
|
|
369 |
- فائٹو سال (PHYTO-SAL)
|
370 |
- مائیکوپیکس سپر (Mycopex Super)
|
371 |
|
372 |
-
URDU NUMBERS:
|
373 |
- ایک (1), دو (2), تین (3), چار (4), پانچ (5)
|
374 |
- چھ (6), سات (7), آٹھ (8), نو (9), دس (10)
|
375 |
- گیارہ (11), بارہ (12), تیرہ (13), چودہ (14), پندرہ (15)
|
376 |
- سولہ (16), سترہ (17), اٹھارہ (18), انیس (19), بیس (20)
|
377 |
-
- اکیس (21), بائیس (22), تئیس (23)
|
378 |
|
379 |
URDU GREETINGS:
|
380 |
- سلام (salam), السلام علیکم (assalamu alaikum)
|
@@ -386,11 +383,12 @@ URDU MENU COMMANDS:
|
|
386 |
- کیٹلاگ (catalog), رابطہ (contact), دستیابی (availability)
|
387 |
|
388 |
TRANSCRIPTION RULES:
|
389 |
-
1.
|
390 |
-
2.
|
391 |
-
3.
|
392 |
4. Preserve product names exactly
|
393 |
-
5.
|
|
|
394 |
"""
|
395 |
|
396 |
with open(file_path, 'rb') as audio_file:
|
@@ -403,59 +401,16 @@ TRANSCRIPTION RULES:
|
|
403 |
|
404 |
transcribed_text = transcript.text.strip()
|
405 |
logger.info(f"[Transcribe] Second attempt transcribed (Urdu): '{transcribed_text}'")
|
406 |
-
|
407 |
-
# Third attempt with mixed language prompt if still failing
|
408 |
-
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
409 |
-
logger.warning(f"[Transcribe] Second attempt failed, trying with mixed language prompt")
|
410 |
-
|
411 |
-
mixed_system_prompt = """
|
412 |
-
You are transcribing voice messages for a veterinary products WhatsApp assistant. The user may speak in English, Urdu, or a mix of both languages.
|
413 |
-
|
414 |
-
PRODUCT NAMES (exact spelling required):
|
415 |
-
Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox, APMA Fort, Para C.E, Tribiotic, PHYTO-SAL, Mycopex Super, Eflin KT-20, Salcozine ST-30, Oftilex UA-10, Biscomin 10, Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex, Respira Aid, Adek Gold, Immuno DX
|
416 |
-
|
417 |
-
NUMBERS (convert to digits):
|
418 |
-
English: one->1, two->2, three->3, etc.
|
419 |
-
Urdu: aik->1, ek->1, do->2, teen->3, etc.
|
420 |
-
|
421 |
-
MENU COMMANDS:
|
422 |
-
main, menu, back, home, start, option, number, search, browse, download, catalog, contact, availability
|
423 |
-
|
424 |
-
GREETINGS:
|
425 |
-
hi, hello, salam, assalamu alaikum, adaab, namaste
|
426 |
-
|
427 |
-
TRANSCRIPTION RULES:
|
428 |
-
1. Transcribe exactly what you hear
|
429 |
-
2. Convert numbers to digits
|
430 |
-
3. Preserve product names exactly
|
431 |
-
4. Handle both languages
|
432 |
-
5. Convert menu selections to numbers
|
433 |
-
"""
|
434 |
-
|
435 |
-
with open(file_path, 'rb') as audio_file:
|
436 |
-
transcript = openai.Audio.transcribe(
|
437 |
-
model="whisper-1",
|
438 |
-
file=audio_file,
|
439 |
-
prompt=mixed_system_prompt
|
440 |
-
)
|
441 |
-
|
442 |
-
transcribed_text = transcript.text.strip()
|
443 |
-
logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
|
444 |
|
445 |
-
# Final check for
|
446 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
447 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
448 |
return "unclear audio"
|
449 |
|
450 |
-
# Check for
|
451 |
-
if len(transcribed_text) > 10 and not re.search(r'[a-zA-Z\u0600-\u06FF]', transcribed_text):
|
452 |
-
logger.warning(f"[Transcribe] Gibberish detected: '{transcribed_text}'")
|
453 |
-
return "unclear audio"
|
454 |
-
|
455 |
-
# Check for too many special characters
|
456 |
special_char_ratio = len(re.findall(r'[^\w\s]', transcribed_text)) / len(transcribed_text)
|
457 |
-
if special_char_ratio > 0.
|
458 |
-
logger.warning(f"[Transcribe] Too many special characters: '{transcribed_text}'")
|
459 |
return "unclear audio"
|
460 |
|
461 |
return transcribed_text
|
|
|
275 |
return None
|
276 |
|
277 |
async def transcribe_voice_with_openai(file_path: str) -> str:
|
278 |
+
"""Transcribe voice file using OpenAI Whisper with intelligent English/Urdu focus"""
|
279 |
try:
|
280 |
# Check if file exists and has content
|
281 |
if not os.path.exists(file_path):
|
|
|
289 |
|
290 |
logger.info(f"[Transcribe] Transcribing file: {file_path} (size: {file_size} bytes)")
|
291 |
|
292 |
+
# Intelligent English/Urdu focused system prompt
|
293 |
system_prompt = """
|
294 |
+
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant.
|
295 |
|
296 |
+
FOCUS: The user will speak in English, Urdu, or a mix of both languages. Be intelligent and natural in understanding their speech.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
PRODUCT NAMES (exact spelling required):
|
299 |
- Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox
|
|
|
302 |
- Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex
|
303 |
- Respira Aid, Adek Gold, Immuno DX
|
304 |
|
305 |
+
ENGLISH NUMBERS: one->1, two->2, three->3, four->4, five->5, six->6, seven->7, eight->8, nine->9, ten->10
|
306 |
+
|
307 |
+
URDU NUMBERS: aik->1, ek->1, do->2, teen->3, char->4, panch->5, cheh->6, saat->7, aath->8, nau->9, das->10
|
308 |
+
|
309 |
MENU COMMANDS:
|
310 |
+
- English: main, menu, back, home, start, option, number, search, browse, download, catalog, contact, availability
|
311 |
+
- Urdu: main menu, option, number, search, browse, download, catalog, contact, availability
|
|
|
312 |
|
313 |
GREETINGS:
|
314 |
- English: hi, hello, hey, good morning, good afternoon, good evening
|
315 |
- Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
|
316 |
|
317 |
TRANSCRIPTION RULES:
|
318 |
+
1. Intelligently transcribe English and Urdu speech
|
319 |
+
2. Handle mixed English-Urdu speech naturally
|
320 |
+
3. Convert numbers to digits
|
321 |
+
4. Preserve product names exactly
|
322 |
+
5. Only return "unclear audio" if the voice is genuinely unclear or inaudible
|
323 |
+
6. Be natural and conversational in understanding
|
324 |
|
325 |
EXAMPLES:
|
326 |
- "hydropex" -> "hydropex"
|
|
|
329 |
- "main menu" -> "main"
|
330 |
- "salam" -> "salam"
|
331 |
- "search products" -> "search products"
|
332 |
+
- "how many products" -> "how many products"
|
333 |
+
- "kitne products hain" -> "kitne products hain"
|
334 |
+
- Genuinely unclear audio -> "unclear audio"
|
335 |
"""
|
336 |
|
337 |
+
# First attempt with intelligent English/Urdu focus
|
338 |
with open(file_path, 'rb') as audio_file:
|
339 |
transcript = openai.Audio.transcribe(
|
340 |
model="whisper-1",
|
|
|
353 |
urdu_system_prompt = """
|
354 |
You are transcribing Urdu voice messages for Apex Biotical Veterinary WhatsApp Assistant.
|
355 |
|
356 |
+
FOCUS: The user will speak in Urdu, English, or a mix of both. Be intelligent and natural.
|
357 |
+
|
358 |
PRODUCT NAMES (Urdu/English):
|
359 |
- ہائیڈروپیکس (Hydropex)
|
360 |
- ریسپیرا ایڈ پلس (Respira Aid Plus)
|
|
|
367 |
- فائٹو سال (PHYTO-SAL)
|
368 |
- مائیکوپیکس سپر (Mycopex Super)
|
369 |
|
370 |
+
URDU NUMBERS (convert to digits):
|
371 |
- ایک (1), دو (2), تین (3), چار (4), پانچ (5)
|
372 |
- چھ (6), سات (7), آٹھ (8), نو (9), دس (10)
|
373 |
- گیارہ (11), بارہ (12), تیرہ (13), چودہ (14), پندرہ (15)
|
374 |
- سولہ (16), سترہ (17), اٹھارہ (18), انیس (19), بیس (20)
|
|
|
375 |
|
376 |
URDU GREETINGS:
|
377 |
- سلام (salam), السلام علیکم (assalamu alaikum)
|
|
|
383 |
- کیٹلاگ (catalog), رابطہ (contact), دستیابی (availability)
|
384 |
|
385 |
TRANSCRIPTION RULES:
|
386 |
+
1. Intelligently transcribe Urdu and English speech
|
387 |
+
2. Handle mixed language naturally
|
388 |
+
3. Convert Urdu numbers to digits
|
389 |
4. Preserve product names exactly
|
390 |
+
5. Only return "unclear audio" if voice is genuinely unclear
|
391 |
+
6. Be natural and conversational
|
392 |
"""
|
393 |
|
394 |
with open(file_path, 'rb') as audio_file:
|
|
|
401 |
|
402 |
transcribed_text = transcript.text.strip()
|
403 |
logger.info(f"[Transcribe] Second attempt transcribed (Urdu): '{transcribed_text}'")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
404 |
|
405 |
+
# Final check for genuinely unclear audio
|
406 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
407 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
408 |
return "unclear audio"
|
409 |
|
410 |
+
# Check for too many special characters (indicates unclear audio)
|
|
|
|
|
|
|
|
|
|
|
411 |
special_char_ratio = len(re.findall(r'[^\w\s]', transcribed_text)) / len(transcribed_text)
|
412 |
+
if special_char_ratio > 0.5: # More than 50% special characters
|
413 |
+
logger.warning(f"[Transcribe] Too many special characters, unclear audio: '{transcribed_text}'")
|
414 |
return "unclear audio"
|
415 |
|
416 |
return transcribed_text
|