DreamStream-1 committed on
Commit
de997e1
·
verified ·
1 Parent(s): 18558c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -72
app.py CHANGED
@@ -293,81 +293,48 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
- CRITICAL LANGUAGE RESTRICTION: ONLY ENGLISH OR URDU - NOTHING ELSE
297
-
298
- CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
299
-
300
- STRICT LANGUAGE RULES:
301
- - ONLY transcribe English (en) or Urdu (ur) speech
302
- - REJECT all other languages completely
303
- - If you hear any language other than English or Urdu, transcribe as English
304
- - Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
305
- - Always default to English if uncertain
306
- - This is a veterinary assistant - users will speak in English or Urdu only
307
-
308
- PRODUCT NAMES (Veterinary Products):
309
- - Hydropex (electrolyte supplement)
310
- - Respira Aid Plus (respiratory support)
311
- - Heposel (liver tonic)
312
- - Bromacid (respiratory/mucolytic)
313
- - Hexatox (liver & kidney support)
314
- - APMA Fort (mycotoxin binder)
315
- - Para C.E (heat stress support)
316
- - Tribiotic (antibiotic)
317
- - PHYTO-SAL (phytogenic supplement)
318
- - Mycopex Super (mycotoxin binder)
319
- - Eflin KT-20 (antibiotic)
320
- - Salcozine ST-30 (anticoccidial)
321
- - Oftilex UA-10 (antibiotic)
322
- - Biscomin 10 (injectable antibiotic)
323
- - Apvita Plus (vitamin supplement)
324
- - B-G Aspro-C (aspirin + vitamin C)
325
- - EC-Immune (immune booster)
326
- - Liverpex (liver tonic)
327
- - Symodex (multivitamin)
328
- - Respira Aid (respiratory support)
329
- - Adek Gold (multivitamin)
330
- - Immuno DX (immune enhancer)
331
-
332
- MENU SELECTIONS:
333
- - Main menu options: 1, 2, 3, 4
334
- - Product numbers: 1-23
335
- - Category numbers: 1-10
336
- - Navigation: main, menu, back, home, start
337
 
338
- NUMBERS (English & Urdu):
339
- English: one, two, three, four, five, six, seven, eight, nine, ten, eleven, twelve, thirteen, fourteen, fifteen, sixteen, seventeen, eighteen, nineteen, twenty, twenty-one, twenty-two, twenty-three
340
- Urdu (Roman): aik, ek, do, teen, char, panch, che, sat, ath, nau, das, gyara, bara, tera, choda, pandra, sola, satara, athara, unnees, bees, ikkees, baees, tees
341
- Urdu (Script): ایک, دو, تین, چار, پانچ, چھ, سات, آٹھ, نو, دس, گیارہ, بارہ, تیرہ, چودہ, پندرہ, سولہ, سترہ, اٹھارہ, انیس, بیس, اکیس, بائیس, تئیس
 
 
 
342
 
343
- COMMON GREETINGS:
344
- English: hi, hello, hey, good morning, good afternoon, good evening, how are you
345
- Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
 
 
 
346
 
347
  MENU COMMANDS:
348
- English: search, browse, download, catalog, contact, availability, main menu, option, number, choice
349
- Urdu: تلاش, براؤز, ڈاؤن لوڈ, کیٹلاگ, رابطہ, دستیابی, مین مینو, آپشن, نمبر, اختیار
 
 
 
 
 
350
 
351
  TRANSCRIPTION RULES:
352
- 1. ONLY transcribe English or Urdu speech
353
- 2. Transcribe product names exactly as listed above
354
- 3. Convert spoken numbers to digits (1, 2, 3, etc.)
355
- 4. Handle both English and Urdu speech
356
- 5. Preserve exact spelling for product names
357
- 6. Convert menu selections to numbers
358
- 7. Handle common transcription errors (opium->option, numara->number)
359
- 8. Maintain context for veterinary domain
360
- 9. If unsure about language, default to English
361
 
362
  EXAMPLES:
363
  - "hydropex" -> "hydropex"
364
  - "respira aid plus" -> "respira aid plus"
365
- - "option number one" -> "1"
366
- - "aik" -> "1"
367
- - "do" -> "2"
368
  - "main menu" -> "main"
369
  - "salam" -> "salam"
370
  - "search products" -> "search products"
 
371
  """
372
 
373
  # First attempt with comprehensive system prompt
@@ -474,10 +441,21 @@ TRANSCRIPTION RULES:
474
  transcribed_text = transcript.text.strip()
475
  logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
476
 
477
- # Final check for empty transcription
478
  if not transcribed_text or len(transcribed_text.strip()) < 2:
479
  logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
480
- return ""
 
 
 
 
 
 
 
 
 
 
 
481
 
482
  return transcribed_text
483
 
@@ -2599,8 +2577,8 @@ Response:
2599
  if reply_language == 'ur':
2600
  try:
2601
  # Get all product and category names
2602
- product_names = [p.get('Product Name', '') for p in all_products if p.get('Product Name')]
2603
- category_names = list(set([p.get('Category', '') for p in all_products if p.get('Category')]))
2604
  translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
2605
  # Restore English terms
2606
  translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
@@ -3431,21 +3409,23 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3431
  except:
3432
  pass
3433
 
3434
- # Handle empty or failed transcription
3435
- if not transcribed_text or transcribed_text.strip() == "":
3436
- logger.warning(f"[Voice] Empty transcription for {from_number}")
3437
  send_whatsjet_message(from_number,
3438
  "🎤 *Voice Message Issue*\n\n"
3439
- "I couldn't hear anything in your voice message. This can happen due to:\n"
3440
  "• Very short voice note\n"
3441
  "• Background noise\n"
3442
  "• Microphone too far away\n"
3443
- "• Audio quality issues\n\n"
 
3444
  "💡 *Tips for better voice notes:*\n"
3445
  "• Speak clearly and slowly\n"
3446
  "• Keep phone close to mouth\n"
3447
  "• Record in quiet environment\n"
3448
- "• Make voice note at least 2-3 seconds\n\n"
 
3449
  "💬 *You can also:*\n"
3450
  "• Send a text message\n"
3451
  "• Type 'main' to see menu options\n"
 
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
+ CRITICAL: TRANSCRIBE ONLY ENGLISH OR URDU SPEECH - NOTHING ELSE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
+ IMPORTANT RULES:
299
+ 1. ONLY transcribe English or Urdu speech
300
+ 2. If you hear unclear audio, transcribe as English
301
+ 3. If you hear mixed languages, transcribe as English
302
+ 4. Never transcribe gibberish or random characters
303
+ 5. If audio is unclear, transcribe as "unclear audio"
304
+ 6. Keep transcriptions simple and clean
305
 
306
+ PRODUCT NAMES (exact spelling required):
307
+ - Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox
308
+ - APMA Fort, Para C.E, Tribiotic, PHYTO-SAL, Mycopex Super
309
+ - Eflin KT-20, Salcozine ST-30, Oftilex UA-10, Biscomin 10
310
+ - Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex
311
+ - Respira Aid, Adek Gold, Immuno DX
312
 
313
  MENU COMMANDS:
314
+ - Numbers: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
315
+ - Navigation: main, menu, back, home, start
316
+ - Options: option, number, choice, select
317
+
318
+ GREETINGS:
319
+ - English: hi, hello, hey, good morning, good afternoon, good evening
320
+ - Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
321
 
322
  TRANSCRIPTION RULES:
323
+ 1. Transcribe exactly what you hear in English or Urdu
324
+ 2. Convert numbers to digits (one->1, two->2, etc.)
325
+ 3. Preserve product names exactly
326
+ 4. If unclear, transcribe as "unclear audio"
327
+ 5. Keep it simple and clean
328
+ 6. No random characters or mixed languages
 
 
 
329
 
330
  EXAMPLES:
331
  - "hydropex" -> "hydropex"
332
  - "respira aid plus" -> "respira aid plus"
333
+ - "option one" -> "1"
 
 
334
  - "main menu" -> "main"
335
  - "salam" -> "salam"
336
  - "search products" -> "search products"
337
+ - Unclear audio -> "unclear audio"
338
  """
339
 
340
  # First attempt with comprehensive system prompt
 
441
  transcribed_text = transcript.text.strip()
442
  logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
443
 
444
+ # Final check for empty transcription or unclear audio
445
  if not transcribed_text or len(transcribed_text.strip()) < 2:
446
  logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
447
+ return "unclear audio"
448
+
449
+ # Check for gibberish or mixed characters
450
+ if len(transcribed_text) > 10 and not re.search(r'[a-zA-Z\u0600-\u06FF]', transcribed_text):
451
+ logger.warning(f"[Transcribe] Gibberish detected: '{transcribed_text}'")
452
+ return "unclear audio"
453
+
454
+ # Check for too many special characters
455
+ special_char_ratio = len(re.findall(r'[^\w\s]', transcribed_text)) / len(transcribed_text)
456
+ if special_char_ratio > 0.3:
457
+ logger.warning(f"[Transcribe] Too many special characters: '{transcribed_text}'")
458
+ return "unclear audio"
459
 
460
  return transcribed_text
461
 
 
2577
  if reply_language == 'ur':
2578
  try:
2579
  # Get all product and category names
2580
+ product_names = [str(p.get('Product Name', '')) for p in all_products if p.get('Product Name')]
2581
+ category_names = list(set([str(p.get('Category', '')) for p in all_products if p.get('Category')]))
2582
  translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
2583
  # Restore English terms
2584
  translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
 
3409
  except:
3410
  pass
3411
 
3412
+ # Handle empty, failed, or unclear transcription
3413
+ if not transcribed_text or transcribed_text.strip() == "" or transcribed_text.lower() == "unclear audio":
3414
+ logger.warning(f"[Voice] Empty or unclear transcription for {from_number}: '{transcribed_text}'")
3415
  send_whatsjet_message(from_number,
3416
  "🎤 *Voice Message Issue*\n\n"
3417
+ "I couldn't understand your voice message clearly. This can happen due to:\n"
3418
  "• Very short voice note\n"
3419
  "• Background noise\n"
3420
  "• Microphone too far away\n"
3421
+ "• Audio quality issues\n"
3422
+ "• Speaking too fast\n\n"
3423
  "💡 *Tips for better voice notes:*\n"
3424
  "• Speak clearly and slowly\n"
3425
  "• Keep phone close to mouth\n"
3426
  "• Record in quiet environment\n"
3427
+ "• Make voice note at least 2-3 seconds\n"
3428
+ "• Speak in English or Urdu only\n\n"
3429
  "💬 *You can also:*\n"
3430
  "• Send a text message\n"
3431
  "• Type 'main' to see menu options\n"