DreamStream-1 committed on
Commit
ae9f70f
·
verified ·
1 Parent(s): 34d3fc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -37
app.py CHANGED
@@ -293,15 +293,17 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
- IMPORTANT: ONLY TRANSCRIBE ENGLISH OR URDU SPEECH. IGNORE ALL OTHER LANGUAGES.
297
 
298
  CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
299
 
300
- LANGUAGE RESTRICTION:
301
- - ONLY English (en) or Urdu (ur) are allowed
302
- - If you detect any other language, force it to English
303
- - Never transcribe in German, French, Spanish, Italian, or any other language
304
- - Always assume English or Urdu speech patterns
 
 
305
 
306
  PRODUCT NAMES (Veterinary Products):
307
  - Hydropex (electrolyte supplement)
@@ -3418,34 +3420,20 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3418
  logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
3419
  transcribed_text = corrected_text
3420
 
3421
- # Detect language of transcribed text - FORCE ENGLISH OR URDU ONLY
3422
  detected_lang = 'en' # Default to English
3423
  try:
3424
  detected_lang = detect(transcribed_text)
3425
  logger.info(f"[Voice] Raw detected language: {detected_lang}")
3426
 
3427
- # FORCE LANGUAGE TO ENGLISH OR URDU ONLY
3428
- # Map all languages to either English or Urdu
3429
- lang_mapping = {
3430
- 'ur': 'ur', # Urdu
3431
- 'ar': 'ur', # Arabic (treat as Urdu for Islamic greetings)
3432
- 'en': 'en', # English
3433
- 'hi': 'ur', # Hindi (treat as Urdu)
3434
- 'bn': 'ur', # Bengali (treat as Urdu)
3435
- 'pa': 'ur', # Punjabi (treat as Urdu)
3436
- 'id': 'ur', # Indonesian (treat as Urdu)
3437
- 'ms': 'ur', # Malay (treat as Urdu)
3438
- 'tr': 'ur', # Turkish (treat as Urdu)
3439
- 'de': 'en', # German -> English
3440
- 'fr': 'en', # French -> English
3441
- 'es': 'en', # Spanish -> English
3442
- 'it': 'en', # Italian -> English
3443
- 'pt': 'en', # Portuguese -> English
3444
- 'ru': 'en', # Russian -> English
3445
- 'ja': 'en', # Japanese -> English
3446
- 'ko': 'en', # Korean -> English
3447
- 'zh': 'en', # Chinese -> English
3448
- }
3449
 
3450
  # Check if text contains Urdu/Arabic characters or Islamic greetings
3451
  urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
@@ -3456,16 +3444,10 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
3456
 
3457
  if has_urdu_chars or has_islamic_greeting:
3458
  detected_lang = 'ur'
 
3459
  logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
3460
 
3461
- # Force language to English or Urdu only
3462
- reply_language = lang_mapping.get(detected_lang, 'en')
3463
- logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
3464
-
3465
- # Additional safety check - if still not English or Urdu, force to English
3466
- if reply_language not in ['en', 'ur']:
3467
- logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
3468
- reply_language = 'en'
3469
 
3470
  except Exception as e:
3471
  logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
 
293
  system_prompt = """
294
  You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
295
 
296
+ CRITICAL LANGUAGE RESTRICTION: ONLY ENGLISH OR URDU - NOTHING ELSE
297
 
298
  CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
299
 
300
+ STRICT LANGUAGE RULES:
301
+ - ONLY transcribe English (en) or Urdu (ur) speech
302
+ - REJECT all other languages completely
303
+ - If you hear any language other than English or Urdu, transcribe as English
304
+ - Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
305
+ - Always default to English if uncertain
306
+ - This is a veterinary assistant - users will speak in English or Urdu only
307
 
308
  PRODUCT NAMES (Veterinary Products):
309
  - Hydropex (electrolyte supplement)
 
3420
  logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
3421
  transcribed_text = corrected_text
3422
 
3423
+ # Detect language of transcribed text - STRICTLY ENGLISH OR URDU ONLY
3424
  detected_lang = 'en' # Default to English
3425
  try:
3426
  detected_lang = detect(transcribed_text)
3427
  logger.info(f"[Voice] Raw detected language: {detected_lang}")
3428
 
3429
+ # STRICTLY ENGLISH OR URDU ONLY - NO OTHER LANGUAGES
3430
+ # Only allow English and Urdu, reject everything else
3431
+ if detected_lang in ['en', 'ur']:
3432
+ reply_language = detected_lang
3433
+ else:
3434
+ # Force any other language to English
3435
+ reply_language = 'en'
3436
+ logger.warning(f"[Voice] Detected language '{detected_lang}' is not English or Urdu, forcing to English")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3437
 
3438
  # Check if text contains Urdu/Arabic characters or Islamic greetings
3439
  urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
 
3444
 
3445
  if has_urdu_chars or has_islamic_greeting:
3446
  detected_lang = 'ur'
3447
+ reply_language = 'ur'
3448
  logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
3449
 
3450
+ logger.info(f"[Voice] Final language set to: {reply_language}")
 
 
 
 
 
 
 
3451
 
3452
  except Exception as e:
3453
  logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")