Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -293,15 +293,17 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
293 |
system_prompt = """
|
294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
295 |
|
296 |
-
|
297 |
|
298 |
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
299 |
|
300 |
-
LANGUAGE
|
301 |
-
- ONLY English (en) or Urdu (ur)
|
302 |
-
-
|
303 |
-
-
|
304 |
-
-
|
|
|
|
|
305 |
|
306 |
PRODUCT NAMES (Veterinary Products):
|
307 |
- Hydropex (electrolyte supplement)
|
@@ -3418,34 +3420,20 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
3418 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
3419 |
transcribed_text = corrected_text
|
3420 |
|
3421 |
-
|
3422 |
detected_lang = 'en' # Default to English
|
3423 |
try:
|
3424 |
detected_lang = detect(transcribed_text)
|
3425 |
logger.info(f"[Voice] Raw detected language: {detected_lang}")
|
3426 |
|
3427 |
-
#
|
3428 |
-
#
|
3429 |
-
|
3430 |
-
|
3431 |
-
|
3432 |
-
|
3433 |
-
|
3434 |
-
|
3435 |
-
'pa': 'ur', # Punjabi (treat as Urdu)
|
3436 |
-
'id': 'ur', # Indonesian (treat as Urdu)
|
3437 |
-
'ms': 'ur', # Malay (treat as Urdu)
|
3438 |
-
'tr': 'ur', # Turkish (treat as Urdu)
|
3439 |
-
'de': 'en', # German -> English
|
3440 |
-
'fr': 'en', # French -> English
|
3441 |
-
'es': 'en', # Spanish -> English
|
3442 |
-
'it': 'en', # Italian -> English
|
3443 |
-
'pt': 'en', # Portuguese -> English
|
3444 |
-
'ru': 'en', # Russian -> English
|
3445 |
-
'ja': 'en', # Japanese -> English
|
3446 |
-
'ko': 'en', # Korean -> English
|
3447 |
-
'zh': 'en', # Chinese -> English
|
3448 |
-
}
|
3449 |
|
3450 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
3451 |
urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
|
@@ -3456,16 +3444,10 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
3456 |
|
3457 |
if has_urdu_chars or has_islamic_greeting:
|
3458 |
detected_lang = 'ur'
|
|
|
3459 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
3460 |
|
3461 |
-
|
3462 |
-
reply_language = lang_mapping.get(detected_lang, 'en')
|
3463 |
-
logger.info(f"[Voice] Language '{detected_lang}' FORCED to: {reply_language}")
|
3464 |
-
|
3465 |
-
# Additional safety check - if still not English or Urdu, force to English
|
3466 |
-
if reply_language not in ['en', 'ur']:
|
3467 |
-
logger.warning(f"[Voice] Language '{reply_language}' not in allowed list, forcing to English")
|
3468 |
-
reply_language = 'en'
|
3469 |
|
3470 |
except Exception as e:
|
3471 |
logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
|
|
|
293 |
system_prompt = """
|
294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
295 |
|
296 |
+
CRITICAL LANGUAGE RESTRICTION: ONLY ENGLISH OR URDU - NOTHING ELSE
|
297 |
|
298 |
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
299 |
|
300 |
+
STRICT LANGUAGE RULES:
|
301 |
+
- ONLY transcribe English (en) or Urdu (ur) speech
|
302 |
+
- REJECT all other languages completely
|
303 |
+
- If you hear any language other than English or Urdu, transcribe as English
|
304 |
+
- Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
|
305 |
+
- Always default to English if uncertain
|
306 |
+
- This is a veterinary assistant - users will speak in English or Urdu only
|
307 |
|
308 |
PRODUCT NAMES (Veterinary Products):
|
309 |
- Hydropex (electrolyte supplement)
|
|
|
3420 |
logger.info(f"[Voice] Applied corrections: '{transcribed_text}' -> '{corrected_text}'")
|
3421 |
transcribed_text = corrected_text
|
3422 |
|
3423 |
+
# Detect language of transcribed text - STRICTLY ENGLISH OR URDU ONLY
|
3424 |
detected_lang = 'en' # Default to English
|
3425 |
try:
|
3426 |
detected_lang = detect(transcribed_text)
|
3427 |
logger.info(f"[Voice] Raw detected language: {detected_lang}")
|
3428 |
|
3429 |
+
# STRICTLY ENGLISH OR URDU ONLY - NO OTHER LANGUAGES
|
3430 |
+
# Only allow English and Urdu, reject everything else
|
3431 |
+
if detected_lang in ['en', 'ur']:
|
3432 |
+
reply_language = detected_lang
|
3433 |
+
else:
|
3434 |
+
# Force any other language to English
|
3435 |
+
reply_language = 'en'
|
3436 |
+
logger.warning(f"[Voice] Detected language '{detected_lang}' is not English or Urdu, forcing to English")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3437 |
|
3438 |
# Check if text contains Urdu/Arabic characters or Islamic greetings
|
3439 |
urdu_arabic_pattern = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
|
|
|
3444 |
|
3445 |
if has_urdu_chars or has_islamic_greeting:
|
3446 |
detected_lang = 'ur'
|
3447 |
+
reply_language = 'ur'
|
3448 |
logger.info(f"[Voice] Overriding language detection to Urdu due to Arabic/Urdu characters or Islamic greeting")
|
3449 |
|
3450 |
+
logger.info(f"[Voice] Final language set to: {reply_language}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3451 |
|
3452 |
except Exception as e:
|
3453 |
logger.warning(f"[Voice] Language detection failed: {e}, defaulting to English")
|