Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -293,81 +293,48 @@ async def transcribe_voice_with_openai(file_path: str) -> str:
|
|
293 |
system_prompt = """
|
294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
295 |
|
296 |
-
CRITICAL
|
297 |
-
|
298 |
-
CONTEXT: Users can speak product names, menu selections, numbers, and general queries in English or Urdu ONLY.
|
299 |
-
|
300 |
-
STRICT LANGUAGE RULES:
|
301 |
-
- ONLY transcribe English (en) or Urdu (ur) speech
|
302 |
-
- REJECT all other languages completely
|
303 |
-
- If you hear any language other than English or Urdu, transcribe as English
|
304 |
-
- Never transcribe German, French, Spanish, Italian, Portuguese, Russian, Chinese, Japanese, Korean, or any other language
|
305 |
-
- Always default to English if uncertain
|
306 |
-
- This is a veterinary assistant - users will speak in English or Urdu only
|
307 |
-
|
308 |
-
PRODUCT NAMES (Veterinary Products):
|
309 |
-
- Hydropex (electrolyte supplement)
|
310 |
-
- Respira Aid Plus (respiratory support)
|
311 |
-
- Heposel (liver tonic)
|
312 |
-
- Bromacid (respiratory/mucolytic)
|
313 |
-
- Hexatox (liver & kidney support)
|
314 |
-
- APMA Fort (mycotoxin binder)
|
315 |
-
- Para C.E (heat stress support)
|
316 |
-
- Tribiotic (antibiotic)
|
317 |
-
- PHYTO-SAL (phytogenic supplement)
|
318 |
-
- Mycopex Super (mycotoxin binder)
|
319 |
-
- Eflin KT-20 (antibiotic)
|
320 |
-
- Salcozine ST-30 (anticoccidial)
|
321 |
-
- Oftilex UA-10 (antibiotic)
|
322 |
-
- Biscomin 10 (injectable antibiotic)
|
323 |
-
- Apvita Plus (vitamin supplement)
|
324 |
-
- B-G Aspro-C (aspirin + vitamin C)
|
325 |
-
- EC-Immune (immune booster)
|
326 |
-
- Liverpex (liver tonic)
|
327 |
-
- Symodex (multivitamin)
|
328 |
-
- Respira Aid (respiratory support)
|
329 |
-
- Adek Gold (multivitamin)
|
330 |
-
- Immuno DX (immune enhancer)
|
331 |
-
|
332 |
-
MENU SELECTIONS:
|
333 |
-
- Main menu options: 1, 2, 3, 4
|
334 |
-
- Product numbers: 1-23
|
335 |
-
- Category numbers: 1-10
|
336 |
-
- Navigation: main, menu, back, home, start
|
337 |
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
|
|
|
|
|
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
|
|
|
|
|
|
|
346 |
|
347 |
MENU COMMANDS:
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
350 |
|
351 |
TRANSCRIPTION RULES:
|
352 |
-
1.
|
353 |
-
2.
|
354 |
-
3.
|
355 |
-
4.
|
356 |
-
5.
|
357 |
-
6.
|
358 |
-
7. Handle common transcription errors (opium->option, numara->number)
|
359 |
-
8. Maintain context for veterinary domain
|
360 |
-
9. If unsure about language, default to English
|
361 |
|
362 |
EXAMPLES:
|
363 |
- "hydropex" -> "hydropex"
|
364 |
- "respira aid plus" -> "respira aid plus"
|
365 |
-
- "option
|
366 |
-
- "aik" -> "1"
|
367 |
-
- "do" -> "2"
|
368 |
- "main menu" -> "main"
|
369 |
- "salam" -> "salam"
|
370 |
- "search products" -> "search products"
|
|
|
371 |
"""
|
372 |
|
373 |
# First attempt with comprehensive system prompt
|
@@ -474,10 +441,21 @@ TRANSCRIPTION RULES:
|
|
474 |
transcribed_text = transcript.text.strip()
|
475 |
logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
|
476 |
|
477 |
-
# Final check for empty transcription
|
478 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
479 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
480 |
-
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
return transcribed_text
|
483 |
|
@@ -2599,8 +2577,8 @@ Response:
|
|
2599 |
if reply_language == 'ur':
|
2600 |
try:
|
2601 |
# Get all product and category names
|
2602 |
-
product_names = [p.get('Product Name', '') for p in all_products if p.get('Product Name')]
|
2603 |
-
category_names = list(set([p.get('Category', '') for p in all_products if p.get('Category')]))
|
2604 |
translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
|
2605 |
# Restore English terms
|
2606 |
translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
|
@@ -3431,21 +3409,23 @@ async def handle_voice_message_complete(from_number: str, msg: dict):
|
|
3431 |
except:
|
3432 |
pass
|
3433 |
|
3434 |
-
# Handle empty or
|
3435 |
-
if not transcribed_text or transcribed_text.strip() == "":
|
3436 |
-
logger.warning(f"[Voice] Empty transcription for {from_number}")
|
3437 |
send_whatsjet_message(from_number,
|
3438 |
"🎤 *Voice Message Issue*\n\n"
|
3439 |
-
"I couldn't
|
3440 |
"• Very short voice note\n"
|
3441 |
"• Background noise\n"
|
3442 |
"• Microphone too far away\n"
|
3443 |
-
"• Audio quality issues\n
|
|
|
3444 |
"💡 *Tips for better voice notes:*\n"
|
3445 |
"• Speak clearly and slowly\n"
|
3446 |
"• Keep phone close to mouth\n"
|
3447 |
"• Record in quiet environment\n"
|
3448 |
-
"• Make voice note at least 2-3 seconds\n
|
|
|
3449 |
"💬 *You can also:*\n"
|
3450 |
"• Send a text message\n"
|
3451 |
"• Type 'main' to see menu options\n"
|
|
|
293 |
system_prompt = """
|
294 |
You are transcribing voice messages for Apex Biotical Veterinary WhatsApp Assistant. This is a professional veterinary products chatbot.
|
295 |
|
296 |
+
CRITICAL: TRANSCRIBE ONLY ENGLISH OR URDU SPEECH - NOTHING ELSE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
+
IMPORTANT RULES:
|
299 |
+
1. ONLY transcribe English or Urdu speech
|
300 |
+
2. If you hear unclear audio, transcribe as English
|
301 |
+
3. If you hear mixed languages, transcribe as English
|
302 |
+
4. Never transcribe gibberish or random characters
|
303 |
+
5. If audio is unclear, transcribe as "unclear audio"
|
304 |
+
6. Keep transcriptions simple and clean
|
305 |
|
306 |
+
PRODUCT NAMES (exact spelling required):
|
307 |
+
- Hydropex, Respira Aid Plus, Heposel, Bromacid, Hexatox
|
308 |
+
- APMA Fort, Para C.E, Tribiotic, PHYTO-SAL, Mycopex Super
|
309 |
+
- Eflin KT-20, Salcozine ST-30, Oftilex UA-10, Biscomin 10
|
310 |
+
- Apvita Plus, B-G Aspro-C, EC-Immune, Liverpex, Symodex
|
311 |
+
- Respira Aid, Adek Gold, Immuno DX
|
312 |
|
313 |
MENU COMMANDS:
|
314 |
+
- Numbers: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
|
315 |
+
- Navigation: main, menu, back, home, start
|
316 |
+
- Options: option, number, choice, select
|
317 |
+
|
318 |
+
GREETINGS:
|
319 |
+
- English: hi, hello, hey, good morning, good afternoon, good evening
|
320 |
+
- Urdu: salam, assalamu alaikum, adaab, namaste, khuda hafiz
|
321 |
|
322 |
TRANSCRIPTION RULES:
|
323 |
+
1. Transcribe exactly what you hear in English or Urdu
|
324 |
+
2. Convert numbers to digits (one->1, two->2, etc.)
|
325 |
+
3. Preserve product names exactly
|
326 |
+
4. If unclear, transcribe as "unclear audio"
|
327 |
+
5. Keep it simple and clean
|
328 |
+
6. No random characters or mixed languages
|
|
|
|
|
|
|
329 |
|
330 |
EXAMPLES:
|
331 |
- "hydropex" -> "hydropex"
|
332 |
- "respira aid plus" -> "respira aid plus"
|
333 |
+
- "option one" -> "1"
|
|
|
|
|
334 |
- "main menu" -> "main"
|
335 |
- "salam" -> "salam"
|
336 |
- "search products" -> "search products"
|
337 |
+
- Unclear audio -> "unclear audio"
|
338 |
"""
|
339 |
|
340 |
# First attempt with comprehensive system prompt
|
|
|
441 |
transcribed_text = transcript.text.strip()
|
442 |
logger.info(f"[Transcribe] Third attempt (mixed) transcribed: '{transcribed_text}'")
|
443 |
|
444 |
+
# Final check for empty transcription or unclear audio
|
445 |
if not transcribed_text or len(transcribed_text.strip()) < 2:
|
446 |
logger.warning(f"[Transcribe] Very short or empty transcription: '{transcribed_text}'")
|
447 |
+
return "unclear audio"
|
448 |
+
|
449 |
+
# Check for gibberish or mixed characters
|
450 |
+
if len(transcribed_text) > 10 and not re.search(r'[a-zA-Z\u0600-\u06FF]', transcribed_text):
|
451 |
+
logger.warning(f"[Transcribe] Gibberish detected: '{transcribed_text}'")
|
452 |
+
return "unclear audio"
|
453 |
+
|
454 |
+
# Check for too many special characters
|
455 |
+
special_char_ratio = len(re.findall(r'[^\w\s]', transcribed_text)) / len(transcribed_text)
|
456 |
+
if special_char_ratio > 0.3:
|
457 |
+
logger.warning(f"[Transcribe] Too many special characters: '{transcribed_text}'")
|
458 |
+
return "unclear audio"
|
459 |
|
460 |
return transcribed_text
|
461 |
|
|
|
2577 |
if reply_language == 'ur':
|
2578 |
try:
|
2579 |
# Get all product and category names
|
2580 |
+
product_names = [str(p.get('Product Name', '')) for p in all_products if p.get('Product Name')]
|
2581 |
+
category_names = list(set([str(p.get('Category', '')) for p in all_products if p.get('Category')]))
|
2582 |
translated_response = GoogleTranslator(source='auto', target='ur').translate(ai_response)
|
2583 |
# Restore English terms
|
2584 |
translated_response = restore_english_terms(translated_response, ai_response, product_names, category_names)
|
|
|
3409 |
except:
|
3410 |
pass
|
3411 |
|
3412 |
+
# Handle empty, failed, or unclear transcription
|
3413 |
+
if not transcribed_text or transcribed_text.strip() == "" or transcribed_text.lower() == "unclear audio":
|
3414 |
+
logger.warning(f"[Voice] Empty or unclear transcription for {from_number}: '{transcribed_text}'")
|
3415 |
send_whatsjet_message(from_number,
|
3416 |
"🎤 *Voice Message Issue*\n\n"
|
3417 |
+
"I couldn't understand your voice message clearly. This can happen due to:\n"
|
3418 |
"• Very short voice note\n"
|
3419 |
"• Background noise\n"
|
3420 |
"• Microphone too far away\n"
|
3421 |
+
"• Audio quality issues\n"
|
3422 |
+
"• Speaking too fast\n\n"
|
3423 |
"💡 *Tips for better voice notes:*\n"
|
3424 |
"• Speak clearly and slowly\n"
|
3425 |
"• Keep phone close to mouth\n"
|
3426 |
"• Record in quiet environment\n"
|
3427 |
+
"• Make voice note at least 2-3 seconds\n"
|
3428 |
+
"• Speak in English or Urdu only\n\n"
|
3429 |
"💬 *You can also:*\n"
|
3430 |
"• Send a text message\n"
|
3431 |
"• Type 'main' to see menu options\n"
|