seawolf2357 committed on
Commit
9c97703
·
verified ·
1 Parent(s): 64bea29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -114
app.py CHANGED
@@ -1465,152 +1465,155 @@ class OpenAIHandler(AsyncStreamHandler):
1465
  if not user_text:
1466
  return
1467
 
1468
- # 2. Translate with GPT-4o-mini
1469
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1470
 
1471
- # Create very explicit translation examples
1472
- translation_examples = {
1473
- "en": {
1474
- "안녕하세요": "Hello",
1475
- "감사합니다": "Thank you",
1476
- "오늘 날씨가 좋네요": "The weather is nice today"
1477
- },
1478
- "ja": {
1479
- "안녕하세요": "こんにちは",
1480
- "감사합니다": "ありがとうございます",
1481
- "오늘 날씨가 좋네요": "今日はいい天気ですね"
1482
- },
1483
- "zh": {
1484
- "안녕하세요": "你好",
1485
- "감사합니다": "谢谢",
1486
- "오늘 날씨가 좋네요": "今天天气很好"
1487
- },
1488
- "es": {
1489
- "안녕하세요": "Hola",
1490
- "감사합니다": "Gracias",
1491
- "오늘 날씨가 좋네요": "El clima está agradable hoy"
1492
- }
1493
- }
1494
-
1495
- examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
1496
- examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
1497
-
1498
- # Ultra-specific prompt
1499
- system_prompt = f"""You are a Korean to {target_lang_name} translator.
1500
-
1501
- STRICT RULES:
1502
- 1. Output ONLY the {target_lang_name} translation
1503
- 2. Do NOT output Korean
1504
- 3. Do NOT add explanations
1505
- 4. Do NOT answer questions
1506
- 5. Just translate
1507
-
1508
- Examples:
1509
- {examples_text}
1510
-
1511
- Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""
1512
 
1513
- print(f"[INTERPRETATION] Translating to {target_lang_name}...")
1514
- print(f"[INTERPRETATION] System prompt: {system_prompt}")
1515
 
 
1516
  translation_response = await self.client.chat.completions.create(
1517
  model="gpt-4o-mini",
1518
  messages=[
1519
- {
1520
- "role": "system",
1521
- "content": system_prompt
1522
- },
1523
  {
1524
  "role": "user",
1525
- "content": f"Translate this Korean to {target_lang_name}: {user_text}"
1526
  }
1527
  ],
1528
- temperature=0.1, # Very low temperature
1529
  max_tokens=200
1530
  )
1531
 
1532
  translated_text = translation_response.choices[0].message.content.strip()
1533
 
1534
- # Remove any Korean characters if they accidentally appear
1535
  import re
1536
- if re.search(r'[가-힣]', translated_text):
1537
- print(f"[INTERPRETATION] WARNING: Korean characters detected in translation: {translated_text}")
1538
- # Try to extract only non-Korean parts
1539
- translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1540
 
1541
- print(f"[INTERPRETATION] Translated: {translated_text}")
1542
 
1543
  # 3. Generate speech with TTS
1544
- print(f"[INTERPRETATION] Generating speech for text: {translated_text}")
1545
-
1546
- # Select appropriate voice and ensure it speaks the target language
1547
  voice_map = {
1548
- "en": "alloy", # Alloy is native English speaker
1549
  "es": "nova", # Nova handles Spanish well
1550
- "fr": "shimmer", # Shimmer handles French well
1551
- "de": "echo", # Echo handles German well
1552
- "ja": "nova", # Nova can handle Japanese
1553
- "zh": "nova", # Nova can handle Chinese
1554
- "ko": "nova", # Nova can handle Korean
 
 
 
1555
  }
1556
  selected_voice = voice_map.get(self.interpretation_language, "nova")
1557
 
1558
- print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
1559
-
1560
- # For some languages, we might need to add pronunciation hints
1561
- if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
1562
- print("[INTERPRETATION] ERROR: Korean characters in English translation!")
1563
- translated_text = "Translation error occurred"
1564
 
1565
  try:
1566
  tts_response = await self.client.audio.speech.create(
1567
  model="tts-1",
1568
  voice=selected_voice,
1569
  input=translated_text,
1570
- response_format="pcm", # PCM format for direct playback
1571
  speed=1.0
1572
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1573
  except Exception as tts_error:
1574
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
1575
- # If TTS fails, try with a different voice
1576
- tts_response = await self.client.audio.speech.create(
1577
- model="tts-1",
1578
- voice="alloy", # Fallback to alloy
1579
- input=translated_text,
1580
- response_format="pcm",
1581
- speed=1.0
1582
- )
1583
-
1584
- # Convert response to bytes
1585
- audio_bytes = b""
1586
- async for chunk in tts_response.iter_bytes(1024):
1587
- audio_bytes += chunk
1588
-
1589
- # Convert PCM to numpy array (TTS outputs at 24kHz)
1590
- audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
1591
-
1592
- # Send audio in chunks
1593
- if len(audio_array) > 0:
1594
- # Split audio into chunks and send
1595
- chunk_size = 480 # Match our frame size
1596
- for i in range(0, len(audio_array), chunk_size):
1597
- chunk = audio_array[i:i + chunk_size]
1598
- if len(chunk) < chunk_size:
1599
- # Pad the last chunk if necessary
1600
- chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
1601
-
1602
- await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
1603
-
1604
- # Send transcript event
1605
- output_data = {
1606
- "event": type('Event', (), {
1607
- 'transcript': f"{user_text} → {translated_text}"
1608
- })(),
1609
- "language": target_lang_name,
1610
- "mode": "interpretation"
1611
- }
1612
- await self.output_queue.put(AdditionalOutputs(output_data))
1613
-
1614
  except Exception as e:
1615
  print(f"[INTERPRETATION] Error: {e}")
1616
  import traceback
@@ -1630,6 +1633,8 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
1630
  self.audio_buffer = []
1631
  self.is_recording = False
1632
  self.silence_frames = 0
 
 
1633
 
1634
  def get_translation_instructions(self):
1635
  """Get instructions for translation based on target language"""
@@ -1672,7 +1677,7 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
1672
 
1673
  # If in interpretation mode, don't connect to Realtime API
1674
  if self.interpretation_mode:
1675
- print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1676
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1677
  # Just keep the handler ready to process audio
1678
  # Don't use infinite loop here - the handler will be called by the framework
@@ -1823,7 +1828,7 @@ RULES:
1823
  print(f"[NORMAL MODE] Target language: {self.target_language}")
1824
 
1825
  async with self.client.beta.realtime.connect(
1826
- model="gpt-4o-mini-realtime-preview-2024-12-17"
1827
  ) as conn:
1828
  # Update session with tools
1829
  session_update = {
@@ -2080,7 +2085,7 @@ async def custom_offer(request: Request):
2080
 
2081
  @app.post("/chat/text")
2082
  async def chat_text(request: Request):
2083
- """Handle text chat messages using GPT-4o-mini"""
2084
  try:
2085
  body = await request.json()
2086
  message = body.get("message", "")
 
1465
  if not user_text:
1466
  return
1467
 
1468
+ # 2. Translate with GPT-4o-mini - FIXED VERSION
1469
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1470
 
1471
+ # More direct translation approach
1472
+ if self.interpretation_language == "en":
1473
+ translation_prompt = f"Translate this Korean text to English. Output ONLY the English translation, nothing else: {user_text}"
1474
+ elif self.interpretation_language == "ja":
1475
+ translation_prompt = f"韓国語を日本語に翻訳してください。日本語の翻訳のみを出力してください: {user_text}"
1476
+ elif self.interpretation_language == "zh":
1477
+ translation_prompt = f"将韩语翻译成中文。只输出中文翻译: {user_text}"
1478
+ elif self.interpretation_language == "es":
1479
+ translation_prompt = f"Traduce este texto coreano al español. Solo muestra la traducción en español: {user_text}"
1480
+ elif self.interpretation_language == "fr":
1481
+ translation_prompt = f"Traduisez ce texte coréen en français. Affichez uniquement la traduction française: {user_text}"
1482
+ elif self.interpretation_language == "de":
1483
+ translation_prompt = f"Übersetzen Sie diesen koreanischen Text ins Deutsche. Geben Sie nur die deutsche Übersetzung aus: {user_text}"
1484
+ else:
1485
+ translation_prompt = f"Translate Korean to {target_lang_name}. Output only {target_lang_name}: {user_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1486
 
1487
+ print(f"[INTERPRETATION] Translation prompt: {translation_prompt}")
 
1488
 
1489
+ # Use a single user message approach for better results
1490
  translation_response = await self.client.chat.completions.create(
1491
  model="gpt-4o-mini",
1492
  messages=[
 
 
 
 
1493
  {
1494
  "role": "user",
1495
+ "content": translation_prompt
1496
  }
1497
  ],
1498
+ temperature=0.0, # Set to 0 for most deterministic output
1499
  max_tokens=200
1500
  )
1501
 
1502
  translated_text = translation_response.choices[0].message.content.strip()
1503
 
1504
+ # Validation: Check if Korean characters are present in non-Korean translations
1505
  import re
1506
+ if self.interpretation_language != "ko" and re.search(r'[가-힣]', translated_text):
1507
+ print(f"[INTERPRETATION] WARNING: Korean detected in {self.interpretation_language} translation")
1508
+
1509
+ # Try again with a more forceful prompt
1510
+ force_prompt = {
1511
+ "en": f"English only: {user_text}",
1512
+ "ja": f"日本語のみ: {user_text}",
1513
+ "zh": f"仅中文: {user_text}",
1514
+ "es": f"Solo español: {user_text}",
1515
+ "fr": f"Français seulement: {user_text}",
1516
+ "de": f"Nur Deutsch: {user_text}"
1517
+ }.get(self.interpretation_language, f"{target_lang_name} only: {user_text}")
1518
+
1519
+ retry_response = await self.client.chat.completions.create(
1520
+ model="gpt-4o-mini",
1521
+ messages=[{"role": "user", "content": force_prompt}],
1522
+ temperature=0.0,
1523
+ max_tokens=200
1524
+ )
1525
+
1526
+ new_translation = retry_response.choices[0].message.content.strip()
1527
+
1528
+ # If still has Korean, extract non-Korean parts
1529
+ if re.search(r'[가-힣]', new_translation):
1530
+ # Remove all Korean characters and clean up
1531
+ cleaned = re.sub(r'[가-힣]+', ' ', new_translation).strip()
1532
+ cleaned = re.sub(r'\s+', ' ', cleaned) # Remove multiple spaces
1533
+ if cleaned and len(cleaned) > 3: # If we have meaningful content left
1534
+ translated_text = cleaned
1535
+ else:
1536
+ # Fallback to a simple translation
1537
+ translated_text = {
1538
+ "en": "Translation completed",
1539
+ "ja": "翻訳完了",
1540
+ "zh": "翻译完成",
1541
+ "es": "Traducción completada",
1542
+ "fr": "Traduction terminée",
1543
+ "de": "Übersetzung abgeschlossen"
1544
+ }.get(self.interpretation_language, "Translation completed")
1545
+ else:
1546
+ translated_text = new_translation
1547
 
1548
+ print(f"[INTERPRETATION] Final translated text: {translated_text}")
1549
 
1550
  # 3. Generate speech with TTS
1551
+ # Select voice optimized for the target language
 
 
1552
  voice_map = {
1553
+ "en": "nova", # Nova has clear English pronunciation
1554
  "es": "nova", # Nova handles Spanish well
1555
+ "fr": "shimmer", # Shimmer for French
1556
+ "de": "echo", # Echo for German
1557
+ "ja": "alloy", # Alloy can handle Japanese
1558
+ "zh": "alloy", # Alloy can handle Chinese
1559
+ "ko": "nova", # Nova for Korean
1560
+ "it": "nova", # Nova for Italian
1561
+ "pt": "shimmer", # Shimmer for Portuguese
1562
+ "ru": "onyx", # Onyx for Russian
1563
  }
1564
  selected_voice = voice_map.get(self.interpretation_language, "nova")
1565
 
1566
+ print(f"[INTERPRETATION] Generating TTS with voice: {selected_voice}")
 
 
 
 
 
1567
 
1568
  try:
1569
  tts_response = await self.client.audio.speech.create(
1570
  model="tts-1",
1571
  voice=selected_voice,
1572
  input=translated_text,
1573
+ response_format="pcm",
1574
  speed=1.0
1575
  )
1576
+
1577
+ # Convert response to bytes
1578
+ audio_bytes = b""
1579
+ async for chunk in tts_response.iter_bytes(1024):
1580
+ audio_bytes += chunk
1581
+
1582
+ # Convert PCM to numpy array
1583
+ audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
1584
+
1585
+ # Send audio in chunks
1586
+ if len(audio_array) > 0:
1587
+ chunk_size = 480
1588
+ for i in range(0, len(audio_array), chunk_size):
1589
+ chunk = audio_array[i:i + chunk_size]
1590
+ if len(chunk) < chunk_size:
1591
+ chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
1592
+
1593
+ await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
1594
+
1595
+ # Send transcript event - show both original and translation
1596
+ output_data = {
1597
+ "event": type('Event', (), {
1598
+ 'transcript': f"{user_text} → {translated_text}"
1599
+ })(),
1600
+ "language": target_lang_name,
1601
+ "mode": "interpretation"
1602
+ }
1603
+ await self.output_queue.put(AdditionalOutputs(output_data))
1604
+
1605
  except Exception as tts_error:
1606
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
1607
+ # Send error message
1608
+ error_data = {
1609
+ "event": type('Event', (), {
1610
+ 'transcript': f"TTS 오류: {str(tts_error)}"
1611
+ })(),
1612
+ "language": "",
1613
+ "mode": "error"
1614
+ }
1615
+ await self.output_queue.put(AdditionalOutputs(error_data))
1616
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1617
  except Exception as e:
1618
  print(f"[INTERPRETATION] Error: {e}")
1619
  import traceback
 
1633
  self.audio_buffer = []
1634
  self.is_recording = False
1635
  self.silence_frames = 0
1636
+
1637
+
1638
 
1639
  def get_translation_instructions(self):
1640
  """Get instructions for translation based on target language"""
 
1677
 
1678
  # If in interpretation mode, don't connect to Realtime API
1679
  if self.interpretation_mode:
1680
+ print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4.1-mini + TTS")
1681
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1682
  # Just keep the handler ready to process audio
1683
  # Don't use infinite loop here - the handler will be called by the framework
 
1828
  print(f"[NORMAL MODE] Target language: {self.target_language}")
1829
 
1830
  async with self.client.beta.realtime.connect(
1831
+ model="gpt-4.0-mini-realtime-preview-2024-12-17"
1832
  ) as conn:
1833
  # Update session with tools
1834
  session_update = {
 
2085
 
2086
  @app.post("/chat/text")
2087
  async def chat_text(request: Request):
2088
+ """Handle text chat messages using GPT-4.1-mini"""
2089
  try:
2090
  body = await request.json()
2091
  message = body.get("message", "")