seawolf2357 committed
Commit 0232d6f · verified · 1 Parent(s): 0d0e889

Update app-backup3.py

Files changed (1)
  1. app-backup3.py +114 -119
app-backup3.py CHANGED
@@ -1465,155 +1465,152 @@ class OpenAIHandler(AsyncStreamHandler):
  if not user_text:
  return

- # 2. Translate with GPT-4o-mini - FIXED VERSION
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)

- # More direct translation approach
- if self.interpretation_language == "en":
- translation_prompt = f"Translate this Korean text to English. Output ONLY the English translation, nothing else: {user_text}"
- elif self.interpretation_language == "ja":
- translation_prompt = f"韓国語を日本語に翻訳してください。日本語の翻訳のみを出力してください: {user_text}"
- elif self.interpretation_language == "zh":
- translation_prompt = f"将韩语翻译成中文。只输出中文翻译: {user_text}"
- elif self.interpretation_language == "es":
- translation_prompt = f"Traduce este texto coreano al español. Solo muestra la traducción en español: {user_text}"
- elif self.interpretation_language == "fr":
- translation_prompt = f"Traduisez ce texte coréen en français. Affichez uniquement la traduction française: {user_text}"
- elif self.interpretation_language == "de":
- translation_prompt = f"Übersetzen Sie diesen koreanischen Text ins Deutsche. Geben Sie nur die deutsche Übersetzung aus: {user_text}"
- else:
- translation_prompt = f"Translate Korean to {target_lang_name}. Output only {target_lang_name}: {user_text}"

- print(f"[INTERPRETATION] Translation prompt: {translation_prompt}")

- # Use a single user message approach for better results
  translation_response = await self.client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
  {
  "role": "user",
- "content": translation_prompt
  }
  ],
- temperature=0.0, # Set to 0 for most deterministic output
  max_tokens=200
  )

  translated_text = translation_response.choices[0].message.content.strip()

- # Validation: Check if Korean characters are present in non-Korean translations
  import re
- if self.interpretation_language != "ko" and re.search(r'[가-힣]', translated_text):
- print(f"[INTERPRETATION] WARNING: Korean detected in {self.interpretation_language} translation")
-
- # Try again with a more forceful prompt
- force_prompt = {
- "en": f"English only: {user_text}",
- "ja": f"日本語のみ: {user_text}",
- "zh": f"仅中文: {user_text}",
- "es": f"Solo español: {user_text}",
- "fr": f"Français seulement: {user_text}",
- "de": f"Nur Deutsch: {user_text}"
- }.get(self.interpretation_language, f"{target_lang_name} only: {user_text}")
-
- retry_response = await self.client.chat.completions.create(
- model="gpt-4o-mini",
- messages=[{"role": "user", "content": force_prompt}],
- temperature=0.0,
- max_tokens=200
- )
-
- new_translation = retry_response.choices[0].message.content.strip()
-
- # If still has Korean, extract non-Korean parts
- if re.search(r'[가-힣]', new_translation):
- # Remove all Korean characters and clean up
- cleaned = re.sub(r'[가-힣]+', ' ', new_translation).strip()
- cleaned = re.sub(r'\s+', ' ', cleaned) # Remove multiple spaces
- if cleaned and len(cleaned) > 3: # If we have meaningful content left
- translated_text = cleaned
- else:
- # Fallback to a simple translation
- translated_text = {
- "en": "Translation completed",
- "ja": "翻訳完了",
- "zh": "翻译完成",
- "es": "Traducción completada",
- "fr": "Traduction terminée",
- "de": "Übersetzung abgeschlossen"
- }.get(self.interpretation_language, "Translation completed")
- else:
- translated_text = new_translation

- print(f"[INTERPRETATION] Final translated text: {translated_text}")

  # 3. Generate speech with TTS
- # Select voice optimized for the target language
  voice_map = {
- "en": "nova", # Nova has clear English pronunciation
  "es": "nova", # Nova handles Spanish well
- "fr": "shimmer", # Shimmer for French
- "de": "echo", # Echo for German
- "ja": "alloy", # Alloy can handle Japanese
- "zh": "alloy", # Alloy can handle Chinese
- "ko": "nova", # Nova for Korean
- "it": "nova", # Nova for Italian
- "pt": "shimmer", # Shimmer for Portuguese
- "ru": "onyx", # Onyx for Russian
  }
  selected_voice = voice_map.get(self.interpretation_language, "nova")

- print(f"[INTERPRETATION] Generating TTS with voice: {selected_voice}")

  try:
  tts_response = await self.client.audio.speech.create(
  model="tts-1",
  voice=selected_voice,
  input=translated_text,
- response_format="pcm",
  speed=1.0
  )
-
- # Convert response to bytes
- audio_bytes = b""
- async for chunk in tts_response.iter_bytes(1024):
- audio_bytes += chunk
-
- # Convert PCM to numpy array
- audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
-
- # Send audio in chunks
- if len(audio_array) > 0:
- chunk_size = 480
- for i in range(0, len(audio_array), chunk_size):
- chunk = audio_array[i:i + chunk_size]
- if len(chunk) < chunk_size:
- chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
-
- await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
-
- # Send transcript event - show both original and translation
- output_data = {
- "event": type('Event', (), {
- 'transcript': f"{user_text} → {translated_text}"
- })(),
- "language": target_lang_name,
- "mode": "interpretation"
- }
- await self.output_queue.put(AdditionalOutputs(output_data))
-
  except Exception as tts_error:
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
- # Send error message
- error_data = {
- "event": type('Event', (), {
- 'transcript': f"TTS 오류: {str(tts_error)}"
- })(),
- "language": "",
- "mode": "error"
- }
- await self.output_queue.put(AdditionalOutputs(error_data))
-
  except Exception as e:
  print(f"[INTERPRETATION] Error: {e}")
  import traceback
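
The playback path removed above (and re-added further down in this diff) frames the raw TTS PCM into fixed 480-sample int16 chunks, zero-padding the final chunk before it is queued. A minimal numpy-only sketch of that framing step with a synthetic buffer; the frame_pcm helper and the demo values are illustrative, and the 24 kHz rate comes from the diff's own comment:

    import numpy as np

    SAMPLE_RATE = 24000   # the diff's comment says the TTS model returns 24 kHz PCM
    CHUNK_SIZE = 480      # 480 samples at 24 kHz is a 20 ms frame

    def frame_pcm(audio_bytes: bytes, chunk_size: int = CHUNK_SIZE) -> list:
        """Split 16-bit mono PCM into fixed-size frames, zero-padding the last one."""
        audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
        frames = []
        for i in range(0, len(audio_array), chunk_size):
            chunk = audio_array[i:i + chunk_size]
            if len(chunk) < chunk_size:
                chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
            frames.append(chunk.reshape(1, -1))  # shape (1, chunk_size), as the handler queues it
        return frames

    # Synthetic 0.05 s buffer: 1200 samples -> two full frames plus one padded frame
    demo = np.zeros(1200, dtype=np.int16).tobytes()
    print(len(frame_pcm(demo)))  # -> 3
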
@@ -1633,8 +1630,6 @@ class OpenAIHandler(AsyncStreamHandler):
  self.audio_buffer = []
  self.is_recording = False
  self.silence_frames = 0
-
-

  def get_translation_instructions(self):
  """Get instructions for translation based on target language"""
@@ -1677,7 +1672,7 @@ class OpenAIHandler(AsyncStreamHandler):

  # If in interpretation mode, don't connect to Realtime API
  if self.interpretation_mode:
- print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4.1-mini + TTS")
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
  # Just keep the handler ready to process audio
  # Don't use infinite loop here - the handler will be called by the framework
@@ -1828,7 +1823,7 @@ RULES:
  print(f"[NORMAL MODE] Target language: {self.target_language}")

  async with self.client.beta.realtime.connect(
- model="gpt-4.0-mini-realtime-preview-2024-12-17"
  ) as conn:
  # Update session with tools
  session_update = {
@@ -2085,7 +2080,7 @@ async def custom_offer(request: Request):

  @app.post("/chat/text")
  async def chat_text(request: Request):
- """Handle text chat messages using GPT-4.1-mini"""
  try:
  body = await request.json()
  message = body.get("message", "")
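
Both the removed retry logic in the first hunk and the replacement code further down fall back on the same Hangul check: a character-class match on the precomposed syllable range 가-힣 (U+AC00 to U+D7A3), followed by a re.sub cleanup. A standalone sketch of that check using only the standard re module; the strip_hangul name and the sample strings are illustrative:

    import re

    HANGUL = re.compile(r'[가-힣]')  # precomposed Hangul syllables

    def strip_hangul(text: str) -> str:
        """Return text with Hangul runs removed and leftover whitespace collapsed."""
        if not HANGUL.search(text):
            return text
        cleaned = re.sub(r'[가-힣]+', ' ', text)     # drop Hangul runs, as the retry path does
        return re.sub(r'\s+', ' ', cleaned).strip()  # collapse the remaining whitespace

    print(strip_hangul("Hello 안녕하세요 world"))       # -> "Hello world"
    print(strip_hangul("The weather is nice today"))  # unchanged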
 
  if not user_text:
  return

+ # 2. Translate with GPT-4o-mini
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)

+ # Create very explicit translation examples
+ translation_examples = {
+ "en": {
+ "안녕하세요": "Hello",
+ "감사합니다": "Thank you",
+ "오늘 날씨가 좋네요": "The weather is nice today"
+ },
+ "ja": {
+ "안녕하세요": "こんにちは",
+ "감사합니다": "ありがとうございます",
+ "오늘 날씨가 좋네요": "今日はいい天気ですね"
+ },
+ "zh": {
+ "안녕하세요": "你好",
+ "감사합니다": "谢谢",
+ "오늘 날씨가 좋네요": "今天天气很好"
+ },
+ "es": {
+ "안녕하세요": "Hola",
+ "감사합니다": "Gracias",
+ "오늘 날씨가 좋네요": "El clima está agradable hoy"
+ }
+ }
+
+ examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
+ examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
+
+ # Ultra-specific prompt
+ system_prompt = f"""You are a Korean to {target_lang_name} translator.
+
+ STRICT RULES:
+ 1. Output ONLY the {target_lang_name} translation
+ 2. Do NOT output Korean
+ 3. Do NOT add explanations
+ 4. Do NOT answer questions
+ 5. Just translate
+
+ Examples:
+ {examples_text}
+
+ Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""

+ print(f"[INTERPRETATION] Translating to {target_lang_name}...")
+ print(f"[INTERPRETATION] System prompt: {system_prompt}")

  translation_response = await self.client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
+ {
+ "role": "system",
+ "content": system_prompt
+ },
  {
  "role": "user",
+ "content": f"Translate this Korean to {target_lang_name}: {user_text}"
  }
  ],
+ temperature=0.1, # Very low temperature
  max_tokens=200
  )

  translated_text = translation_response.choices[0].message.content.strip()

+ # Remove any Korean characters if they accidentally appear
  import re
+ if re.search(r'[가-힣]', translated_text):
+ print(f"[INTERPRETATION] WARNING: Korean characters detected in translation: {translated_text}")
+ # Try to extract only non-Korean parts
+ translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()

+ print(f"[INTERPRETATION] Translated: {translated_text}")

  # 3. Generate speech with TTS
+ print(f"[INTERPRETATION] Generating speech for text: {translated_text}")
+
+ # Select appropriate voice and ensure it speaks the target language
  voice_map = {
+ "en": "alloy", # Alloy is native English speaker
  "es": "nova", # Nova handles Spanish well
+ "fr": "shimmer", # Shimmer handles French well
+ "de": "echo", # Echo handles German well
+ "ja": "nova", # Nova can handle Japanese
+ "zh": "nova", # Nova can handle Chinese
+ "ko": "nova", # Nova can handle Korean
  }
  selected_voice = voice_map.get(self.interpretation_language, "nova")

+ print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
+
+ # For some languages, we might need to add pronunciation hints
+ if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
+ print("[INTERPRETATION] ERROR: Korean characters in English translation!")
+ translated_text = "Translation error occurred"

  try:
  tts_response = await self.client.audio.speech.create(
  model="tts-1",
  voice=selected_voice,
  input=translated_text,
+ response_format="pcm", # PCM format for direct playback
  speed=1.0
  )
  except Exception as tts_error:
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
+ # If TTS fails, try with a different voice
+ tts_response = await self.client.audio.speech.create(
+ model="tts-1",
+ voice="alloy", # Fallback to alloy
+ input=translated_text,
+ response_format="pcm",
+ speed=1.0
+ )
+
+ # Convert response to bytes
+ audio_bytes = b""
+ async for chunk in tts_response.iter_bytes(1024):
+ audio_bytes += chunk
+
+ # Convert PCM to numpy array (TTS outputs at 24kHz)
+ audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
+
+ # Send audio in chunks
+ if len(audio_array) > 0:
+ # Split audio into chunks and send
+ chunk_size = 480 # Match our frame size
+ for i in range(0, len(audio_array), chunk_size):
+ chunk = audio_array[i:i + chunk_size]
+ if len(chunk) < chunk_size:
+ # Pad the last chunk if necessary
+ chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
+
+ await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
+
+ # Send transcript event
+ output_data = {
+ "event": type('Event', (), {
+ 'transcript': f"{user_text} → {translated_text}"
+ })(),
+ "language": target_lang_name,
+ "mode": "interpretation"
+ }
+ await self.output_queue.put(AdditionalOutputs(output_data))
+
  except Exception as e:
  print(f"[INTERPRETATION] Error: {e}")
  import traceback
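
The heart of the rewritten translation step above is a single chat completion against gpt-4o-mini with a few-shot system prompt and near-zero temperature. A trimmed standalone sketch of that call shape with the openai Python SDK; the client setup, the translate_ko helper, and the abbreviated two-entry example table are assumptions for illustration, not the handler's exact code:

    import asyncio
    from openai import AsyncOpenAI  # assumes OPENAI_API_KEY is set in the environment

    client = AsyncOpenAI()
    EXAMPLES = {"안녕하세요": "Hello", "감사합니다": "Thank you"}  # abbreviated few-shot table

    async def translate_ko(user_text: str, target_lang_name: str = "English") -> str:
        examples_text = "\n".join(f'"{k}" → "{v}"' for k, v in EXAMPLES.items())
        system_prompt = (
            f"You are a Korean to {target_lang_name} translator.\n"
            f"Output ONLY the {target_lang_name} translation. Do NOT output Korean.\n"
            f"Examples:\n{examples_text}"
        )
        resp = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Translate this Korean to {target_lang_name}: {user_text}"},
            ],
            temperature=0.1,
            max_tokens=200,
        )
        return resp.choices[0].message.content.strip()

    # asyncio.run(translate_ko("오늘 날씨가 좋네요"))  # -> e.g. "The weather is nice today"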
 
  self.audio_buffer = []
  self.is_recording = False
  self.silence_frames = 0

  def get_translation_instructions(self):
  """Get instructions for translation based on target language"""
 

  # If in interpretation mode, don't connect to Realtime API
  if self.interpretation_mode:
+ print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
  # Just keep the handler ready to process audio
  # Don't use infinite loop here - the handler will be called by the framework
 
  print(f"[NORMAL MODE] Target language: {self.target_language}")

  async with self.client.beta.realtime.connect(
+ model="gpt-4o-mini-realtime-preview-2024-12-17"
  ) as conn:
  # Update session with tools
  session_update = {
 

  @app.post("/chat/text")
  async def chat_text(request: Request):
+ """Handle text chat messages using GPT-4o-mini"""
  try:
  body = await request.json()
  message = body.get("message", "")
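
The last hunk only retitles the /chat/text docstring to match the gpt-4o-mini backend, and the diff shows just the top of that handler. For orientation, a minimal sketch of an endpoint with the same shape; the FastAPI app object, the shared client, the response field, and the error handling are assumptions rather than the file's actual implementation:

    from fastapi import FastAPI, Request
    from openai import AsyncOpenAI

    app = FastAPI()
    client = AsyncOpenAI()

    @app.post("/chat/text")
    async def chat_text(request: Request):
        """Handle text chat messages using GPT-4o-mini"""
        try:
            body = await request.json()
            message = body.get("message", "")
            resp = await client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": message}],
                max_tokens=200,
            )
            return {"response": resp.choices[0].message.content}
        except Exception as e:
            return {"error": str(e)}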