seawolf2357 committed on
Commit
62f3257
·
verified ·
1 Parent(s): f6a65af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -129
app.py CHANGED
@@ -1243,37 +1243,29 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1243
  system_prompt: str) -> Dict[str, str]:
1244
  """Process text chat using GPT-4o-mini model"""
1245
  try:
1246
- # Prepare system message
1247
- base_instructions = system_prompt or "You are a helpful assistant."
1248
-
1249
- # Add strong language instructions if language is selected
1250
  if target_language:
1251
  language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1252
 
1253
- # Language-specific instructions
1254
  if target_language == "en":
1255
- lang_specific = "\nYou MUST respond in English ONLY. Never use Korean or any other language."
 
1256
  elif target_language == "ja":
1257
- lang_specific = "\n日本語でのみ応答してください。韓国語や他の言語は使用しないでください。"
 
1258
  elif target_language == "zh":
1259
- lang_specific = "\n只能用中文回答。不要使用韩语或其他任何语言。"
 
1260
  elif target_language == "es":
1261
- lang_specific = "\nDebe responder SOLO en español. Nunca use coreano u otros idiomas."
1262
- elif target_language == "fr":
1263
- lang_specific = "\nVous devez répondre UNIQUEMENT en français. N'utilisez jamais le coréen ou d'autres langues."
1264
- elif target_language == "de":
1265
- lang_specific = "\nSie müssen NUR auf Deutsch antworten. Verwenden Sie niemals Koreanisch oder andere Sprachen."
1266
  else:
1267
- lang_specific = f"\nYou MUST respond ONLY in {language_name}. Never use any other language."
1268
-
1269
- translation_instructions = (
1270
- f"\n\nIMPORTANT: Your response language is set to {language_name} ({target_language})."
1271
- f"{lang_specific}"
1272
- f"\nEven if the user writes in Korean or another language, you must ALWAYS respond in {language_name}."
1273
- f"\nThis is a strict requirement. Output language: {language_name} ONLY."
1274
- )
1275
-
1276
- base_instructions = base_instructions + translation_instructions
1277
 
1278
  messages = [
1279
  {"role": "system", "content": base_instructions}
@@ -1296,14 +1288,21 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1296
  for i, result in enumerate(search_results[:5], 1):
1297
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1298
 
 
 
 
 
 
 
1299
  messages.append({
1300
  "role": "system",
1301
- "content": f"다음 검색 결과를 참고하여 답변하세요:\n\n{search_context}"
1302
  })
1303
 
1304
- messages.append({"role": "user", "content": message})
 
1305
 
1306
- # Call GPT-4o-mini with strong language enforcement
1307
  response = await client.chat.completions.create(
1308
  model="gpt-4o-mini",
1309
  messages=messages,
@@ -1313,7 +1312,21 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1313
 
1314
  response_text = response.choices[0].message.content
1315
 
1316
- # Debug logging
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1317
  print(f"[TEXT CHAT] Target language: {target_language}")
1318
  print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
1319
 
@@ -1455,19 +1468,51 @@ class OpenAIHandler(AsyncStreamHandler):
1455
  # 2. Translate with GPT-4o-mini
1456
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1457
 
1458
- # Create very specific translation prompt
1459
- if self.interpretation_language == "en":
1460
- system_prompt = "You are a translator. Translate Korean to English. Output ONLY the English translation, nothing else."
1461
- elif self.interpretation_language == "ja":
1462
- system_prompt = "You are a translator. Translate Korean to Japanese. Output ONLY the Japanese translation (日本語のみ), nothing else."
1463
- elif self.interpretation_language == "zh":
1464
- system_prompt = "You are a translator. Translate Korean to Chinese. Output ONLY the Chinese translation (只输出中文), nothing else."
1465
- elif self.interpretation_language == "es":
1466
- system_prompt = "You are a translator. Translate Korean to Spanish. Output ONLY the Spanish translation (solo español), nothing else."
1467
- else:
1468
- system_prompt = f"You are a translator. Translate Korean to {target_lang_name}. Output ONLY the {target_lang_name} translation, nothing else."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1469
 
1470
  print(f"[INTERPRETATION] Translating to {target_lang_name}...")
 
 
1471
  translation_response = await self.client.chat.completions.create(
1472
  model="gpt-4o-mini",
1473
  messages=[
@@ -1477,26 +1522,33 @@ class OpenAIHandler(AsyncStreamHandler):
1477
  },
1478
  {
1479
  "role": "user",
1480
- "content": user_text
1481
  }
1482
  ],
1483
- temperature=0.1, # Lower temperature for more literal translation
1484
  max_tokens=200
1485
  )
1486
 
1487
  translated_text = translation_response.choices[0].message.content.strip()
 
 
 
 
 
 
 
 
1488
  print(f"[INTERPRETATION] Translated: {translated_text}")
1489
 
1490
  # 3. Generate speech with TTS
1491
- print("[INTERPRETATION] Generating speech...")
1492
 
1493
- # Select appropriate voice for the language
1494
- # Using voices that work better for each language
1495
  voice_map = {
1496
- "en": "nova", # Nova has clear English pronunciation
1497
- "es": "nova", # Nova works well for Spanish
1498
- "fr": "shimmer", # Shimmer works well for French
1499
- "de": "onyx", # Onyx works well for German
1500
  "ja": "nova", # Nova can handle Japanese
1501
  "zh": "nova", # Nova can handle Chinese
1502
  "ko": "nova", # Nova can handle Korean
@@ -1505,13 +1557,29 @@ class OpenAIHandler(AsyncStreamHandler):
1505
 
1506
  print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
1507
 
1508
- tts_response = await self.client.audio.speech.create(
1509
- model="tts-1",
1510
- voice=selected_voice,
1511
- input=translated_text,
1512
- response_format="pcm", # PCM format for direct playback
1513
- speed=1.0
1514
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1515
 
1516
  # Convert response to bytes
1517
  audio_bytes = b""
@@ -1607,15 +1675,8 @@ class OpenAIHandler(AsyncStreamHandler):
1607
  print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1608
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1609
  # Just keep the handler ready to process audio
1610
- # Create an infinite loop to keep the handler alive
1611
- try:
1612
- while True:
1613
- await asyncio.sleep(0.1)
1614
- # Check if we need to process any audio
1615
- if self.is_recording and self.silence_frames > self.silence_threshold:
1616
- await self.process_interpretation()
1617
- except asyncio.CancelledError:
1618
- print("[INTERPRETATION MODE] Handler cancelled")
1619
  return
1620
 
1621
  # Normal mode - connect to Realtime API
@@ -1627,63 +1688,85 @@ class OpenAIHandler(AsyncStreamHandler):
1627
  if self.target_language:
1628
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1629
 
1630
- # Create base translation instruction
1631
- base_translation = f"You MUST respond ONLY in {language_name}."
1632
-
1633
- # Add language-specific instructions with examples
1634
  if self.target_language == "en":
1635
- translation_instructions = f"""
1636
- {base_translation}
1637
 
1638
- CRITICAL RULES:
1639
- 1. EVERY word you say must be in English.
1640
- 2. Do NOT use Korean (한국어) at all.
1641
- 3. If user speaks Korean, understand it but ALWAYS reply in English.
1642
- 4. Example: User says "안녕하세요" You say "Hello! How can I help you today?"
1643
- 5. Your language mode is: ENGLISH ONLY.
 
 
 
 
1644
  """
 
 
 
1645
  elif self.target_language == "ja":
1646
- translation_instructions = f"""
1647
- {base_translation}
1648
 
1649
- 重要なルール:
1650
- 1. 必ず日本語のみで応答してください。
1651
- 2. 韓国語は一切使用しないでください。
1652
- 3. ユーザーが韓国語で話しても、必ず日本語で返答してください。
1653
- 4. 例:ユーザーが「안녕하세요」と言ったら → あなたは「こんにちは!今日はどのようにお手伝いできますか?」と言う
1654
- 5. 言語モード:日本語のみ
 
 
 
 
1655
  """
 
 
1656
  elif self.target_language == "zh":
1657
- translation_instructions = f"""
1658
- {base_translation}
 
 
 
 
 
 
 
1659
 
1660
- 重要规则:
1661
- 1. 必须只用中文回答。
1662
- 2. 绝对不要使用韩语。
1663
- 3. 即使用户说韩语,也必须用中文回复。
1664
- 4. 例如:用户说"안녕하세요" → 你说"你好!我能为您做什么?"
1665
- 5. 语言模式:仅中文
1666
  """
 
 
1667
  elif self.target_language == "es":
1668
- translation_instructions = f"""
1669
- {base_translation}
 
 
 
 
 
 
 
1670
 
1671
- REGLAS CRÍTICAS:
1672
- 1. TODAS tus palabras deben estar en español.
1673
- 2. NO uses coreano en absoluto.
1674
- 3. Si el usuario habla coreano, entiéndelo pero SIEMPRE responde en español.
1675
- 4. Ejemplo: Usuario dice "안녕하세요" → Tú dices "¡Hola! ¿Cómo puedo ayudarte hoy?"
1676
- 5. Modo de idioma: SOLO ESPAÑOL
1677
  """
 
1678
  else:
1679
  translation_instructions = f"""
1680
- {base_translation}
1681
 
1682
  RULES:
1683
- 1. You must ONLY speak in {language_name}.
1684
- 2. NEVER use Korean or any other language.
1685
- 3. Always respond in {language_name} regardless of what language the user speaks.
1686
  """
 
1687
  else:
1688
  translation_instructions = ""
1689
 
@@ -1720,11 +1803,24 @@ RULES:
1720
  "When in doubt, USE web_search. It's better to search and provide accurate information "
1721
  "than to guess or use outdated information."
1722
  )
1723
- instructions = base_instructions + translation_instructions + search_instructions
 
 
 
 
 
 
1724
  else:
1725
- instructions = base_instructions + translation_instructions
 
 
 
 
1726
 
1727
- print(f"[NORMAL MODE] Instructions: {instructions[:200]}...")
 
 
 
1728
 
1729
  async with self.client.beta.realtime.connect(
1730
  model="gpt-4o-mini-realtime-preview-2024-12-17"
@@ -1734,42 +1830,39 @@ RULES:
1734
  "turn_detection": {"type": "server_vad"},
1735
  "instructions": instructions,
1736
  "tools": tools,
1737
- "tool_choice": "auto" if tools else "none"
 
 
 
 
1738
  }
1739
 
1740
  # Use appropriate voice for the language
1741
  if self.target_language:
1742
- # Use voice that works better for each language
 
1743
  voice_map = {
1744
- "en": "nova", # Nova has clear pronunciation
1745
- "es": "nova", # Nova works well for Spanish
1746
  "fr": "shimmer", # Shimmer for French
1747
- "de": "onyx", # Onyx for German
1748
- "ja": "nova", # Nova can handle Japanese
1749
- "zh": "nova", # Nova can handle Chinese
1750
- "ko": "nova", # Nova can handle Korean
1751
  }
1752
  session_update["voice"] = voice_map.get(self.target_language, "nova")
1753
 
1754
- # Force output language settings
1755
  session_update["modalities"] = ["text", "audio"]
1756
- session_update["output_audio_format"] = "pcm16"
1757
 
1758
- # Add extra language enforcement in system message
1759
- if self.target_language == "en":
1760
- extra_instruction = "\n\nREMINDER: Speak in English only. 英語のみで話してください。"
1761
- elif self.target_language == "ja":
1762
- extra_instruction = "\n\nREMINDER: 日本語のみで話してください。Speak in Japanese only."
1763
- elif self.target_language == "zh":
1764
- extra_instruction = "\n\nREMINDER: 只说中文。Speak in Chinese only."
1765
- else:
1766
- extra_instruction = ""
1767
 
1768
- session_update["instructions"] = instructions + extra_instruction
 
 
1769
 
1770
- print(f"[TRANSLATION MODE] Target language: {self.target_language}")
1771
- print(f"[TRANSLATION MODE] Voice: {session_update['voice']}")
1772
- print(f"[TRANSLATION MODE] Instructions preview: {session_update['instructions'][:200]}...")
1773
 
1774
  await conn.session.update(session=session_update)
1775
  self.connection = conn
 
1243
  system_prompt: str) -> Dict[str, str]:
1244
  """Process text chat using GPT-4o-mini model"""
1245
  try:
1246
+ # If target language is set, override system prompt completely
 
 
 
1247
  if target_language:
1248
  language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1249
 
1250
+ # Create system prompt in target language
1251
  if target_language == "en":
1252
+ base_instructions = f"You are a helpful assistant. You speak ONLY English. Never use Korean or any other language. {system_prompt}"
1253
+ user_prefix = "Please respond in English: "
1254
  elif target_language == "ja":
1255
+ base_instructions = f"あなたは親切なアシスタントです。日本語のみを話します。韓国語や他の言語は絶対に使用しません。{system_prompt}"
1256
+ user_prefix = "日本語で答えてください: "
1257
  elif target_language == "zh":
1258
+ base_instructions = f"你是一个乐于助人的助手。你只说中文。绝不使用韩语或其他语言。{system_prompt}"
1259
+ user_prefix = "请用中文回答: "
1260
  elif target_language == "es":
1261
+ base_instructions = f"Eres un asistente útil. Solo hablas español. Nunca uses coreano u otros idiomas. {system_prompt}"
1262
+ user_prefix = "Por favor responde en español: "
 
 
 
1263
  else:
1264
+ base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}. {system_prompt}"
1265
+ user_prefix = f"Please respond in {language_name}: "
1266
+ else:
1267
+ base_instructions = system_prompt or "You are a helpful assistant."
1268
+ user_prefix = ""
 
 
 
 
 
1269
 
1270
  messages = [
1271
  {"role": "system", "content": base_instructions}
 
1288
  for i, result in enumerate(search_results[:5], 1):
1289
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1290
 
1291
+ # Add search context in target language if set
1292
+ if target_language:
1293
+ search_instruction = f"Use this search information but respond in {SUPPORTED_LANGUAGES.get(target_language, target_language)} only: "
1294
+ else:
1295
+ search_instruction = "다음 웹 검색 결과를 참고하여 답변하세요: "
1296
+
1297
  messages.append({
1298
  "role": "system",
1299
+ "content": search_instruction + "\n\n" + search_context
1300
  })
1301
 
1302
+ # Add user message with language prefix
1303
+ messages.append({"role": "user", "content": user_prefix + message})
1304
 
1305
+ # Call GPT-4o-mini
1306
  response = await client.chat.completions.create(
1307
  model="gpt-4o-mini",
1308
  messages=messages,
 
1312
 
1313
  response_text = response.choices[0].message.content
1314
 
1315
+ # Final check - remove any Korean if target language is not Korean
1316
+ if target_language and target_language != "ko":
1317
+ import re
1318
+ if re.search(r'[가-힣]', response_text):
1319
+ print(f"[TEXT CHAT] WARNING: Korean detected in response for {target_language}")
1320
+ # Try again with stronger prompt
1321
+ messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
1322
+ retry_response = await client.chat.completions.create(
1323
+ model="gpt-4o-mini",
1324
+ messages=messages,
1325
+ temperature=0.3,
1326
+ max_tokens=2000
1327
+ )
1328
+ response_text = retry_response.choices[0].message.content
1329
+
1330
  print(f"[TEXT CHAT] Target language: {target_language}")
1331
  print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
1332
 
 
1468
  # 2. Translate with GPT-4o-mini
1469
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1470
 
1471
+ # Create very explicit translation examples
1472
+ translation_examples = {
1473
+ "en": {
1474
+ "안녕하세요": "Hello",
1475
+ "감사합니다": "Thank you",
1476
+ "오늘 날씨가 좋네요": "The weather is nice today"
1477
+ },
1478
+ "ja": {
1479
+ "안녕하세요": "こんにちは",
1480
+ "감사합니다": "ありがとうございます",
1481
+ "오늘 날씨가 좋네요": "今日はいい天気ですね"
1482
+ },
1483
+ "zh": {
1484
+ "안녕하세요": "你好",
1485
+ "감사합니다": "谢谢",
1486
+ "오늘 날씨가 좋네요": "今天天气很好"
1487
+ },
1488
+ "es": {
1489
+ "안녕하세요": "Hola",
1490
+ "감사합니다": "Gracias",
1491
+ "오늘 날씨가 좋네요": "El clima está agradable hoy"
1492
+ }
1493
+ }
1494
+
1495
+ examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
1496
+ examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
1497
+
1498
+ # Ultra-specific prompt
1499
+ system_prompt = f"""You are a Korean to {target_lang_name} translator.
1500
+
1501
+ STRICT RULES:
1502
+ 1. Output ONLY the {target_lang_name} translation
1503
+ 2. Do NOT output Korean
1504
+ 3. Do NOT add explanations
1505
+ 4. Do NOT answer questions
1506
+ 5. Just translate
1507
+
1508
+ Examples:
1509
+ {examples_text}
1510
+
1511
+ Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""
1512
 
1513
  print(f"[INTERPRETATION] Translating to {target_lang_name}...")
1514
+ print(f"[INTERPRETATION] System prompt: {system_prompt}")
1515
+
1516
  translation_response = await self.client.chat.completions.create(
1517
  model="gpt-4o-mini",
1518
  messages=[
 
1522
  },
1523
  {
1524
  "role": "user",
1525
+ "content": f"Translate this Korean to {target_lang_name}: {user_text}"
1526
  }
1527
  ],
1528
+ temperature=0.1, # Very low temperature
1529
  max_tokens=200
1530
  )
1531
 
1532
  translated_text = translation_response.choices[0].message.content.strip()
1533
+
1534
+ # Remove any Korean characters if they accidentally appear
1535
+ import re
1536
+ if re.search(r'[가-힣]', translated_text):
1537
+ print(f"[INTERPRETATION] WARNING: Korean characters detected in translation: {translated_text}")
1538
+ # Try to extract only non-Korean parts
1539
+ translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
1540
+
1541
  print(f"[INTERPRETATION] Translated: {translated_text}")
1542
 
1543
  # 3. Generate speech with TTS
1544
+ print(f"[INTERPRETATION] Generating speech for text: {translated_text}")
1545
 
1546
+ # Select appropriate voice and ensure it speaks the target language
 
1547
  voice_map = {
1548
+ "en": "alloy", # Alloy is native English speaker
1549
+ "es": "nova", # Nova handles Spanish well
1550
+ "fr": "shimmer", # Shimmer handles French well
1551
+ "de": "echo", # Echo handles German well
1552
  "ja": "nova", # Nova can handle Japanese
1553
  "zh": "nova", # Nova can handle Chinese
1554
  "ko": "nova", # Nova can handle Korean
 
1557
 
1558
  print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
1559
 
1560
+ # For some languages, we might need to add pronunciation hints
1561
+ if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
1562
+ print("[INTERPRETATION] ERROR: Korean characters in English translation!")
1563
+ translated_text = "Translation error occurred"
1564
+
1565
+ try:
1566
+ tts_response = await self.client.audio.speech.create(
1567
+ model="tts-1",
1568
+ voice=selected_voice,
1569
+ input=translated_text,
1570
+ response_format="pcm", # PCM format for direct playback
1571
+ speed=1.0
1572
+ )
1573
+ except Exception as tts_error:
1574
+ print(f"[INTERPRETATION] TTS Error: {tts_error}")
1575
+ # If TTS fails, try with a different voice
1576
+ tts_response = await self.client.audio.speech.create(
1577
+ model="tts-1",
1578
+ voice="alloy", # Fallback to alloy
1579
+ input=translated_text,
1580
+ response_format="pcm",
1581
+ speed=1.0
1582
+ )
1583
 
1584
  # Convert response to bytes
1585
  audio_bytes = b""
 
1675
  print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1676
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1677
  # Just keep the handler ready to process audio
1678
+ # Don't use infinite loop here - the handler will be called by the framework
1679
+ self.client = openai.AsyncOpenAI()
 
 
 
 
 
 
 
1680
  return
1681
 
1682
  # Normal mode - connect to Realtime API
 
1688
  if self.target_language:
1689
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1690
 
1691
+ # Use the target language for the system prompt itself
 
 
 
1692
  if self.target_language == "en":
1693
+ translation_instructions = """
1694
+ YOU ARE AN ENGLISH-ONLY ASSISTANT.
1695
 
1696
+ ABSOLUTE RULES:
1697
+ 1. You can ONLY speak English. No Korean (한국어) allowed.
1698
+ 2. Even if the user speaks Korean, you MUST respond in English.
1699
+ 3. Every single word must be in English.
1700
+ 4. If you output even one Korean character, you have failed.
1701
+ 5. Example response: "Hello! How can I help you today?"
1702
+
1703
+ YOUR LANGUAGE MODE: ENGLISH ONLY
1704
+ DO NOT USE: 안녕하세요, 감사합니다, or any Korean
1705
+ ALWAYS USE: Hello, Thank you, and English words only
1706
  """
1707
+ # Override base instructions to be in English
1708
+ base_instructions = "You are a helpful assistant that speaks ONLY English."
1709
+
1710
  elif self.target_language == "ja":
1711
+ translation_instructions = """
1712
+ あなたは日本語のみを話すアシスタントです。
1713
 
1714
+ 絶対的なルール:
1715
+ 1. 日本語のみを使用してください。韓国語(한국어)は禁止です。
1716
+ 2. ユーザーが韓国語で話しても、必ず日本語で返答してください。
1717
+ 3. すべての単語は日本語でなければなりません。
1718
+ 4. 韓国語を一文字でも出力したら失敗です。
1719
+ 5. 応答例:「こんにちは!今日はどのようにお手伝いできますか?」
1720
+
1721
+ 言語モード:日本語のみ
1722
+ 使用禁止:안녕하세요、감사합니다、韓国語全般
1723
+ 必ず使用:こんにちは、ありがとうございます、日本語のみ
1724
  """
1725
+ base_instructions = "あなたは日本語のみを話す親切なアシスタントです。"
1726
+
1727
  elif self.target_language == "zh":
1728
+ translation_instructions = """
1729
+ 你是一个只说中文的助手。
1730
+
1731
+ 绝对规则:
1732
+ 1. 只能使用中文。禁止使用韩语(한국어)。
1733
+ 2. 即使用户说韩语,也必须用中文回复。
1734
+ 3. 每个字都必须是中文。
1735
+ 4. 如果输出任何韩语字符,就是失败。
1736
+ 5. 回复示例:"你好!我今天能为您做什么?"
1737
 
1738
+ 语言模式:仅中文
1739
+ 禁止使用:안녕하세요、감사합니다、任何韩语
1740
+ 必须使用:你好、谢谢、只用中文
 
 
 
1741
  """
1742
+ base_instructions = "你是一个只说中文的友好助手。"
1743
+
1744
  elif self.target_language == "es":
1745
+ translation_instructions = """
1746
+ ERES UN ASISTENTE QUE SOLO HABLA ESPAÑOL.
1747
+
1748
+ REGLAS ABSOLUTAS:
1749
+ 1. Solo puedes hablar español. No se permite coreano (한국어).
1750
+ 2. Incluso si el usuario habla coreano, DEBES responder en español.
1751
+ 3. Cada palabra debe estar en español.
1752
+ 4. Si produces aunque sea un carácter coreano, has fallado.
1753
+ 5. Respuesta ejemplo: "¡Hola! ¿Cómo puedo ayudarte hoy?"
1754
 
1755
+ MODO DE IDIOMA: SOLO ESPAÑOL
1756
+ NO USAR: 안녕하세요, 감사합니다, o cualquier coreano
1757
+ SIEMPRE USAR: Hola, Gracias, y solo palabras en español
 
 
 
1758
  """
1759
+ base_instructions = "Eres un asistente útil que habla SOLO español."
1760
  else:
1761
  translation_instructions = f"""
1762
+ YOU MUST ONLY SPEAK {language_name.upper()}.
1763
 
1764
  RULES:
1765
+ 1. Output only in {language_name}
1766
+ 2. Never use Korean
1767
+ 3. Always respond in {language_name}
1768
  """
1769
+ base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}."
1770
  else:
1771
  translation_instructions = ""
1772
 
 
1803
  "When in doubt, USE web_search. It's better to search and provide accurate information "
1804
  "than to guess or use outdated information."
1805
  )
1806
+
1807
+ # Combine all instructions
1808
+ if translation_instructions:
1809
+ # Translation instructions already include base_instructions
1810
+ instructions = translation_instructions + search_instructions
1811
+ else:
1812
+ instructions = base_instructions + search_instructions
1813
  else:
1814
+ # No web search
1815
+ if translation_instructions:
1816
+ instructions = translation_instructions
1817
+ else:
1818
+ instructions = base_instructions
1819
 
1820
+ print(f"[NORMAL MODE] Base instructions: {base_instructions[:100]}...")
1821
+ print(f"[NORMAL MODE] Translation instructions: {translation_instructions[:200] if translation_instructions else 'None'}...")
1822
+ print(f"[NORMAL MODE] Combined instructions length: {len(instructions)}")
1823
+ print(f"[NORMAL MODE] Target language: {self.target_language}")
1824
 
1825
  async with self.client.beta.realtime.connect(
1826
  model="gpt-4o-mini-realtime-preview-2024-12-17"
 
1830
  "turn_detection": {"type": "server_vad"},
1831
  "instructions": instructions,
1832
  "tools": tools,
1833
+ "tool_choice": "auto" if tools else "none",
1834
+ "temperature": 0.7,
1835
+ "max_response_output_tokens": 4096,
1836
+ "modalities": ["text", "audio"],
1837
+ "voice": "alloy" # Default voice
1838
  }
1839
 
1840
  # Use appropriate voice for the language
1841
  if self.target_language:
1842
+ # Force language through multiple mechanisms
1843
+ # 1. Use voice that's known to work well with the language
1844
  voice_map = {
1845
+ "en": "nova", # Nova has clearer English
1846
+ "es": "nova", # Nova works for Spanish
1847
  "fr": "shimmer", # Shimmer for French
1848
+ "de": "echo", # Echo for German
1849
+ "ja": "alloy", # Alloy can do Japanese
1850
+ "zh": "alloy", # Alloy can do Chinese
1851
+ "ko": "nova", # Nova for Korean
1852
  }
1853
  session_update["voice"] = voice_map.get(self.target_language, "nova")
1854
 
1855
+ # 2. Add language to modalities (experimental)
1856
  session_update["modalities"] = ["text", "audio"]
 
1857
 
1858
+ # 3. Set output format
1859
+ session_update["output_audio_format"] = "pcm16"
 
 
 
 
 
 
 
1860
 
1861
+ # 4. Add language hint to the system (if supported by API)
1862
+ if self.target_language in ["en", "es", "fr", "de", "ja", "zh"]:
1863
+ session_update["language"] = self.target_language # Try setting language directly
1864
 
1865
+ print(f"[TRANSLATION MODE] Session update: {json.dumps(session_update, indent=2)}")
 
 
1866
 
1867
  await conn.session.update(session=session_update)
1868
  self.connection = conn