seawolf2357 commited on
Commit
ae55ef4
·
verified ·
1 Parent(s): 0232d6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +388 -269
app.py CHANGED
@@ -252,6 +252,46 @@ HTML_CONTENT = """<!DOCTYPE html>
252
  resize: vertical;
253
  min-height: 80px;
254
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  .chat-container {
256
  border-radius: 12px;
257
  background-color: var(--card-bg);
@@ -320,13 +360,66 @@ HTML_CONTENT = """<!DOCTYPE html>
320
  }
321
  .message.assistant.interpretation {
322
  background: linear-gradient(135deg, #1a5a3e, #2e7d32);
323
- font-style: italic;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  }
325
  .interpretation-arrow {
326
  color: #4caf50;
327
  font-weight: bold;
328
  margin: 0 10px;
329
  }
 
 
 
 
 
330
  .controls {
331
  text-align: center;
332
  margin-top: auto;
@@ -513,11 +606,6 @@ HTML_CONTENT = """<!DOCTYPE html>
513
  background-color: var(--secondary-color);
514
  border-radius: 50%;
515
  }
516
- .language-info {
517
- font-size: 12px;
518
- color: #888;
519
- margin-left: 5px;
520
- }
521
  </style>
522
  </head>
523
 
@@ -593,47 +681,22 @@ HTML_CONTENT = """<!DOCTYPE html>
593
  <div class="toggle-slider"></div>
594
  </div>
595
  </div>
596
- <div class="setting-item" id="interpretation-language-container" style="display: none;">
597
- <span class="setting-label">통역 언어</span>
598
- <select id="interpretation-language-select">
599
- <option value="">언어 선택</option>
600
- <option value="ko">한국어 (Korean)</option>
601
- <option value="en">English</option>
602
- <option value="es">Español (Spanish)</option>
603
- <option value="fr">Français (French)</option>
604
- <option value="de">Deutsch (German)</option>
605
- <option value="it">Italiano (Italian)</option>
606
- <option value="pt">Português (Portuguese)</option>
607
- <option value="ru">Русский (Russian)</option>
608
- <option value="ja">日本語 (Japanese)</option>
609
- <option value="zh">中文 (Chinese)</option>
610
- <option value="ar">العربية (Arabic)</option>
611
- <option value="hi">हिन्दी (Hindi)</option>
612
- <option value="nl">Nederlands (Dutch)</option>
613
- <option value="pl">Polski (Polish)</option>
614
- <option value="tr">Türkçe (Turkish)</option>
615
- <option value="vi">Tiếng Việt (Vietnamese)</option>
616
- <option value="th">ไทย (Thai)</option>
617
- <option value="id">Bahasa Indonesia</option>
618
- <option value="sv">Svenska (Swedish)</option>
619
- <option value="da">Dansk (Danish)</option>
620
- <option value="no">Norsk (Norwegian)</option>
621
- <option value="fi">Suomi (Finnish)</option>
622
- <option value="he">עברית (Hebrew)</option>
623
- <option value="uk">Українська (Ukrainian)</option>
624
- <option value="cs">Čeština (Czech)</option>
625
- <option value="el">Ελληνικά (Greek)</option>
626
- <option value="ro">Română (Romanian)</option>
627
- <option value="hu">Magyar (Hungarian)</option>
628
- <option value="ms">Bahasa Melayu (Malay)</option>
629
- </select>
630
  </div>
631
  </div>
632
  <div class="interpretation-info" id="interpretation-info" style="display: none;">
633
  <strong>통역 모드 안내:</strong><br>
634
- • 음성으로 말하면 선택한 언어로 자동 통역됩니다<br>
635
- • Whisper + GPT-4o-mini + TTS를 사용합니다<br>
636
- • 말을 마치고 잠시 기다리면 통역이 시작됩니다
 
637
  </div>
638
  <div class="text-input-section">
639
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
@@ -668,7 +731,7 @@ HTML_CONTENT = """<!DOCTYPE html>
668
  let webSearchEnabled = false;
669
  let selectedLanguage = "";
670
  let interpretationMode = false;
671
- let interpretationLanguage = "";
672
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
673
  const audioOutput = document.getElementById('audio-output');
674
  const startButton = document.getElementById('start-button');
@@ -679,17 +742,112 @@ HTML_CONTENT = """<!DOCTYPE html>
679
  const searchToggle = document.getElementById('search-toggle');
680
  const languageSelect = document.getElementById('language-select');
681
  const interpretationToggle = document.getElementById('interpretation-toggle');
682
- const interpretationLanguageSelect = document.getElementById('interpretation-language-select');
683
- const interpretationLanguageContainer = document.getElementById('interpretation-language-container');
684
  const interpretationInfo = document.getElementById('interpretation-info');
685
  const systemPromptInput = document.getElementById('system-prompt');
686
  const textInput = document.getElementById('text-input');
 
 
687
  let audioLevel = 0;
688
  let animationFrame;
689
  let audioContext, analyser, audioSource;
690
  let dataChannel = null;
691
  let isVoiceActive = false;
692
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
693
  // Web search toggle functionality
694
  searchToggle.addEventListener('click', () => {
695
  webSearchEnabled = !webSearchEnabled;
@@ -707,23 +865,48 @@ HTML_CONTENT = """<!DOCTYPE html>
707
  interpretationToggle.addEventListener('click', () => {
708
  if (!interpretationMode) {
709
  // Turning ON interpretation mode
710
- interpretationLanguageContainer.style.display = 'flex';
711
  interpretationInfo.style.display = 'block';
712
 
713
- // Show language selector first
714
- showError('통역 언어를 선택해주세요.');
715
- interpretationToggle.classList.remove('active');
 
 
 
716
 
717
- // Don't actually enable interpretation mode until language is selected
718
- return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  } else {
720
  // Turning OFF interpretation mode
721
  interpretationMode = false;
722
  interpretationToggle.classList.remove('active');
723
- interpretationLanguageContainer.style.display = 'none';
724
  interpretationInfo.style.display = 'none';
725
- interpretationLanguage = '';
726
- interpretationLanguageSelect.value = '';
727
 
728
  // Re-enable other features
729
  languageSelect.disabled = false;
@@ -748,41 +931,6 @@ HTML_CONTENT = """<!DOCTYPE html>
748
  console.log('Interpretation mode:', interpretationMode);
749
  });
750
 
751
- // Interpretation language selection
752
- interpretationLanguageSelect.addEventListener('change', () => {
753
- interpretationLanguage = interpretationLanguageSelect.value;
754
- console.log('Interpretation language:', interpretationLanguage);
755
-
756
- if (interpretationLanguage && !interpretationMode) {
757
- // Now actually enable interpretation mode
758
- interpretationMode = true;
759
- interpretationToggle.classList.add('active');
760
-
761
- // Disable other features
762
- languageSelect.value = '';
763
- selectedLanguage = '';
764
- languageSelect.disabled = true;
765
- searchToggle.classList.remove('active');
766
- webSearchEnabled = false;
767
- searchToggle.style.opacity = '0.5';
768
- searchToggle.style.pointerEvents = 'none';
769
- textInput.disabled = true;
770
- textInput.placeholder = '통역 모드에서는 텍스트 입력이 지원되지 않습니다';
771
- sendButton.style.display = 'none';
772
-
773
- console.log('Interpretation mode enabled with language:', interpretationLanguage);
774
-
775
- // If already connected, restart the connection with new settings
776
- if (peerConnection && peerConnection.connectionState === 'connected') {
777
- showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
778
- stop();
779
- setTimeout(() => {
780
- setupWebRTC();
781
- }, 500);
782
- }
783
- }
784
- });
785
-
786
  // System prompt update
787
  systemPromptInput.addEventListener('input', () => {
788
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -1020,7 +1168,7 @@ HTML_CONTENT = """<!DOCTYPE html>
1020
  target_language: selectedLanguage,
1021
  system_prompt: systemPrompt,
1022
  interpretation_mode: interpretationMode,
1023
- interpretation_language: interpretationLanguage
1024
  });
1025
 
1026
  const response = await fetch('/webrtc/offer', {
@@ -1034,7 +1182,7 @@ HTML_CONTENT = """<!DOCTYPE html>
1034
  target_language: selectedLanguage,
1035
  system_prompt: systemPrompt,
1036
  interpretation_mode: interpretationMode,
1037
- interpretation_language: interpretationLanguage
1038
  })
1039
  });
1040
  const serverResponse = await response.json();
@@ -1055,28 +1203,21 @@ HTML_CONTENT = """<!DOCTYPE html>
1055
  if (interpretationMode) {
1056
  console.log('[INTERPRETATION OUTPUT]', {
1057
  content: content,
1058
- language: eventJson.language,
1059
  mode: eventJson.mode,
1060
- expectedLanguage: interpretationLanguage
1061
  });
1062
  }
1063
 
1064
  if (selectedLanguage && eventJson.language) {
1065
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
1066
- } else if (interpretationMode && eventJson.language) {
1067
- // In interpretation mode, show the translation process
1068
- if (content.includes('→')) {
1069
- // Format: "Korean text → English text"
1070
- const parts = content.split('→');
1071
- if (parts.length === 2) {
1072
- content = `<span style="color: #999;">${parts[0].trim()}</span>` +
1073
- `<span class="interpretation-arrow">→</span>` +
1074
- `<strong>${parts[1].trim()}</strong>`;
1075
- }
1076
  }
1077
- content += ` <span class="language-info">[통역: ${eventJson.language}]</span>`;
 
1078
  }
1079
- addMessage("assistant", content);
1080
  });
1081
  eventSource.addEventListener("search", (event) => {
1082
  const eventJson = JSON.parse(event.data);
@@ -1095,11 +1236,6 @@ HTML_CONTENT = """<!DOCTYPE html>
1095
  const messageDiv = document.createElement('div');
1096
  messageDiv.classList.add('message', role);
1097
 
1098
- // Check if it's an interpretation message
1099
- if (interpretationMode && role === 'assistant' && content.includes('→')) {
1100
- messageDiv.classList.add('interpretation');
1101
- }
1102
-
1103
  if (content.includes('<span')) {
1104
  messageDiv.innerHTML = content;
1105
  } else {
@@ -1108,6 +1244,45 @@ HTML_CONTENT = """<!DOCTYPE html>
1108
  chatMessages.appendChild(messageDiv);
1109
  chatMessages.scrollTop = chatMessages.scrollHeight;
1110
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1111
  function stop() {
1112
  if (animationFrame) {
1113
  cancelAnimationFrame(animationFrame);
@@ -1149,9 +1324,10 @@ HTML_CONTENT = """<!DOCTYPE html>
1149
  }
1150
  });
1151
 
1152
- // Initialize send button visibility on page load
1153
  window.addEventListener('DOMContentLoaded', () => {
1154
  sendButton.style.display = 'block';
 
1155
  });
1156
  </script>
1157
  </body>
@@ -1227,18 +1403,6 @@ def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEve
1227
  return chatbot
1228
 
1229
 
1230
- def get_translation_instructions(target_language: str) -> str:
1231
- """Get instructions for translation based on target language"""
1232
- if not target_language:
1233
- return ""
1234
-
1235
- language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1236
- return (
1237
- f"\n\nIMPORTANT: You must respond in {language_name} ({target_language}). "
1238
- f"Translate all your responses to {language_name}."
1239
- )
1240
-
1241
-
1242
  async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
1243
  system_prompt: str) -> Dict[str, str]:
1244
  """Process text chat using GPT-4o-mini model"""
@@ -1343,7 +1507,7 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1343
  class OpenAIHandler(AsyncStreamHandler):
1344
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1345
  system_prompt: str = "", webrtc_id: str = None,
1346
- interpretation_mode: bool = False, interpretation_language: str = "") -> None:
1347
  super().__init__(
1348
  expected_layout="mono",
1349
  output_sample_rate=SAMPLE_RATE,
@@ -1361,7 +1525,7 @@ class OpenAIHandler(AsyncStreamHandler):
1361
  self.target_language = target_language
1362
  self.system_prompt = system_prompt
1363
  self.interpretation_mode = interpretation_mode
1364
- self.interpretation_language = interpretation_language
1365
 
1366
  # For interpretation mode
1367
  self.audio_buffer = []
@@ -1372,7 +1536,7 @@ class OpenAIHandler(AsyncStreamHandler):
1372
 
1373
  print(f"Handler created with web_search_enabled={web_search_enabled}, "
1374
  f"target_language={target_language}, webrtc_id={webrtc_id}, "
1375
- f"interpretation_mode={interpretation_mode}, interpretation_language={interpretation_language}")
1376
 
1377
  def copy(self):
1378
  # Get the most recent settings
@@ -1390,7 +1554,7 @@ class OpenAIHandler(AsyncStreamHandler):
1390
  system_prompt=settings.get('system_prompt', ''),
1391
  webrtc_id=recent_id,
1392
  interpretation_mode=settings.get('interpretation_mode', False),
1393
- interpretation_language=settings.get('interpretation_language', '')
1394
  )
1395
 
1396
  print(f"Handler.copy() called - creating new handler with default settings")
@@ -1430,8 +1594,8 @@ class OpenAIHandler(AsyncStreamHandler):
1430
  await self.connection.response.create()
1431
 
1432
  async def process_interpretation(self):
1433
- """Process audio buffer for interpretation"""
1434
- if not self.audio_buffer or not self.interpretation_language:
1435
  return
1436
 
1437
  try:
@@ -1465,38 +1629,50 @@ class OpenAIHandler(AsyncStreamHandler):
1465
  if not user_text:
1466
  return
1467
 
1468
- # 2. Translate with GPT-4o-mini
1469
- target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1470
 
1471
- # Create very explicit translation examples
1472
- translation_examples = {
1473
- "en": {
1474
- "안녕하세요": "Hello",
1475
- "감사합니다": "Thank you",
1476
- "오늘 날씨가 좋네요": "The weather is nice today"
1477
- },
1478
- "ja": {
1479
- "안녕하세요": "こんにちは",
1480
- "감사합니다": "ありがとうございます",
1481
- "오늘 날씨가 좋네요": "今日はいい天気ですね"
1482
- },
1483
- "zh": {
1484
- "안녕하세요": "你好",
1485
- "감사합니다": "谢谢",
1486
- "오늘 날씨가 좋네요": "今天天气很好"
1487
- },
1488
- "es": {
1489
- "안녕하세요": "Hola",
1490
- "감사합니다": "Gracias",
1491
- "오늘 날씨가 좋네요": "El clima está agradable hoy"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1492
  }
1493
- }
1494
-
1495
- examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
1496
- examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
1497
-
1498
- # Ultra-specific prompt
1499
- system_prompt = f"""You are a Korean to {target_lang_name} translator.
1500
 
1501
  STRICT RULES:
1502
  1. Output ONLY the {target_lang_name} translation
@@ -1509,105 +1685,48 @@ Examples:
1509
  {examples_text}
1510
 
1511
  Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""
1512
-
1513
- print(f"[INTERPRETATION] Translating to {target_lang_name}...")
1514
- print(f"[INTERPRETATION] System prompt: {system_prompt}")
1515
-
1516
- translation_response = await self.client.chat.completions.create(
1517
- model="gpt-4o-mini",
1518
- messages=[
1519
- {
1520
- "role": "system",
1521
- "content": system_prompt
1522
- },
1523
- {
1524
- "role": "user",
1525
- "content": f"Translate this Korean to {target_lang_name}: {user_text}"
1526
- }
1527
- ],
1528
- temperature=0.1, # Very low temperature
1529
- max_tokens=200
1530
- )
1531
-
1532
- translated_text = translation_response.choices[0].message.content.strip()
1533
-
1534
- # Remove any Korean characters if they accidentally appear
1535
- import re
1536
- if re.search(r'[가-힣]', translated_text):
1537
- print(f"[INTERPRETATION] WARNING: Korean characters detected in translation: {translated_text}")
1538
- # Try to extract only non-Korean parts
1539
- translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
1540
-
1541
- print(f"[INTERPRETATION] Translated: {translated_text}")
1542
-
1543
- # 3. Generate speech with TTS
1544
- print(f"[INTERPRETATION] Generating speech for text: {translated_text}")
1545
-
1546
- # Select appropriate voice and ensure it speaks the target language
1547
- voice_map = {
1548
- "en": "alloy", # Alloy is native English speaker
1549
- "es": "nova", # Nova handles Spanish well
1550
- "fr": "shimmer", # Shimmer handles French well
1551
- "de": "echo", # Echo handles German well
1552
- "ja": "nova", # Nova can handle Japanese
1553
- "zh": "nova", # Nova can handle Chinese
1554
- "ko": "nova", # Nova can handle Korean
1555
- }
1556
- selected_voice = voice_map.get(self.interpretation_language, "nova")
1557
-
1558
- print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
1559
-
1560
- # For some languages, we might need to add pronunciation hints
1561
- if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
1562
- print("[INTERPRETATION] ERROR: Korean characters in English translation!")
1563
- translated_text = "Translation error occurred"
1564
-
1565
- try:
1566
- tts_response = await self.client.audio.speech.create(
1567
- model="tts-1",
1568
- voice=selected_voice,
1569
- input=translated_text,
1570
- response_format="pcm", # PCM format for direct playback
1571
- speed=1.0
1572
- )
1573
- except Exception as tts_error:
1574
- print(f"[INTERPRETATION] TTS Error: {tts_error}")
1575
- # If TTS fails, try with a different voice
1576
- tts_response = await self.client.audio.speech.create(
1577
- model="tts-1",
1578
- voice="alloy", # Fallback to alloy
1579
- input=translated_text,
1580
- response_format="pcm",
1581
- speed=1.0
1582
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1583
 
1584
- # Convert response to bytes
1585
- audio_bytes = b""
1586
- async for chunk in tts_response.iter_bytes(1024):
1587
- audio_bytes += chunk
1588
-
1589
- # Convert PCM to numpy array (TTS outputs at 24kHz)
1590
- audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
1591
-
1592
- # Send audio in chunks
1593
- if len(audio_array) > 0:
1594
- # Split audio into chunks and send
1595
- chunk_size = 480 # Match our frame size
1596
- for i in range(0, len(audio_array), chunk_size):
1597
- chunk = audio_array[i:i + chunk_size]
1598
- if len(chunk) < chunk_size:
1599
- # Pad the last chunk if necessary
1600
- chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
1601
-
1602
- await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
1603
-
1604
- # Send transcript event
1605
  output_data = {
1606
  "event": type('Event', (), {
1607
- 'transcript': f"{user_text} {translated_text}"
1608
  })(),
1609
- "language": target_lang_name,
1610
- "mode": "interpretation"
 
1611
  }
1612
  await self.output_queue.put(AdditionalOutputs(output_data))
1613
 
@@ -1621,7 +1740,6 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
1621
  "event": type('Event', (), {
1622
  'transcript': f"통역 오류: {str(e)}"
1623
  })(),
1624
- "language": "",
1625
  "mode": "error"
1626
  }
1627
  await self.output_queue.put(AdditionalOutputs(error_data))
@@ -1657,25 +1775,24 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
1657
  self.target_language = settings.get('target_language', '')
1658
  self.system_prompt = settings.get('system_prompt', '')
1659
  self.interpretation_mode = settings.get('interpretation_mode', False)
1660
- self.interpretation_language = settings.get('interpretation_language', '')
1661
  self.webrtc_id = recent_id
1662
  print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
1663
  f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}, "
1664
  f"interpretation_mode={self.interpretation_mode}")
1665
- print(f"Handler interpretation settings: mode={self.interpretation_mode}, language={self.interpretation_language}")
1666
 
1667
  print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
1668
  f"target_language={self.target_language}, interpretation_mode={self.interpretation_mode}, "
1669
- f"interpretation_language={self.interpretation_language}")
1670
 
1671
  self.client = openai.AsyncOpenAI()
1672
 
1673
  # If in interpretation mode, don't connect to Realtime API
1674
  if self.interpretation_mode:
1675
- print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1676
- print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1677
  # Just keep the handler ready to process audio
1678
- # Don't use infinite loop here - the handler will be called by the framework
1679
  self.client = openai.AsyncOpenAI()
1680
  return
1681
 
@@ -2044,11 +2161,11 @@ async def custom_offer(request: Request):
2044
  target_language = body.get("target_language", "")
2045
  system_prompt = body.get("system_prompt", "")
2046
  interpretation_mode = body.get("interpretation_mode", False)
2047
- interpretation_language = body.get("interpretation_language", "")
2048
 
2049
  print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
2050
  f"target_language: {target_language}, interpretation_mode: {interpretation_mode}, "
2051
- f"interpretation_language: {interpretation_language}")
2052
 
2053
  # Store settings with timestamp
2054
  if webrtc_id:
@@ -2057,7 +2174,7 @@ async def custom_offer(request: Request):
2057
  'target_language': target_language,
2058
  'system_prompt': system_prompt,
2059
  'interpretation_mode': interpretation_mode,
2060
- 'interpretation_language': interpretation_language,
2061
  'timestamp': asyncio.get_event_loop().time()
2062
  }
2063
 
@@ -2130,13 +2247,15 @@ async def outputs(webrtc_id: str):
2130
  yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
2131
  # Regular transcript event with language info
2132
  elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
2133
- event = output.args[0]['event']
2134
- if hasattr(event, 'transcript'):
2135
  data = {
2136
  "role": "assistant",
2137
- "content": event.transcript,
2138
- "language": output.args[0].get('language', ''),
2139
- "mode": output.args[0].get('mode', 'normal')
 
 
2140
  }
2141
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
2142
 
 
252
  resize: vertical;
253
  min-height: 80px;
254
  }
255
+ /* Multi-language selection */
256
+ .language-selection-grid {
257
+ display: grid;
258
+ grid-template-columns: repeat(2, 1fr);
259
+ gap: 10px;
260
+ margin-top: 10px;
261
+ max-height: 200px;
262
+ overflow-y: auto;
263
+ padding: 10px;
264
+ background-color: var(--dark-bg);
265
+ border-radius: 6px;
266
+ border: 1px solid var(--border-color);
267
+ }
268
+ .language-checkbox {
269
+ display: flex;
270
+ align-items: center;
271
+ gap: 8px;
272
+ font-size: 13px;
273
+ cursor: pointer;
274
+ padding: 5px;
275
+ border-radius: 4px;
276
+ transition: background-color 0.2s;
277
+ }
278
+ .language-checkbox:hover {
279
+ background-color: rgba(111, 66, 193, 0.1);
280
+ }
281
+ .language-checkbox input[type="checkbox"] {
282
+ width: 16px;
283
+ height: 16px;
284
+ cursor: pointer;
285
+ }
286
+ .language-checkbox.default {
287
+ font-weight: 500;
288
+ color: var(--primary-color);
289
+ }
290
+ .selected-languages {
291
+ margin-top: 10px;
292
+ font-size: 12px;
293
+ color: #999;
294
+ }
295
  .chat-container {
296
  border-radius: 12px;
297
  background-color: var(--card-bg);
 
360
  }
361
  .message.assistant.interpretation {
362
  background: linear-gradient(135deg, #1a5a3e, #2e7d32);
363
+ font-style: normal;
364
+ }
365
+ .interpretation-result {
366
+ background: linear-gradient(135deg, #1e3a5f, #2c5282);
367
+ padding: 15px;
368
+ margin: 10px 0;
369
+ border-radius: 8px;
370
+ border: 1px solid rgba(66, 153, 225, 0.3);
371
+ }
372
+ .interpretation-header {
373
+ font-weight: bold;
374
+ color: #90cdf4;
375
+ margin-bottom: 10px;
376
+ display: flex;
377
+ align-items: center;
378
+ gap: 10px;
379
+ }
380
+ .interpretation-original {
381
+ font-size: 14px;
382
+ color: #cbd5e0;
383
+ margin-bottom: 15px;
384
+ padding: 10px;
385
+ background-color: rgba(0, 0, 0, 0.2);
386
+ border-radius: 4px;
387
+ }
388
+ .interpretation-translations {
389
+ display: flex;
390
+ flex-direction: column;
391
+ gap: 8px;
392
+ }
393
+ .translation-item {
394
+ display: flex;
395
+ align-items: baseline;
396
+ gap: 10px;
397
+ padding: 8px 12px;
398
+ background-color: rgba(255, 255, 255, 0.05);
399
+ border-radius: 4px;
400
+ border-left: 3px solid var(--primary-color);
401
+ }
402
+ .translation-lang {
403
+ font-weight: 500;
404
+ color: var(--primary-color);
405
+ min-width: 80px;
406
+ font-size: 13px;
407
+ }
408
+ .translation-text {
409
+ flex: 1;
410
+ color: var(--text-color);
411
+ font-size: 14px;
412
  }
413
  .interpretation-arrow {
414
  color: #4caf50;
415
  font-weight: bold;
416
  margin: 0 10px;
417
  }
418
+ .language-info {
419
+ font-size: 12px;
420
+ color: #888;
421
+ margin-left: 5px;
422
+ }
423
  .controls {
424
  text-align: center;
425
  margin-top: auto;
 
606
  background-color: var(--secondary-color);
607
  border-radius: 50%;
608
  }
 
 
 
 
 
609
  </style>
610
  </head>
611
 
 
681
  <div class="toggle-slider"></div>
682
  </div>
683
  </div>
684
+ <div id="interpretation-languages-container" style="display: none;">
685
+ <div class="setting-label" style="margin-bottom: 5px;">통역 언어 선택 (최대 4개)</div>
686
+ <div class="language-selection-grid" id="language-selection-grid">
687
+ <!-- Languages will be populated by JavaScript -->
688
+ </div>
689
+ <div class="selected-languages" id="selected-languages-display">
690
+ 선택된 언어: 없음
691
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
692
  </div>
693
  </div>
694
  <div class="interpretation-info" id="interpretation-info" style="display: none;">
695
  <strong>통역 모드 안내:</strong><br>
696
+ • 음성으로 말하면 선택한 언어들로 자동 통역됩니다<br>
697
+ • Whisper + GPT-4o-mini를 사용합니다<br>
698
+ • 말을 마치고 잠시 기다리면 통역이 시작됩니다<br>
699
+ • 번역된 텍스트만 화면에 표시됩니다
700
  </div>
701
  <div class="text-input-section">
702
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
 
731
  let webSearchEnabled = false;
732
  let selectedLanguage = "";
733
  let interpretationMode = false;
734
+ let interpretationLanguages = [];
735
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
736
  const audioOutput = document.getElementById('audio-output');
737
  const startButton = document.getElementById('start-button');
 
742
  const searchToggle = document.getElementById('search-toggle');
743
  const languageSelect = document.getElementById('language-select');
744
  const interpretationToggle = document.getElementById('interpretation-toggle');
745
+ const interpretationLanguagesContainer = document.getElementById('interpretation-languages-container');
 
746
  const interpretationInfo = document.getElementById('interpretation-info');
747
  const systemPromptInput = document.getElementById('system-prompt');
748
  const textInput = document.getElementById('text-input');
749
+ const languageSelectionGrid = document.getElementById('language-selection-grid');
750
+ const selectedLanguagesDisplay = document.getElementById('selected-languages-display');
751
  let audioLevel = 0;
752
  let animationFrame;
753
  let audioContext, analyser, audioSource;
754
  let dataChannel = null;
755
  let isVoiceActive = false;
756
 
757
+ // Available languages for interpretation
758
+ const INTERPRETATION_LANGUAGES = {
759
+ "en": { name: "English", default: true },
760
+ "zh": { name: "中文 (Chinese)", default: true },
761
+ "th": { name: "ไทย (Thai)", default: true },
762
+ "ru": { name: "Русский (Russian)", default: true },
763
+ "ja": { name: "日本語 (Japanese)", default: false },
764
+ "es": { name: "Español (Spanish)", default: false },
765
+ "fr": { name: "Français (French)", default: false },
766
+ "de": { name: "Deutsch (German)", default: false },
767
+ "pt": { name: "Português (Portuguese)", default: false },
768
+ "ar": { name: "العربية (Arabic)", default: false },
769
+ "hi": { name: "हिन्दी (Hindi)", default: false },
770
+ "vi": { name: "Tiếng Việt (Vietnamese)", default: false },
771
+ "id": { name: "Bahasa Indonesia", default: false },
772
+ "it": { name: "Italiano (Italian)", default: false },
773
+ "nl": { name: "Nederlands (Dutch)", default: false },
774
+ "pl": { name: "Polski (Polish)", default: false },
775
+ "tr": { name: "Türkçe (Turkish)", default: false },
776
+ "sv": { name: "Svenska (Swedish)", default: false },
777
+ "da": { name: "Dansk (Danish)", default: false },
778
+ "no": { name: "Norsk (Norwegian)", default: false },
779
+ "fi": { name: "Suomi (Finnish)", default: false },
780
+ "he": { name: "עברית (Hebrew)", default: false },
781
+ "uk": { name: "Українська (Ukrainian)", default: false },
782
+ "cs": { name: "Čeština (Czech)", default: false },
783
+ "el": { name: "Ελληνικά (Greek)", default: false },
784
+ "ro": { name: "Română (Romanian)", default: false },
785
+ "hu": { name: "Magyar (Hungarian)", default: false },
786
+ "ms": { name: "Bahasa Melayu (Malay)", default: false }
787
+ };
788
+
789
+ // Initialize language selection grid
790
+ function initializeLanguageSelection() {
791
+ languageSelectionGrid.innerHTML = '';
792
+
793
+ // Sort languages: defaults first, then alphabetically
794
+ const sortedLanguages = Object.entries(INTERPRETATION_LANGUAGES).sort((a, b) => {
795
+ if (a[1].default && !b[1].default) return -1;
796
+ if (!a[1].default && b[1].default) return 1;
797
+ return a[1].name.localeCompare(b[1].name);
798
+ });
799
+
800
+ sortedLanguages.forEach(([code, lang]) => {
801
+ const label = document.createElement('label');
802
+ label.className = 'language-checkbox' + (lang.default ? ' default' : '');
803
+
804
+ const checkbox = document.createElement('input');
805
+ checkbox.type = 'checkbox';
806
+ checkbox.value = code;
807
+ checkbox.checked = lang.default;
808
+ checkbox.addEventListener('change', onLanguageCheckboxChange);
809
+
810
+ const text = document.createElement('span');
811
+ text.textContent = lang.name;
812
+
813
+ label.appendChild(checkbox);
814
+ label.appendChild(text);
815
+ languageSelectionGrid.appendChild(label);
816
+ });
817
+
818
+ // Initialize with default languages
819
+ updateSelectedLanguages();
820
+ }
821
+
822
+ function onLanguageCheckboxChange() {
823
+ const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
824
+
825
+ // Limit to 4 languages
826
+ if (checkedBoxes.length > 4) {
827
+ this.checked = false;
828
+ showError('최대 4개 언어까지 선택할 수 있습니다.');
829
+ return;
830
+ }
831
+
832
+ updateSelectedLanguages();
833
+ }
834
+
835
+ function updateSelectedLanguages() {
836
+ const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
837
+ interpretationLanguages = Array.from(checkedBoxes).map(cb => cb.value);
838
+
839
+ if (interpretationLanguages.length === 0) {
840
+ selectedLanguagesDisplay.textContent = '선택된 언어: 없음';
841
+ } else {
842
+ const langNames = interpretationLanguages.map(code =>
843
+ INTERPRETATION_LANGUAGES[code].name
844
+ ).join(', ');
845
+ selectedLanguagesDisplay.textContent = `선택된 언어 (${interpretationLanguages.length}/4): ${langNames}`;
846
+ }
847
+
848
+ console.log('Selected interpretation languages:', interpretationLanguages);
849
+ }
850
+
851
  // Web search toggle functionality
852
  searchToggle.addEventListener('click', () => {
853
  webSearchEnabled = !webSearchEnabled;
 
865
  interpretationToggle.addEventListener('click', () => {
866
  if (!interpretationMode) {
867
  // Turning ON interpretation mode
868
+ interpretationLanguagesContainer.style.display = 'block';
869
  interpretationInfo.style.display = 'block';
870
 
871
+ // Check if any languages are selected
872
+ if (interpretationLanguages.length === 0) {
873
+ showError('통역 언어를 선택해주세요.');
874
+ interpretationToggle.classList.remove('active');
875
+ return;
876
+ }
877
 
878
+ // Enable interpretation mode
879
+ interpretationMode = true;
880
+ interpretationToggle.classList.add('active');
881
+
882
+ // Disable other features
883
+ languageSelect.value = '';
884
+ selectedLanguage = '';
885
+ languageSelect.disabled = true;
886
+ searchToggle.classList.remove('active');
887
+ webSearchEnabled = false;
888
+ searchToggle.style.opacity = '0.5';
889
+ searchToggle.style.pointerEvents = 'none';
890
+ textInput.disabled = true;
891
+ textInput.placeholder = '통역 모드에서는 텍스트 입력이 지원되지 않습니다';
892
+ sendButton.style.display = 'none';
893
+
894
+ console.log('Interpretation mode enabled with languages:', interpretationLanguages);
895
+
896
+ // If connected, restart to apply interpretation mode
897
+ if (peerConnection && peerConnection.connectionState === 'connected') {
898
+ showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
899
+ stop();
900
+ setTimeout(() => {
901
+ setupWebRTC();
902
+ }, 500);
903
+ }
904
  } else {
905
  // Turning OFF interpretation mode
906
  interpretationMode = false;
907
  interpretationToggle.classList.remove('active');
908
+ interpretationLanguagesContainer.style.display = 'none';
909
  interpretationInfo.style.display = 'none';
 
 
910
 
911
  // Re-enable other features
912
  languageSelect.disabled = false;
 
931
  console.log('Interpretation mode:', interpretationMode);
932
  });
933
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
934
  // System prompt update
935
  systemPromptInput.addEventListener('input', () => {
936
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
 
1168
  target_language: selectedLanguage,
1169
  system_prompt: systemPrompt,
1170
  interpretation_mode: interpretationMode,
1171
+ interpretation_languages: interpretationLanguages
1172
  });
1173
 
1174
  const response = await fetch('/webrtc/offer', {
 
1182
  target_language: selectedLanguage,
1183
  system_prompt: systemPrompt,
1184
  interpretation_mode: interpretationMode,
1185
+ interpretation_languages: interpretationLanguages
1186
  })
1187
  });
1188
  const serverResponse = await response.json();
 
1203
  if (interpretationMode) {
1204
  console.log('[INTERPRETATION OUTPUT]', {
1205
  content: content,
 
1206
  mode: eventJson.mode,
1207
+ translations: eventJson.translations
1208
  });
1209
  }
1210
 
1211
  if (selectedLanguage && eventJson.language) {
1212
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
1213
+ } else if (interpretationMode && eventJson.mode === 'interpretation') {
1214
+ // Handle interpretation mode output
1215
+ if (eventJson.original && eventJson.translations) {
1216
+ addInterpretationResult(eventJson.original, eventJson.translations);
 
 
 
 
 
 
1217
  }
1218
+ } else {
1219
+ addMessage("assistant", content);
1220
  }
 
1221
  });
1222
  eventSource.addEventListener("search", (event) => {
1223
  const eventJson = JSON.parse(event.data);
 
1236
  const messageDiv = document.createElement('div');
1237
  messageDiv.classList.add('message', role);
1238
 
 
 
 
 
 
1239
  if (content.includes('<span')) {
1240
  messageDiv.innerHTML = content;
1241
  } else {
 
1244
  chatMessages.appendChild(messageDiv);
1245
  chatMessages.scrollTop = chatMessages.scrollHeight;
1246
  }
1247
+
1248
// Render one interpretation result card into the chat log.
// `original` is the transcribed user speech; `translations` is an array of
// { language, text } objects, one entry per selected target language.
//
// Security fix: the transcript and the translations are untrusted runtime
// text (user speech / model output). The previous version interpolated them
// into innerHTML, allowing HTML/script injection into the page. Dynamic text
// is now inserted via textContent / text nodes; innerHTML is used only for
// the fully static header markup.
function addInterpretationResult(original, translations) {
    const resultDiv = document.createElement('div');
    resultDiv.className = 'interpretation-result';

    // Header (static markup only — safe for innerHTML)
    const headerDiv = document.createElement('div');
    headerDiv.className = 'interpretation-header';
    headerDiv.innerHTML = `
        <span>🌐</span>
        <span>자동 통역 결과</span>
    `;
    resultDiv.appendChild(headerDiv);

    // Original text — bold label, transcript as plain text
    const originalDiv = document.createElement('div');
    originalDiv.className = 'interpretation-original';
    const originalLabel = document.createElement('strong');
    originalLabel.textContent = '원문:';
    originalDiv.appendChild(originalLabel);
    originalDiv.appendChild(document.createTextNode(' ' + original));
    resultDiv.appendChild(originalDiv);

    // One row per target-language translation
    const translationsDiv = document.createElement('div');
    translationsDiv.className = 'interpretation-translations';

    translations.forEach(trans => {
        const transItem = document.createElement('div');
        transItem.className = 'translation-item';

        const langSpan = document.createElement('span');
        langSpan.className = 'translation-lang';
        langSpan.textContent = `${trans.language}:`;

        const textSpan = document.createElement('span');
        textSpan.className = 'translation-text';
        textSpan.textContent = trans.text;

        transItem.appendChild(langSpan);
        transItem.appendChild(document.createTextNode(' '));
        transItem.appendChild(textSpan);
        translationsDiv.appendChild(transItem);
    });

    resultDiv.appendChild(translationsDiv);
    chatMessages.appendChild(resultDiv);

    // Keep the newest result in view.
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
1285
+
1286
  function stop() {
1287
  if (animationFrame) {
1288
  cancelAnimationFrame(animationFrame);
 
1324
  }
1325
  });
1326
 
1327
+ // Initialize on page load
1328
// Page bootstrap: reveal the send button and build the
// interpretation-language checkbox grid with its defaults.
window.addEventListener('DOMContentLoaded', function () {
    sendButton.style.display = 'block';
    initializeLanguageSelection();
});
1332
  </script>
1333
  </body>
 
1403
  return chatbot
1404
 
1405
 
 
 
 
 
 
 
 
 
 
 
 
 
1406
  async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
1407
  system_prompt: str) -> Dict[str, str]:
1408
  """Process text chat using GPT-4o-mini model"""
 
1507
  class OpenAIHandler(AsyncStreamHandler):
1508
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1509
  system_prompt: str = "", webrtc_id: str = None,
1510
+ interpretation_mode: bool = False, interpretation_languages: List[str] = None) -> None:
1511
  super().__init__(
1512
  expected_layout="mono",
1513
  output_sample_rate=SAMPLE_RATE,
 
1525
  self.target_language = target_language
1526
  self.system_prompt = system_prompt
1527
  self.interpretation_mode = interpretation_mode
1528
+ self.interpretation_languages = interpretation_languages or []
1529
 
1530
  # For interpretation mode
1531
  self.audio_buffer = []
 
1536
 
1537
  print(f"Handler created with web_search_enabled={web_search_enabled}, "
1538
  f"target_language={target_language}, webrtc_id={webrtc_id}, "
1539
+ f"interpretation_mode={interpretation_mode}, interpretation_languages={interpretation_languages}")
1540
 
1541
  def copy(self):
1542
  # Get the most recent settings
 
1554
  system_prompt=settings.get('system_prompt', ''),
1555
  webrtc_id=recent_id,
1556
  interpretation_mode=settings.get('interpretation_mode', False),
1557
+ interpretation_languages=settings.get('interpretation_languages', [])
1558
  )
1559
 
1560
  print(f"Handler.copy() called - creating new handler with default settings")
 
1594
  await self.connection.response.create()
1595
 
1596
  async def process_interpretation(self):
1597
+ """Process audio buffer for interpretation - text only output"""
1598
+ if not self.audio_buffer or not self.interpretation_languages:
1599
  return
1600
 
1601
  try:
 
1629
  if not user_text:
1630
  return
1631
 
1632
+ # 2. Translate to all selected languages
1633
+ translations = []
1634
 
1635
+ for lang_code in self.interpretation_languages:
1636
+ target_lang_name = SUPPORTED_LANGUAGES.get(lang_code, lang_code)
1637
+
1638
+ # Create very explicit translation prompt
1639
+ translation_examples = {
1640
+ "en": {
1641
+ "안녕하세요": "Hello",
1642
+ "감사합니다": "Thank you",
1643
+ "오늘 날씨가 좋네요": "The weather is nice today"
1644
+ },
1645
+ "ja": {
1646
+ "안녕하세요": "こんにちは",
1647
+ "감사합니다": "ありがとうございます",
1648
+ "오늘 날씨가 좋네요": "今日はいい天気ですね"
1649
+ },
1650
+ "zh": {
1651
+ "안녕하세요": "你好",
1652
+ "감사합니다": "谢谢",
1653
+ "오늘 날씨가 좋네요": "今天天气很好"
1654
+ },
1655
+ "es": {
1656
+ "안녕하세요": "Hola",
1657
+ "감사합니다": "Gracias",
1658
+ "오늘 날씨가 좋네요": "El clima está agradable hoy"
1659
+ },
1660
+ "ru": {
1661
+ "안녕하세요": "Привет",
1662
+ "감사합니다": "Спасибо",
1663
+ "오늘 날씨가 좋네요": "Сегодня хорошая погода"
1664
+ },
1665
+ "th": {
1666
+ "안녕하세요": "สวัสดี",
1667
+ "감사합니다": "ขอบคุณ",
1668
+ "오늘 날씨가 좋네요": "วันนี้อากาศดี"
1669
+ }
1670
  }
1671
+
1672
+ examples = translation_examples.get(lang_code, translation_examples.get("en", {}))
1673
+ examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
1674
+
1675
+ system_prompt = f"""You are a Korean to {target_lang_name} translator.
 
 
1676
 
1677
  STRICT RULES:
1678
  1. Output ONLY the {target_lang_name} translation
 
1685
  {examples_text}
1686
 
1687
  Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""
1688
+
1689
+ print(f"[INTERPRETATION] Translating to {target_lang_name}...")
1690
+
1691
+ translation_response = await self.client.chat.completions.create(
1692
+ model="gpt-4o-mini",
1693
+ messages=[
1694
+ {
1695
+ "role": "system",
1696
+ "content": system_prompt
1697
+ },
1698
+ {
1699
+ "role": "user",
1700
+ "content": f"Translate this Korean to {target_lang_name}: {user_text}"
1701
+ }
1702
+ ],
1703
+ temperature=0.1,
1704
+ max_tokens=200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1705
  )
1706
+
1707
+ translated_text = translation_response.choices[0].message.content.strip()
1708
+
1709
+ # Remove any Korean characters if they accidentally appear
1710
+ import re
1711
+ if re.search(r'[가-힣]', translated_text):
1712
+ print(f"[INTERPRETATION] WARNING: Korean characters detected in {target_lang_name} translation")
1713
+ translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
1714
+
1715
+ translations.append({
1716
+ "language": target_lang_name,
1717
+ "text": translated_text
1718
+ })
1719
+
1720
+ print(f"[INTERPRETATION] {target_lang_name}: {translated_text}")
1721
 
1722
+ # Send interpretation result (text only)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1723
  output_data = {
1724
  "event": type('Event', (), {
1725
+ 'transcript': f"통역 완료" # Simple notification
1726
  })(),
1727
+ "mode": "interpretation",
1728
+ "original": user_text,
1729
+ "translations": translations
1730
  }
1731
  await self.output_queue.put(AdditionalOutputs(output_data))
1732
 
 
1740
  "event": type('Event', (), {
1741
  'transcript': f"통역 오류: {str(e)}"
1742
  })(),
 
1743
  "mode": "error"
1744
  }
1745
  await self.output_queue.put(AdditionalOutputs(error_data))
 
1775
  self.target_language = settings.get('target_language', '')
1776
  self.system_prompt = settings.get('system_prompt', '')
1777
  self.interpretation_mode = settings.get('interpretation_mode', False)
1778
+ self.interpretation_languages = settings.get('interpretation_languages', [])
1779
  self.webrtc_id = recent_id
1780
  print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
1781
  f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}, "
1782
  f"interpretation_mode={self.interpretation_mode}")
1783
+ print(f"Handler interpretation settings: mode={self.interpretation_mode}, languages={self.interpretation_languages}")
1784
 
1785
  print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
1786
  f"target_language={self.target_language}, interpretation_mode={self.interpretation_mode}, "
1787
+ f"interpretation_languages={self.interpretation_languages}")
1788
 
1789
  self.client = openai.AsyncOpenAI()
1790
 
1791
  # If in interpretation mode, don't connect to Realtime API
1792
  if self.interpretation_mode:
1793
+ print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini (text only)")
1794
+ print(f"[INTERPRETATION MODE] Target languages: {self.interpretation_languages}")
1795
  # Just keep the handler ready to process audio
 
1796
  self.client = openai.AsyncOpenAI()
1797
  return
1798
 
 
2161
  target_language = body.get("target_language", "")
2162
  system_prompt = body.get("system_prompt", "")
2163
  interpretation_mode = body.get("interpretation_mode", False)
2164
+ interpretation_languages = body.get("interpretation_languages", [])
2165
 
2166
  print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
2167
  f"target_language: {target_language}, interpretation_mode: {interpretation_mode}, "
2168
+ f"interpretation_languages: {interpretation_languages}")
2169
 
2170
  # Store settings with timestamp
2171
  if webrtc_id:
 
2174
  'target_language': target_language,
2175
  'system_prompt': system_prompt,
2176
  'interpretation_mode': interpretation_mode,
2177
+ 'interpretation_languages': interpretation_languages,
2178
  'timestamp': asyncio.get_event_loop().time()
2179
  }
2180
 
 
2247
  yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
2248
  # Regular transcript event with language info
2249
  elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
2250
+ event_data = output.args[0]
2251
+ if 'event' in event_data and hasattr(event_data['event'], 'transcript'):
2252
  data = {
2253
  "role": "assistant",
2254
+ "content": event_data['event'].transcript,
2255
+ "language": event_data.get('language', ''),
2256
+ "mode": event_data.get('mode', 'normal'),
2257
+ "original": event_data.get('original', ''),
2258
+ "translations": event_data.get('translations', [])
2259
  }
2260
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
2261