seawolf2357 committed
Commit 235fd3c (verified) · Parent: 78b5e61

Update app.py

Files changed (1)
  1. app.py +44 -674
app.py CHANGED
@@ -162,20 +162,6 @@ HTML_CONTENT = """<!DOCTYPE html>
162
  gap: 15px;
163
  margin-bottom: 15px;
164
  }
165
- .interpretation-section {
166
- display: flex;
167
- flex-direction: column;
168
- gap: 15px;
169
- padding: 15px;
170
- background-color: var(--dark-bg);
171
- border-radius: 8px;
172
- margin-top: 15px;
173
- }
174
- .interpretation-info {
175
- font-size: 13px;
176
- color: #999;
177
- margin-top: 5px;
178
- }
179
  .setting-item {
180
  display: flex;
181
  align-items: center;
@@ -252,46 +238,6 @@ HTML_CONTENT = """<!DOCTYPE html>
252
  resize: vertical;
253
  min-height: 80px;
254
  }
255
- /* Multi-language selection */
256
- .language-selection-grid {
257
- display: grid;
258
- grid-template-columns: repeat(2, 1fr);
259
- gap: 10px;
260
- margin-top: 10px;
261
- max-height: 200px;
262
- overflow-y: auto;
263
- padding: 10px;
264
- background-color: var(--dark-bg);
265
- border-radius: 6px;
266
- border: 1px solid var(--border-color);
267
- }
268
- .language-checkbox {
269
- display: flex;
270
- align-items: center;
271
- gap: 8px;
272
- font-size: 13px;
273
- cursor: pointer;
274
- padding: 5px;
275
- border-radius: 4px;
276
- transition: background-color 0.2s;
277
- }
278
- .language-checkbox:hover {
279
- background-color: rgba(111, 66, 193, 0.1);
280
- }
281
- .language-checkbox input[type="checkbox"] {
282
- width: 16px;
283
- height: 16px;
284
- cursor: pointer;
285
- }
286
- .language-checkbox.default {
287
- font-weight: 500;
288
- color: var(--primary-color);
289
- }
290
- .selected-languages {
291
- margin-top: 10px;
292
- font-size: 12px;
293
- color: #999;
294
- }
295
  .chat-container {
296
  border-radius: 12px;
297
  background-color: var(--card-bg);
@@ -358,63 +304,6 @@ HTML_CONTENT = """<!DOCTYPE html>
358
  padding: 10px;
359
  margin-bottom: 10px;
360
  }
361
- .message.assistant.interpretation {
362
- background: linear-gradient(135deg, #1a5a3e, #2e7d32);
363
- font-style: normal;
364
- }
365
- .interpretation-result {
366
- background: linear-gradient(135deg, #1e3a5f, #2c5282);
367
- padding: 15px;
368
- margin: 10px 0;
369
- border-radius: 8px;
370
- border: 1px solid rgba(66, 153, 225, 0.3);
371
- }
372
- .interpretation-header {
373
- font-weight: bold;
374
- color: #90cdf4;
375
- margin-bottom: 10px;
376
- display: flex;
377
- align-items: center;
378
- gap: 10px;
379
- }
380
- .interpretation-original {
381
- font-size: 14px;
382
- color: #cbd5e0;
383
- margin-bottom: 15px;
384
- padding: 10px;
385
- background-color: rgba(0, 0, 0, 0.2);
386
- border-radius: 4px;
387
- }
388
- .interpretation-translations {
389
- display: flex;
390
- flex-direction: column;
391
- gap: 8px;
392
- }
393
- .translation-item {
394
- display: flex;
395
- align-items: baseline;
396
- gap: 10px;
397
- padding: 8px 12px;
398
- background-color: rgba(255, 255, 255, 0.05);
399
- border-radius: 4px;
400
- border-left: 3px solid var(--primary-color);
401
- }
402
- .translation-lang {
403
- font-weight: 500;
404
- color: var(--primary-color);
405
- min-width: 80px;
406
- font-size: 13px;
407
- }
408
- .translation-text {
409
- flex: 1;
410
- color: var(--text-color);
411
- font-size: 14px;
412
- }
413
- .interpretation-arrow {
414
- color: #4caf50;
415
- font-weight: bold;
416
- margin: 0 10px;
417
- }
418
  .language-info {
419
  font-size: 12px;
420
  color: #888;
@@ -639,7 +528,7 @@ HTML_CONTENT = """<!DOCTYPE html>
639
  </div>
640
  </div>
641
  <div class="setting-item">
642
- <span class="setting-label">자동 번역</span>
531
+ <span class="setting-label">다국어 번역 채팅</span>
643
  <select id="language-select">
644
  <option value="">비활성화</option>
645
  <option value="ko">한국어 (Korean)</option>
@@ -674,30 +563,6 @@ HTML_CONTENT = """<!DOCTYPE html>
674
  </select>
675
  </div>
676
  </div>
677
- <div class="interpretation-section">
678
- <div class="setting-item">
679
- <span class="setting-label">자동 통역</span>
680
- <div id="interpretation-toggle" class="toggle-switch">
681
- <div class="toggle-slider"></div>
682
- </div>
683
- </div>
684
- <div id="interpretation-languages-container" style="display: none;">
685
- <div class="setting-label" style="margin-bottom: 5px;">통역 언어 선택 (최대 4개)</div>
686
- <div class="language-selection-grid" id="language-selection-grid">
687
- <!-- Languages will be populated by JavaScript -->
688
- </div>
689
- <div class="selected-languages" id="selected-languages-display">
690
- 선택된 언어: 없음
691
- </div>
692
- </div>
693
- </div>
694
- <div class="interpretation-info" id="interpretation-info" style="display: none;">
695
- <strong>통역 모드 안내:</strong><br>
696
- • 음성으로 말하면 선택한 언어들로 자동 통역됩니다<br>
697
- • Whisper + GPT-4o-mini를 사용합니다<br>
698
- • 말을 마치고 잠시 기다리면 통역이 시작됩니다<br>
699
- • 번역된 텍스트만 화면에 표시됩니다
700
- </div>
701
  <div class="text-input-section">
702
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
703
  <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
@@ -730,8 +595,6 @@ HTML_CONTENT = """<!DOCTYPE html>
730
  let webrtc_id;
731
  let webSearchEnabled = false;
732
  let selectedLanguage = "";
733
- let interpretationMode = false;
734
- let interpretationLanguages = [];
735
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
736
  const audioOutput = document.getElementById('audio-output');
737
  const startButton = document.getElementById('start-button');
@@ -741,113 +604,14 @@ HTML_CONTENT = """<!DOCTYPE html>
741
  const statusText = document.getElementById('status-text');
742
  const searchToggle = document.getElementById('search-toggle');
743
  const languageSelect = document.getElementById('language-select');
744
- const interpretationToggle = document.getElementById('interpretation-toggle');
745
- const interpretationLanguagesContainer = document.getElementById('interpretation-languages-container');
746
- const interpretationInfo = document.getElementById('interpretation-info');
747
  const systemPromptInput = document.getElementById('system-prompt');
748
  const textInput = document.getElementById('text-input');
749
- const languageSelectionGrid = document.getElementById('language-selection-grid');
750
- const selectedLanguagesDisplay = document.getElementById('selected-languages-display');
751
  let audioLevel = 0;
752
  let animationFrame;
753
  let audioContext, analyser, audioSource;
754
  let dataChannel = null;
755
  let isVoiceActive = false;
756
 
757
- // Available languages for interpretation
758
- const INTERPRETATION_LANGUAGES = {
759
- "en": { name: "English", default: true },
760
- "zh": { name: "中文 (Chinese)", default: true },
761
- "th": { name: "ไทย (Thai)", default: true },
762
- "ru": { name: "Русский (Russian)", default: true },
763
- "ja": { name: "日本語 (Japanese)", default: false },
764
- "es": { name: "Español (Spanish)", default: false },
765
- "fr": { name: "Français (French)", default: false },
766
- "de": { name: "Deutsch (German)", default: false },
767
- "pt": { name: "Português (Portuguese)", default: false },
768
- "ar": { name: "العربية (Arabic)", default: false },
769
- "hi": { name: "हिन्दी (Hindi)", default: false },
770
- "vi": { name: "Tiếng Việt (Vietnamese)", default: false },
771
- "id": { name: "Bahasa Indonesia", default: false },
772
- "it": { name: "Italiano (Italian)", default: false },
773
- "nl": { name: "Nederlands (Dutch)", default: false },
774
- "pl": { name: "Polski (Polish)", default: false },
775
- "tr": { name: "Türkçe (Turkish)", default: false },
776
- "sv": { name: "Svenska (Swedish)", default: false },
777
- "da": { name: "Dansk (Danish)", default: false },
778
- "no": { name: "Norsk (Norwegian)", default: false },
779
- "fi": { name: "Suomi (Finnish)", default: false },
780
- "he": { name: "עברית (Hebrew)", default: false },
781
- "uk": { name: "Українська (Ukrainian)", default: false },
782
- "cs": { name: "Čeština (Czech)", default: false },
783
- "el": { name: "Ελληνικά (Greek)", default: false },
784
- "ro": { name: "Română (Romanian)", default: false },
785
- "hu": { name: "Magyar (Hungarian)", default: false },
786
- "ms": { name: "Bahasa Melayu (Malay)", default: false }
787
- };
788
-
789
- // Initialize language selection grid
790
- function initializeLanguageSelection() {
791
- languageSelectionGrid.innerHTML = '';
792
-
793
- // Sort languages: defaults first, then alphabetically
794
- const sortedLanguages = Object.entries(INTERPRETATION_LANGUAGES).sort((a, b) => {
795
- if (a[1].default && !b[1].default) return -1;
796
- if (!a[1].default && b[1].default) return 1;
797
- return a[1].name.localeCompare(b[1].name);
798
- });
799
-
800
- sortedLanguages.forEach(([code, lang]) => {
801
- const label = document.createElement('label');
802
- label.className = 'language-checkbox' + (lang.default ? ' default' : '');
803
-
804
- const checkbox = document.createElement('input');
805
- checkbox.type = 'checkbox';
806
- checkbox.value = code;
807
- checkbox.checked = lang.default;
808
- checkbox.addEventListener('change', onLanguageCheckboxChange);
809
-
810
- const text = document.createElement('span');
811
- text.textContent = lang.name;
812
-
813
- label.appendChild(checkbox);
814
- label.appendChild(text);
815
- languageSelectionGrid.appendChild(label);
816
- });
817
-
818
- // Initialize with default languages
819
- updateSelectedLanguages();
820
- }
821
-
822
- function onLanguageCheckboxChange() {
823
- const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
824
-
825
- // Limit to 4 languages
826
- if (checkedBoxes.length > 4) {
827
- this.checked = false;
828
- showError('최대 4개 언어까지 선택할 수 있습니다.');
829
- return;
830
- }
831
-
832
- updateSelectedLanguages();
833
- }
834
-
835
- function updateSelectedLanguages() {
836
- const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
837
- interpretationLanguages = Array.from(checkedBoxes).map(cb => cb.value);
838
-
839
- if (interpretationLanguages.length === 0) {
840
- selectedLanguagesDisplay.textContent = '선택된 언어: 없음';
841
- } else {
842
- const langNames = interpretationLanguages.map(code =>
843
- INTERPRETATION_LANGUAGES[code].name
844
- ).join(', ');
845
- selectedLanguagesDisplay.textContent = `선택된 언어 (${interpretationLanguages.length}/4): ${langNames}`;
846
- }
847
-
848
- console.log('Selected interpretation languages:', interpretationLanguages);
849
- }
850
-
851
  // Web search toggle functionality
852
  searchToggle.addEventListener('click', () => {
853
  webSearchEnabled = !webSearchEnabled;
@@ -861,90 +625,6 @@ HTML_CONTENT = """<!DOCTYPE html>
861
  console.log('Selected language:', selectedLanguage);
862
  });
863
 
864
- // Interpretation mode toggle - 수정된 버전
865
- interpretationToggle.addEventListener('click', async () => {
866
- if (!interpretationMode) {
867
- // Turning ON interpretation mode
868
- interpretationLanguagesContainer.style.display = 'block';
869
- interpretationInfo.style.display = 'block';
870
-
871
- // Check if any languages are selected
872
- if (interpretationLanguages.length === 0) {
873
- showError('통역 언어를 선택해주세요.');
874
- interpretationToggle.classList.remove('active');
875
- return;
876
- }
877
-
878
- // Enable interpretation mode
879
- interpretationMode = true;
880
- interpretationToggle.classList.add('active');
881
-
882
- // Disable other features
883
- languageSelect.value = '';
884
- selectedLanguage = '';
885
- languageSelect.disabled = true;
886
- searchToggle.classList.remove('active');
887
- webSearchEnabled = false;
888
- searchToggle.style.opacity = '0.5';
889
- searchToggle.style.pointerEvents = 'none';
890
- textInput.disabled = true;
891
- textInput.placeholder = '통역 모드에서는 텍스트 입력이 지원되지 않습니다';
892
- sendButton.style.display = 'none';
893
-
894
- console.log('[FRONTEND] Interpretation mode enabled with languages:', interpretationLanguages);
895
-
896
- // If connected, stop and restart with interpretation mode
897
- if (peerConnection && peerConnection.connectionState === 'connected') {
898
- console.log('[FRONTEND] Stopping current connection for interpretation mode');
899
- showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
900
-
901
- // Force stop the connection
902
- stop();
903
-
904
- // Wait a bit longer to ensure cleanup
905
- await new Promise(resolve => setTimeout(resolve, 1000));
906
-
907
- // Start new connection with interpretation mode
908
- console.log('[FRONTEND] Starting new connection with interpretation mode');
909
- setupWebRTC();
910
- }
911
- } else {
912
- // Turning OFF interpretation mode
913
- interpretationMode = false;
914
- interpretationToggle.classList.remove('active');
915
- interpretationLanguagesContainer.style.display = 'none';
916
- interpretationInfo.style.display = 'none';
917
-
918
- // Re-enable other features
919
- languageSelect.disabled = false;
920
- searchToggle.style.opacity = '1';
921
- searchToggle.style.pointerEvents = 'auto';
922
- textInput.disabled = false;
923
- textInput.placeholder = '텍스트 메시지를 입력하세요...';
924
- sendButton.style.display = 'block';
925
-
926
- console.log('[FRONTEND] Interpretation mode disabled');
927
-
928
- // If connected, restart to apply normal mode
929
- if (peerConnection && peerConnection.connectionState === 'connected') {
930
- console.log('[FRONTEND] Stopping current connection for normal mode');
931
- showError('일반 모드로 전환하기 위해 연결을 다시 시작합니다.');
932
-
933
- // Force stop the connection
934
- stop();
935
-
936
- // Wait a bit longer to ensure cleanup
937
- await new Promise(resolve => setTimeout(resolve, 1000));
938
-
939
- // Start new connection with normal mode
940
- console.log('[FRONTEND] Starting new connection with normal mode');
941
- setupWebRTC();
942
- }
943
- }
944
-
945
- console.log('[FRONTEND] Final interpretation mode:', interpretationMode);
946
- });
947
-
948
  // System prompt update
949
  systemPromptInput.addEventListener('input', () => {
950
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -964,12 +644,6 @@ HTML_CONTENT = """<!DOCTYPE html>
964
  const message = textInput.value.trim();
965
  if (!message) return;
966
 
967
- // Don't allow text messages in interpretation mode
968
- if (interpretationMode) {
969
- showError('통역 모드에서는 텍스트 입력이 지원되지 않습니다.');
970
- return;
971
- }
972
-
973
  // Add user message to chat
974
  addMessage('user', message);
975
  textInput.value = '';
@@ -1023,18 +697,14 @@ HTML_CONTENT = """<!DOCTYPE html>
1023
  statusDot.className = 'status-dot ' + state;
1024
  if (state === 'connected') {
1025
  statusText.textContent = '연결됨';
1026
- if (!interpretationMode) {
1027
- sendButton.style.display = 'block';
1028
- }
700
+ sendButton.style.display = 'block';
1029
  isVoiceActive = true;
1030
  } else if (state === 'connecting') {
1031
  statusText.textContent = '연결 중...';
1032
  sendButton.style.display = 'none';
1033
  } else {
1034
  statusText.textContent = '연결 대기 중';
1035
- if (!interpretationMode) {
1036
- sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
1037
- }
1038
  isVoiceActive = false;
1039
  }
1040
  }
@@ -1180,9 +850,7 @@ HTML_CONTENT = """<!DOCTYPE html>
1180
  webrtc_id: webrtc_id,
1181
  web_search_enabled: webSearchEnabled,
1182
  target_language: selectedLanguage,
1183
- system_prompt: systemPrompt,
1184
- interpretation_mode: interpretationMode,
1185
- interpretation_languages: interpretationLanguages
853
+ system_prompt: systemPrompt
1186
  });
1187
 
1188
  const response = await fetch('/webrtc/offer', {
@@ -1194,9 +862,7 @@ HTML_CONTENT = """<!DOCTYPE html>
1194
  webrtc_id: webrtc_id,
1195
  web_search_enabled: webSearchEnabled,
1196
  target_language: selectedLanguage,
1197
- system_prompt: systemPrompt,
1198
- interpretation_mode: interpretationMode,
1199
- interpretation_languages: interpretationLanguages
865
+ system_prompt: systemPrompt
1200
  })
1201
  });
1202
  const serverResponse = await response.json();
@@ -1213,25 +879,10 @@ HTML_CONTENT = """<!DOCTYPE html>
1213
  const eventJson = JSON.parse(event.data);
1214
  let content = eventJson.content;
1215
 
1216
- // Debug logging for interpretation mode
1217
- if (interpretationMode) {
1218
- console.log('[INTERPRETATION OUTPUT]', {
1219
- content: content,
1220
- mode: eventJson.mode,
1221
- translations: eventJson.translations
1222
- });
1223
- }
1224
-
1225
  if (selectedLanguage && eventJson.language) {
1226
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
1227
- } else if (interpretationMode && eventJson.mode === 'interpretation') {
1228
- // Handle interpretation mode output
1229
- if (eventJson.original && eventJson.translations) {
1230
- addInterpretationResult(eventJson.original, eventJson.translations);
1231
- }
1232
- } else {
1233
- addMessage("assistant", content);
1234
  }
885
+ addMessage("assistant", content);
 
1235
  });
1236
  eventSource.addEventListener("search", (event) => {
1237
  const eventJson = JSON.parse(event.data);
@@ -1259,44 +910,6 @@ HTML_CONTENT = """<!DOCTYPE html>
1259
  chatMessages.scrollTop = chatMessages.scrollHeight;
1260
  }
1261
 
1262
- function addInterpretationResult(original, translations) {
1263
- const resultDiv = document.createElement('div');
1264
- resultDiv.className = 'interpretation-result';
1265
-
1266
- // Header
1267
- const headerDiv = document.createElement('div');
1268
- headerDiv.className = 'interpretation-header';
1269
- headerDiv.innerHTML = `
1270
- <span>🌐</span>
1271
- <span>자동 통역 결과</span>
1272
- `;
1273
- resultDiv.appendChild(headerDiv);
1274
-
1275
- // Original text
1276
- const originalDiv = document.createElement('div');
1277
- originalDiv.className = 'interpretation-original';
1278
- originalDiv.innerHTML = `<strong>원문:</strong> ${original}`;
1279
- resultDiv.appendChild(originalDiv);
1280
-
1281
- // Translations
1282
- const translationsDiv = document.createElement('div');
1283
- translationsDiv.className = 'interpretation-translations';
1284
-
1285
- translations.forEach(trans => {
1286
- const transItem = document.createElement('div');
1287
- transItem.className = 'translation-item';
1288
- transItem.innerHTML = `
1289
- <span class="translation-lang">${trans.language}:</span>
1290
- <span class="translation-text">${trans.text}</span>
1291
- `;
1292
- translationsDiv.appendChild(transItem);
1293
- });
1294
-
1295
- resultDiv.appendChild(translationsDiv);
1296
- chatMessages.appendChild(resultDiv);
1297
- chatMessages.scrollTop = chatMessages.scrollHeight;
1298
- }
1299
-
1300
  function stop() {
1301
  console.log('[STOP] Stopping connection...');
1302
 
@@ -1387,7 +1000,6 @@ HTML_CONTENT = """<!DOCTYPE html>
1387
  // Initialize on page load
1388
  window.addEventListener('DOMContentLoaded', () => {
1389
  sendButton.style.display = 'block';
1390
- initializeLanguageSelection();
1391
  });
1392
  </script>
1393
  </body>
@@ -1566,8 +1178,7 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1566
 
1567
  class OpenAIHandler(AsyncStreamHandler):
1568
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1569
- system_prompt: str = "", webrtc_id: str = None,
1570
- interpretation_mode: bool = False, interpretation_languages: List[str] = None) -> None:
1181
+ system_prompt: str = "", webrtc_id: str = None) -> None:
1571
  super().__init__(
1572
  expected_layout="mono",
1573
  output_sample_rate=SAMPLE_RATE,
@@ -1584,18 +1195,9 @@ class OpenAIHandler(AsyncStreamHandler):
1584
  self.web_search_enabled = web_search_enabled
1585
  self.target_language = target_language
1586
  self.system_prompt = system_prompt
1587
- self.interpretation_mode = interpretation_mode
1588
- self.interpretation_languages = interpretation_languages or []
1589
-
1590
- # For interpretation mode
1591
- self.audio_buffer = []
1592
- self.is_recording = False
1593
- self.silence_frames = 0
1594
- self.silence_threshold = 20 # Reduced for faster response (20 frames = ~0.4 seconds)
1595
- self.min_audio_length = 10 # Minimum frames to consider as speech
1596
 
1597
- print(f"[INIT] Handler created with interpretation_mode={interpretation_mode}, "
1598
- f"interpretation_languages={interpretation_languages}")
1599
 
1600
  def copy(self):
1601
  # Get the most recent settings
@@ -1610,20 +1212,16 @@ class OpenAIHandler(AsyncStreamHandler):
1610
 
1611
  # Log the settings being copied
1612
  print(f"[COPY] Copying settings from {recent_id}:")
1613
- print(f"[COPY] interpretation_mode={settings.get('interpretation_mode', False)}")
1614
- print(f"[COPY] interpretation_languages={settings.get('interpretation_languages', [])}")
1615
 
1616
  return OpenAIHandler(
1617
  web_search_enabled=settings.get('web_search_enabled', False),
1618
  target_language=settings.get('target_language', ''),
1619
  system_prompt=settings.get('system_prompt', ''),
1620
- webrtc_id=recent_id,
1621
- interpretation_mode=settings.get('interpretation_mode', False),
1622
- interpretation_languages=settings.get('interpretation_languages', [])
1220
+ webrtc_id=recent_id
1623
  )
1624
 
1625
  print(f"[COPY] No settings found, creating default handler")
1626
- return OpenAIHandler(web_search_enabled=False, interpretation_mode=False)
1224
+ return OpenAIHandler(web_search_enabled=False)
1627
 
1628
  async def search_web(self, query: str) -> str:
1629
  """Perform web search and return formatted results"""
@@ -1658,173 +1256,9 @@ class OpenAIHandler(AsyncStreamHandler):
1658
  )
1659
  await self.connection.response.create()
1660
 
1661
- async def _interpretation_loop(self):
1662
- """Keep the handler alive for interpretation mode"""
1663
- while self.interpretation_mode:
1664
- await asyncio.sleep(0.1)
1665
-
1666
- async def process_interpretation(self):
1667
- """Process audio buffer for interpretation - text only output"""
1668
- if not self.audio_buffer or not self.interpretation_languages:
1669
- return
1670
-
1671
- try:
1672
- print(f"[INTERPRETATION] Processing audio buffer with {len(self.audio_buffer)} frames")
1673
-
1674
- # Convert audio buffer to WAV format
1675
- audio_data = np.concatenate(self.audio_buffer)
1676
-
1677
- # Create WAV file in memory
1678
- wav_buffer = io.BytesIO()
1679
- with wave.open(wav_buffer, 'wb') as wav_file:
1680
- wav_file.setnchannels(1) # Mono
1681
- wav_file.setsampwidth(2) # 16-bit
1682
- wav_file.setframerate(SAMPLE_RATE)
1683
- wav_file.writeframes(audio_data.tobytes())
1684
-
1685
- wav_buffer.seek(0)
1686
- wav_buffer.name = "audio.wav"
1687
-
1688
- # 1. Transcribe with Whisper
1689
- print("[INTERPRETATION] Transcribing with Whisper...")
1690
- transcript = await self.client.audio.transcriptions.create(
1691
- model="whisper-1",
1692
- file=wav_buffer,
1693
- response_format="text" # Get only text, no timestamps
1694
- )
1695
-
1696
- user_text = transcript.text.strip()
1697
- print(f"[INTERPRETATION] Transcribed: {user_text}")
1698
-
1699
- if not user_text:
1700
- return
1701
-
1702
- # 2. Translate to all selected languages
1703
- translations = []
1704
-
1705
- for lang_code in self.interpretation_languages:
1706
- target_lang_name = SUPPORTED_LANGUAGES.get(lang_code, lang_code)
1707
-
1708
- # Create very explicit translation prompt
1709
- translation_examples = {
1710
- "en": {
1711
- "안녕하세요": "Hello",
1712
- "감사합니다": "Thank you",
1713
- "오늘 날씨가 좋네요": "The weather is nice today"
1714
- },
1715
- "ja": {
1716
- "안녕하세요": "こんにちは",
1717
- "감사합니다": "ありがとうございます",
1718
- "오늘 날씨가 좋네요": "今日はいい天気ですね"
1719
- },
1720
- "zh": {
1721
- "안녕하세요": "你好",
1722
- "감사합니다": "谢谢",
1723
- "오늘 날씨가 좋네요": "今天天气很好"
1724
- },
1725
- "es": {
1726
- "안녕하세요": "Hola",
1727
- "감사합니다": "Gracias",
1728
- "오늘 날씨가 좋네요": "El clima está agradable hoy"
1729
- },
1730
- "ru": {
1731
- "안녕하세요": "Привет",
1732
- "감사합니다": "Спасибо",
1733
- "오늘 날씨가 좋네요": "Сегодня хорошая погода"
1734
- },
1735
- "th": {
1736
- "안녕하세요": "สวัสดี",
1737
- "감사합니다": "ขอบคุณ",
1738
- "오늘 날씨가 좋네요": "วันนี้อากาศดี"
1739
- }
1740
- }
1741
-
1742
- examples = translation_examples.get(lang_code, translation_examples.get("en", {}))
1743
- examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
1744
-
1745
- system_prompt = f"""You are a direct translator from Korean to {target_lang_name}.
1746
-
1747
- CRITICAL RULES:
1748
- 1. TRANSLATE ONLY - Do not answer questions
1749
- 2. Do not generate responses
1750
- 3. Do not add any commentary
1751
- 4. Output ONLY the direct translation
1752
- 5. Preserve the original meaning exactly
1753
-
1754
- If the user says "오늘 날씨 어때?" translate it as "How's the weather today?" NOT answer about the weather.
1755
- If the user says "안녕하세요" translate it as "Hello" NOT respond with greetings.
1756
-
1757
- Examples:
1758
- {examples_text}
1759
-
1760
- Direct translation to {target_lang_name}:"""
1761
-
1762
- print(f"[INTERPRETATION] Translating to {target_lang_name}...")
1763
-
1764
- translation_response = await self.client.chat.completions.create(
1765
- model="gpt-4o-mini",
1766
- messages=[
1767
- {
1768
- "role": "system",
1769
- "content": system_prompt
1770
- },
1771
- {
1772
- "role": "user",
1773
- "content": user_text # Just the text, no additional prompt
1774
- }
1775
- ],
1776
- temperature=0.1,
1777
- max_tokens=200
1778
- )
1779
-
1780
- translated_text = translation_response.choices[0].message.content.strip()
1781
-
1782
- # Remove any Korean characters if they accidentally appear
1783
- import re
1784
- if re.search(r'[가-힣]', translated_text):
1785
- print(f"[INTERPRETATION] WARNING: Korean characters detected in {target_lang_name} translation")
1786
- translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
1787
-
1788
- translations.append({
1789
- "language": target_lang_name,
1790
- "text": translated_text
1791
- })
1792
-
1793
- print(f"[INTERPRETATION] {target_lang_name}: {translated_text}")
1794
-
1795
- # Send interpretation result (text only)
1796
- output_data = {
1797
- "event": type('Event', (), {
1798
- 'transcript': f"통역 완료" # Simple notification
1799
- })(),
1800
- "mode": "interpretation",
1801
- "original": user_text,
1802
- "translations": translations
1803
- }
1804
- await self.output_queue.put(AdditionalOutputs(output_data))
1805
-
1806
- except Exception as e:
1807
- print(f"[INTERPRETATION] Error: {e}")
1808
- import traceback
1809
- traceback.print_exc()
1810
-
1811
- # Send error message to client
1812
- error_data = {
1813
- "event": type('Event', (), {
1814
- 'transcript': f"통역 오류: {str(e)}"
1815
- })(),
1816
- "mode": "error"
1817
- }
1818
- await self.output_queue.put(AdditionalOutputs(error_data))
1819
- finally:
1820
- # Clear the audio buffer
1821
- self.audio_buffer = []
1822
- self.is_recording = False
1823
- self.silence_frames = 0
1824
-
1825
  def get_translation_instructions(self):
1826
  """Get instructions for translation based on target language"""
1827
- if not self.target_language or self.interpretation_mode:
1828
  return ""
1829
 
1830
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
@@ -1835,7 +1269,7 @@ Direct translation to {target_lang_name}:"""
1835
  )
1836
 
1837
  async def start_up(self):
1838
- """Connect to realtime API or setup interpretation mode"""
1839
  # First check if we have the most recent settings
1840
  if connection_settings and self.webrtc_id:
1841
  if self.webrtc_id in connection_settings:
@@ -1843,27 +1277,13 @@ Direct translation to {target_lang_name}:"""
1843
  self.web_search_enabled = settings.get('web_search_enabled', False)
1844
  self.target_language = settings.get('target_language', '')
1845
  self.system_prompt = settings.get('system_prompt', '')
1846
- self.interpretation_mode = settings.get('interpretation_mode', False)
1847
- self.interpretation_languages = settings.get('interpretation_languages', [])
1848
 
1849
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1850
- print(f"[START_UP] interpretation_mode={self.interpretation_mode}")
1851
- print(f"[START_UP] interpretation_languages={self.interpretation_languages}")
1852
 
1853
- print(f"[START_UP] Starting with interpretation_mode={self.interpretation_mode}")
1854
 
1855
  self.client = openai.AsyncOpenAI()
1856
 
1857
- # If in interpretation mode, don't connect to Realtime API
1858
- if self.interpretation_mode:
1859
- print(f"[INTERPRETATION MODE] Active - Skipping Realtime API connection")
1860
- print(f"[INTERPRETATION MODE] Using Whisper + GPT-4o-mini (text only)")
1861
- print(f"[INTERPRETATION MODE] Target languages: {self.interpretation_languages}")
1862
-
1863
- # Do NOT connect to Realtime API
1864
- # Just keep the handler ready to process audio
1865
- return
1866
-
1867
  # Normal mode - connect to Realtime API
1868
  print(f"[NORMAL MODE] Connecting to Realtime API...")
1869
 
@@ -2080,18 +1500,18 @@ RULES:
2080
  ),
2081
  )
2082
 
2083
- # Handle function calls (only in non-interpretation mode)
2084
- elif event.type == "response.function_call_arguments.start" and not self.interpretation_mode:
2085
  print(f"Function call started")
2086
  self.function_call_in_progress = True
2087
  self.current_function_args = ""
2088
  self.current_call_id = getattr(event, 'call_id', None)
2089
 
2090
- elif event.type == "response.function_call_arguments.delta" and not self.interpretation_mode:
2091
  if self.function_call_in_progress:
2092
  self.current_function_args += event.delta
2093
 
2094
- elif event.type == "response.function_call_arguments.done" and not self.interpretation_mode:
2095
  if self.function_call_in_progress:
2096
  print(f"Function call done, args: {self.current_function_args}")
2097
  try:
@@ -2127,82 +1547,41 @@ RULES:
2127
  self.current_call_id = None
2128
 
2129
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
2130
- if self.interpretation_mode:
2131
- print(f"[RECEIVE] Processing in interpretation mode")
2132
- # In interpretation mode, buffer audio and process with Whisper
1550
+ # Normal mode - use Realtime API
1551
+ if not self.connection:
1552
+ print(f"[RECEIVE] No connection in normal mode, skipping")
1553
+ return
1554
+ try:
2133
  _, array = frame
2134
  array = array.squeeze()
2135
-
2136
- # Simple voice activity detection
2137
- audio_level = np.abs(array).mean()
2138
-
2139
- if audio_level > 200: # Lower threshold for better detection
2140
- if not self.is_recording:
2141
- print(f"[INTERPRETATION] Started recording, level: {audio_level:.1f}")
2142
- self.is_recording = True
2143
- self.silence_frames = 0
2144
- self.audio_buffer.append(array)
2145
- elif self.is_recording:
2146
- self.silence_frames += 1
2147
- self.audio_buffer.append(array)
2148
-
2149
- # If we've had enough silence, process the audio
2150
- if self.silence_frames > self.silence_threshold and len(self.audio_buffer) > self.min_audio_length:
2151
- print(f"[INTERPRETATION] Silence detected after {len(self.audio_buffer)} frames")
2152
- # Process in the background to avoid blocking
2153
- asyncio.create_task(self.process_interpretation())
2154
- else:
2155
- # Normal mode - use Realtime API
2156
- if not self.connection:
2157
- print(f"[RECEIVE] No connection in normal mode, skipping")
2158
- return
2159
- try:
2160
- _, array = frame
2161
- array = array.squeeze()
2162
- audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
2163
- await self.connection.input_audio_buffer.append(audio=audio_message)
2164
- except Exception as e:
2165
- print(f"Error in receive: {e}")
2166
 
2167
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
2168
- # In interpretation mode, we need to keep checking for outputs
2169
- if self.interpretation_mode:
2170
- # Use a short timeout to prevent blocking
2171
- try:
2172
- item = await asyncio.wait_for(wait_for_item(self.output_queue), timeout=0.05)
2173
- return item
2174
- except asyncio.TimeoutError:
2175
- # Return None to keep the stream alive
2176
- return None
2177
- else:
2178
- # Normal mode
2179
- item = await wait_for_item(self.output_queue)
2180
-
2181
- # Check if it's a dict with text message
2182
- if isinstance(item, dict) and item.get('type') == 'text_message':
2183
- await self.process_text_message(item['content'])
2184
- return None
2185
-
2186
- return item
1563
+ # Normal mode
1564
+ item = await wait_for_item(self.output_queue)
1565
+
1566
+ # Check if it's a dict with text message
1567
+ if isinstance(item, dict) and item.get('type') == 'text_message':
1568
+ await self.process_text_message(item['content'])
1569
+ return None
1570
+
1571
+ return item
2187
 
2188
  async def shutdown(self) -> None:
2189
- print(f"[SHUTDOWN] Called with interpretation_mode={self.interpretation_mode}")
2190
 
2191
- if self.interpretation_mode:
2192
- # Clean up interpretation mode
2193
- self.audio_buffer = []
2194
- self.is_recording = False
2195
- print("[INTERPRETATION MODE] Shutdown complete")
2196
- else:
2197
- # Normal mode - close Realtime API connection
2198
- if self.connection:
2199
- await self.connection.close()
2200
- self.connection = None
2201
- print("[NORMAL MODE] Connection closed")
2202
 
2203
 
2204
  # Create initial handler instance
2205
- handler = OpenAIHandler(web_search_enabled=False, interpretation_mode=False)
1584
+ handler = OpenAIHandler(web_search_enabled=False)
2206
 
2207
  # Create components
2208
  chatbot = gr.Chatbot(type="messages")
@@ -2235,12 +1614,8 @@ async def custom_offer(request: Request):
2235
  web_search_enabled = body.get("web_search_enabled", False)
2236
  target_language = body.get("target_language", "")
2237
  system_prompt = body.get("system_prompt", "")
2238
- interpretation_mode = body.get("interpretation_mode", False)
2239
- interpretation_languages = body.get("interpretation_languages", [])
2240
 
2241
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
2242
- print(f"[OFFER] interpretation_mode: {interpretation_mode}")
2243
- print(f"[OFFER] interpretation_languages: {interpretation_languages}")
2244
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
2245
  print(f"[OFFER] target_language: {target_language}")
2246
 
@@ -2250,8 +1625,6 @@ async def custom_offer(request: Request):
2250
  'web_search_enabled': web_search_enabled,
2251
  'target_language': target_language,
2252
  'system_prompt': system_prompt,
2253
- 'interpretation_mode': interpretation_mode,
2254
- 'interpretation_languages': interpretation_languages,
2255
  'timestamp': asyncio.get_event_loop().time()
2256
  }
2257
 
@@ -2335,10 +1708,7 @@ async def outputs(webrtc_id: str):
2335
  data = {
2336
  "role": "assistant",
2337
  "content": event_data['event'].transcript,
2338
- "language": event_data.get('language', ''),
2339
- "mode": event_data.get('mode', 'normal'),
2340
- "original": event_data.get('original', ''),
2341
- "translations": event_data.get('translations', [])
2342
  }
2343
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
2344
 
 
162
  gap: 15px;
163
  margin-bottom: 15px;
164
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  .setting-item {
166
  display: flex;
167
  align-items: center;
 
238
  resize: vertical;
239
  min-height: 80px;
240
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  .chat-container {
242
  border-radius: 12px;
243
  background-color: var(--card-bg);
 
304
  padding: 10px;
305
  margin-bottom: 10px;
306
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  .language-info {
308
  font-size: 12px;
309
  color: #888;
 
528
  </div>
529
  </div>
530
  <div class="setting-item">
531
+ <span class="setting-label">다국어 번역 채팅</span>
532
  <select id="language-select">
533
  <option value="">비활성화</option>
534
  <option value="ko">한국어 (Korean)</option>
 
563
  </select>
564
  </div>
565
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  <div class="text-input-section">
567
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
568
  <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
 
595
  let webrtc_id;
596
  let webSearchEnabled = false;
597
  let selectedLanguage = "";
 
 
598
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
599
  const audioOutput = document.getElementById('audio-output');
600
  const startButton = document.getElementById('start-button');
 
604
  const statusText = document.getElementById('status-text');
605
  const searchToggle = document.getElementById('search-toggle');
606
  const languageSelect = document.getElementById('language-select');
 
 
 
607
  const systemPromptInput = document.getElementById('system-prompt');
608
  const textInput = document.getElementById('text-input');
 
 
609
  let audioLevel = 0;
610
  let animationFrame;
611
  let audioContext, analyser, audioSource;
612
  let dataChannel = null;
613
  let isVoiceActive = false;
614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  // Web search toggle functionality
616
  searchToggle.addEventListener('click', () => {
617
  webSearchEnabled = !webSearchEnabled;
 
625
  console.log('Selected language:', selectedLanguage);
626
  });
627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
  // System prompt update
629
  systemPromptInput.addEventListener('input', () => {
630
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
 
644
  const message = textInput.value.trim();
645
  if (!message) return;
646
 
 
 
 
 
 
 
647
  // Add user message to chat
648
  addMessage('user', message);
649
  textInput.value = '';
 
697
  statusDot.className = 'status-dot ' + state;
698
  if (state === 'connected') {
699
  statusText.textContent = '연결됨';
700
+ sendButton.style.display = 'block';
 
 
701
  isVoiceActive = true;
702
  } else if (state === 'connecting') {
703
  statusText.textContent = '연결 중...';
704
  sendButton.style.display = 'none';
705
  } else {
706
  statusText.textContent = '연결 대기 중';
707
+ sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
 
 
708
  isVoiceActive = false;
709
  }
710
  }
 
850
  webrtc_id: webrtc_id,
851
  web_search_enabled: webSearchEnabled,
852
  target_language: selectedLanguage,
853
+ system_prompt: systemPrompt
 
 
854
  });
855
 
856
  const response = await fetch('/webrtc/offer', {
 
862
  webrtc_id: webrtc_id,
863
  web_search_enabled: webSearchEnabled,
864
  target_language: selectedLanguage,
865
+ system_prompt: systemPrompt
 
 
866
  })
867
  });
868
  const serverResponse = await response.json();
 
879
  const eventJson = JSON.parse(event.data);
880
  let content = eventJson.content;
881
 
 
 
 
 
 
 
 
 
 
882
  if (selectedLanguage && eventJson.language) {
883
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
 
 
 
 
 
 
 
884
  }
885
+ addMessage("assistant", content);
886
  });
887
  eventSource.addEventListener("search", (event) => {
888
  const eventJson = JSON.parse(event.data);
 
910
  chatMessages.scrollTop = chatMessages.scrollHeight;
911
  }
912
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
913
  function stop() {
914
  console.log('[STOP] Stopping connection...');
915
 
 
1000
  // Initialize on page load
1001
  window.addEventListener('DOMContentLoaded', () => {
1002
  sendButton.style.display = 'block';
 
1003
  });
1004
  </script>
1005
  </body>
 
1178
 
1179
  class OpenAIHandler(AsyncStreamHandler):
1180
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1181
+ system_prompt: str = "", webrtc_id: str = None) -> None:
 
1182
  super().__init__(
1183
  expected_layout="mono",
1184
  output_sample_rate=SAMPLE_RATE,
 
1195
  self.web_search_enabled = web_search_enabled
1196
  self.target_language = target_language
1197
  self.system_prompt = system_prompt
 
 
 
 
 
 
 
 
 
1198
 
1199
+ print(f"[INIT] Handler created with web_search={web_search_enabled}, "
1200
+ f"target_language={target_language}")
1201
 
1202
  def copy(self):
1203
  # Get the most recent settings
 
1212
 
1213
  # Log the settings being copied
1214
  print(f"[COPY] Copying settings from {recent_id}:")
 
 
1215
 
1216
  return OpenAIHandler(
1217
  web_search_enabled=settings.get('web_search_enabled', False),
1218
  target_language=settings.get('target_language', ''),
1219
  system_prompt=settings.get('system_prompt', ''),
1220
+ webrtc_id=recent_id
 
 
1221
  )
1222
 
1223
  print(f"[COPY] No settings found, creating default handler")
1224
+ return OpenAIHandler(web_search_enabled=False)
1225
 
1226
  async def search_web(self, query: str) -> str:
1227
  """Perform web search and return formatted results"""
 
1256
  )
1257
  await self.connection.response.create()
1258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1259
  def get_translation_instructions(self):
1260
  """Get instructions for translation based on target language"""
1261
+ if not self.target_language:
1262
  return ""
1263
 
1264
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
 
1269
  )
1270
 
1271
  async def start_up(self):
1272
+ """Connect to realtime API"""
1273
  # First check if we have the most recent settings
1274
  if connection_settings and self.webrtc_id:
1275
  if self.webrtc_id in connection_settings:
 
1277
  self.web_search_enabled = settings.get('web_search_enabled', False)
1278
  self.target_language = settings.get('target_language', '')
1279
  self.system_prompt = settings.get('system_prompt', '')
 
 
1280
 
1281
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
 
 
1282
 
1283
+ print(f"[START_UP] Starting normal mode")
1284
 
1285
  self.client = openai.AsyncOpenAI()
1286
 
 
 
 
 
 
 
 
 
 
 
1287
  # Normal mode - connect to Realtime API
1288
  print(f"[NORMAL MODE] Connecting to Realtime API...")
1289
 
 
1500
  ),
1501
  )
1502
 
1503
+ # Handle function calls
1504
+ elif event.type == "response.function_call_arguments.start":
1505
  print(f"Function call started")
1506
  self.function_call_in_progress = True
1507
  self.current_function_args = ""
1508
  self.current_call_id = getattr(event, 'call_id', None)
1509
 
1510
+ elif event.type == "response.function_call_arguments.delta":
1511
  if self.function_call_in_progress:
1512
  self.current_function_args += event.delta
1513
 
1514
+ elif event.type == "response.function_call_arguments.done":
1515
  if self.function_call_in_progress:
1516
  print(f"Function call done, args: {self.current_function_args}")
1517
  try:
 
1547
  self.current_call_id = None
1548
 
1549
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
1550
+ # Normal mode - use Realtime API
1551
+ if not self.connection:
1552
+ print(f"[RECEIVE] No connection in normal mode, skipping")
1553
+ return
1554
+ try:
1555
  _, array = frame
1556
  array = array.squeeze()
1557
+ audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
1558
+ await self.connection.input_audio_buffer.append(audio=audio_message)
1559
+ except Exception as e:
1560
+ print(f"Error in receive: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1561
 
1562
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1563
+ # Normal mode
1564
+ item = await wait_for_item(self.output_queue)
1565
+
1566
+ # Check if it's a dict with text message
1567
+ if isinstance(item, dict) and item.get('type') == 'text_message':
1568
+ await self.process_text_message(item['content'])
1569
+ return None
1570
+
1571
+ return item
 
 
 
 
 
 
 
 
 
 
1572
 
1573
  async def shutdown(self) -> None:
1574
+ print(f"[SHUTDOWN] Called")
1575
 
1576
+ # Normal mode - close Realtime API connection
1577
+ if self.connection:
1578
+ await self.connection.close()
1579
+ self.connection = None
1580
+ print("[NORMAL MODE] Connection closed")
 
 
 
 
 
 
1581
 
1582
 
1583
  # Create initial handler instance
1584
+ handler = OpenAIHandler(web_search_enabled=False)
1585
 
1586
  # Create components
1587
  chatbot = gr.Chatbot(type="messages")
 
1614
  web_search_enabled = body.get("web_search_enabled", False)
1615
  target_language = body.get("target_language", "")
1616
  system_prompt = body.get("system_prompt", "")
 
 
1617
 
1618
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
 
 
1619
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
1620
  print(f"[OFFER] target_language: {target_language}")
1621
 
 
1625
  'web_search_enabled': web_search_enabled,
1626
  'target_language': target_language,
1627
  'system_prompt': system_prompt,
 
 
1628
  'timestamp': asyncio.get_event_loop().time()
1629
  }
1630
 
 
1708
  data = {
1709
  "role": "assistant",
1710
  "content": event_data['event'].transcript,
1711
+ "language": event_data.get('language', '')
 
 
 
1712
  }
1713
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
1714