seawolf2357 committed on
Commit
f324e82
·
verified ·
1 Parent(s): c88aa2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +195 -50
app.py CHANGED
@@ -134,6 +134,20 @@ HTML_CONTENT = """<!DOCTYPE html>
134
  gap: 15px;
135
  margin-bottom: 15px;
136
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  .setting-item {
138
  display: flex;
139
  align-items: center;
@@ -507,6 +521,52 @@ HTML_CONTENT = """<!DOCTYPE html>
507
  </select>
508
  </div>
509
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  <div class="text-input-section">
511
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
512
  <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
@@ -531,6 +591,8 @@ HTML_CONTENT = """<!DOCTYPE html>
531
  let webrtc_id;
532
  let webSearchEnabled = false;
533
  let selectedLanguage = "";
 
 
534
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
535
  const audioOutput = document.getElementById('audio-output');
536
  const startButton = document.getElementById('start-button');
@@ -540,6 +602,10 @@ HTML_CONTENT = """<!DOCTYPE html>
540
  const statusText = document.getElementById('status-text');
541
  const searchToggle = document.getElementById('search-toggle');
542
  const languageSelect = document.getElementById('language-select');
 
 
 
 
543
  const systemPromptInput = document.getElementById('system-prompt');
544
  const textInput = document.getElementById('text-input');
545
  let audioLevel = 0;
@@ -561,6 +627,37 @@ HTML_CONTENT = """<!DOCTYPE html>
561
  console.log('Selected language:', selectedLanguage);
562
  });
563
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  // System prompt update
565
  systemPromptInput.addEventListener('input', () => {
566
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -580,6 +677,12 @@ HTML_CONTENT = """<!DOCTYPE html>
580
  const message = textInput.value.trim();
581
  if (!message) return;
582
 
 
 
 
 
 
 
583
  // Add user message to chat
584
  addMessage('user', message);
585
  textInput.value = '';
@@ -789,7 +892,9 @@ HTML_CONTENT = """<!DOCTYPE html>
789
  webrtc_id: webrtc_id,
790
  web_search_enabled: webSearchEnabled,
791
  target_language: selectedLanguage,
792
- system_prompt: systemPrompt
 
 
793
  })
794
  });
795
  const serverResponse = await response.json();
@@ -1005,7 +1110,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1005
 
1006
  class OpenAIHandler(AsyncStreamHandler):
1007
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1008
- system_prompt: str = "", webrtc_id: str = None) -> None:
 
1009
  super().__init__(
1010
  expected_layout="mono",
1011
  output_sample_rate=SAMPLE_RATE,
@@ -1022,8 +1128,11 @@ class OpenAIHandler(AsyncStreamHandler):
1022
  self.web_search_enabled = web_search_enabled
1023
  self.target_language = target_language
1024
  self.system_prompt = system_prompt
 
 
1025
  print(f"Handler created with web_search_enabled={web_search_enabled}, "
1026
- f"target_language={target_language}, webrtc_id={webrtc_id}")
 
1027
 
1028
  def copy(self):
1029
  # Get the most recent settings
@@ -1039,7 +1148,9 @@ class OpenAIHandler(AsyncStreamHandler):
1039
  web_search_enabled=settings.get('web_search_enabled', False),
1040
  target_language=settings.get('target_language', ''),
1041
  system_prompt=settings.get('system_prompt', ''),
1042
- webrtc_id=recent_id
 
 
1043
  )
1044
 
1045
  print(f"Handler.copy() called - creating new handler with default settings")
@@ -1078,9 +1189,23 @@ class OpenAIHandler(AsyncStreamHandler):
1078
  )
1079
  await self.connection.response.create()
1080
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1081
  def get_translation_instructions(self):
1082
  """Get instructions for translation based on target language"""
1083
- if not self.target_language:
1084
  return ""
1085
 
1086
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
@@ -1103,57 +1228,68 @@ class OpenAIHandler(AsyncStreamHandler):
1103
  self.web_search_enabled = settings.get('web_search_enabled', False)
1104
  self.target_language = settings.get('target_language', '')
1105
  self.system_prompt = settings.get('system_prompt', '')
 
 
1106
  self.webrtc_id = recent_id
1107
  print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
1108
- f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}")
 
1109
 
1110
  print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
1111
- f"target_language={self.target_language}")
1112
  self.client = openai.AsyncOpenAI()
1113
 
1114
  # Define the web search function
1115
  tools = []
1116
  base_instructions = self.system_prompt or "You are a helpful assistant."
1117
 
1118
- # Add translation instructions if language is selected
1119
- translation_instructions = self.get_translation_instructions()
1120
-
1121
- if self.web_search_enabled and self.search_client:
1122
- tools = [{
1123
- "type": "function",
1124
- "function": {
1125
- "name": "web_search",
1126
- "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
1127
- "parameters": {
1128
- "type": "object",
1129
- "properties": {
1130
- "query": {
1131
- "type": "string",
1132
- "description": "The search query"
1133
- }
1134
- },
1135
- "required": ["query"]
1136
- }
1137
- }
1138
- }]
1139
- print("Web search function added to tools")
1140
-
1141
- search_instructions = (
1142
- "\n\nYou have web search capabilities. "
1143
- "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
1144
- "- Weather (날씨, 기온, 비, 눈)\n"
1145
- "- News (뉴스, 소식)\n"
1146
- "- Current events (현재, 최근, 오늘, 지금)\n"
1147
- "- Prices (가격, 환율, 주가)\n"
1148
- "- Sports scores or results\n"
1149
- "- Any question about 2024 or 2025\n"
1150
- "- Any time-sensitive information\n\n"
1151
- "When in doubt, USE web_search. It's better to search and provide accurate information "
1152
- "than to guess or use outdated information."
1153
- )
1154
- instructions = base_instructions + search_instructions + translation_instructions
1155
  else:
1156
- instructions = base_instructions + translation_instructions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1157
 
1158
  async with self.client.beta.realtime.connect(
1159
  model="gpt-4o-mini-realtime-preview-2024-12-17"
@@ -1166,8 +1302,9 @@ class OpenAIHandler(AsyncStreamHandler):
1166
  "tool_choice": "auto" if tools else "none"
1167
  }
1168
 
1169
- # Add voice setting if target language is selected
1170
- if self.target_language:
 
1171
  # Map languages to appropriate voices
1172
  voice_map = {
1173
  "en": "alloy",
@@ -1178,7 +1315,7 @@ class OpenAIHandler(AsyncStreamHandler):
1178
  "zh": "nova",
1179
  # Default to alloy for other languages
1180
  }
1181
- session_update["voice"] = voice_map.get(self.target_language, "alloy")
1182
 
1183
  await conn.session.update(session=session_update)
1184
  self.connection = conn
@@ -1192,7 +1329,10 @@ class OpenAIHandler(AsyncStreamHandler):
1192
  if event.type == "response.audio_transcript.done":
1193
  output_data = {
1194
  "event": event,
1195
- "language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
 
 
 
1196
  }
1197
  await self.output_queue.put(AdditionalOutputs(output_data))
1198
 
@@ -1314,9 +1454,12 @@ async def custom_offer(request: Request):
1314
  web_search_enabled = body.get("web_search_enabled", False)
1315
  target_language = body.get("target_language", "")
1316
  system_prompt = body.get("system_prompt", "")
 
 
1317
 
1318
  print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
1319
- f"target_language: {target_language}")
 
1320
 
1321
  # Store settings with timestamp
1322
  if webrtc_id:
@@ -1324,6 +1467,8 @@ async def custom_offer(request: Request):
1324
  'web_search_enabled': web_search_enabled,
1325
  'target_language': target_language,
1326
  'system_prompt': system_prompt,
 
 
1327
  'timestamp': asyncio.get_event_loop().time()
1328
  }
1329
 
 
134
  gap: 15px;
135
  margin-bottom: 15px;
136
  }
137
+ .interpretation-section {
138
+ display: flex;
139
+ align-items: center;
140
+ gap: 15px;
141
+ padding: 15px;
142
+ background-color: var(--dark-bg);
143
+ border-radius: 8px;
144
+ margin-top: 15px;
145
+ }
146
+ .interpretation-info {
147
+ font-size: 13px;
148
+ color: #999;
149
+ margin-top: 5px;
150
+ }
151
  .setting-item {
152
  display: flex;
153
  align-items: center;
 
521
  </select>
522
  </div>
523
  </div>
524
+ <div class="interpretation-section">
525
+ <div class="setting-item">
526
+ <span class="setting-label">자동 통역</span>
527
+ <div id="interpretation-toggle" class="toggle-switch">
528
+ <div class="toggle-slider"></div>
529
+ </div>
530
+ </div>
531
+ <div class="setting-item" id="interpretation-language-container" style="display: none;">
532
+ <span class="setting-label">통역 언어</span>
533
+ <select id="interpretation-language-select">
534
+ <option value="">언어 선택</option>
535
+ <option value="ko">한국어 (Korean)</option>
536
+ <option value="en">English</option>
537
+ <option value="es">Español (Spanish)</option>
538
+ <option value="fr">Français (French)</option>
539
+ <option value="de">Deutsch (German)</option>
540
+ <option value="it">Italiano (Italian)</option>
541
+ <option value="pt">Português (Portuguese)</option>
542
+ <option value="ru">Русский (Russian)</option>
543
+ <option value="ja">日本語 (Japanese)</option>
544
+ <option value="zh">中文 (Chinese)</option>
545
+ <option value="ar">العربية (Arabic)</option>
546
+ <option value="hi">हिन्दी (Hindi)</option>
547
+ <option value="nl">Nederlands (Dutch)</option>
548
+ <option value="pl">Polski (Polish)</option>
549
+ <option value="tr">Türkçe (Turkish)</option>
550
+ <option value="vi">Tiếng Việt (Vietnamese)</option>
551
+ <option value="th">ไทย (Thai)</option>
552
+ <option value="id">Bahasa Indonesia</option>
553
+ <option value="sv">Svenska (Swedish)</option>
554
+ <option value="da">Dansk (Danish)</option>
555
+ <option value="no">Norsk (Norwegian)</option>
556
+ <option value="fi">Suomi (Finnish)</option>
557
+ <option value="he">עברית (Hebrew)</option>
558
+ <option value="uk">Українська (Ukrainian)</option>
559
+ <option value="cs">Čeština (Czech)</option>
560
+ <option value="el">Ελληνικά (Greek)</option>
561
+ <option value="ro">Română (Romanian)</option>
562
+ <option value="hu">Magyar (Hungarian)</option>
563
+ <option value="ms">Bahasa Melayu (Malay)</option>
564
+ </select>
565
+ </div>
566
+ </div>
567
+ <div class="interpretation-info" id="interpretation-info" style="display: none;">
568
+ 통역 모드: 입력한 음성이 선택한 언어로 자동 통역됩니다.
569
+ </div>
570
  <div class="text-input-section">
571
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
572
  <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
 
591
  let webrtc_id;
592
  let webSearchEnabled = false;
593
  let selectedLanguage = "";
594
+ let interpretationMode = false;
595
+ let interpretationLanguage = "";
596
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
597
  const audioOutput = document.getElementById('audio-output');
598
  const startButton = document.getElementById('start-button');
 
602
  const statusText = document.getElementById('status-text');
603
  const searchToggle = document.getElementById('search-toggle');
604
  const languageSelect = document.getElementById('language-select');
605
+ const interpretationToggle = document.getElementById('interpretation-toggle');
606
+ const interpretationLanguageSelect = document.getElementById('interpretation-language-select');
607
+ const interpretationLanguageContainer = document.getElementById('interpretation-language-container');
608
+ const interpretationInfo = document.getElementById('interpretation-info');
609
  const systemPromptInput = document.getElementById('system-prompt');
610
  const textInput = document.getElementById('text-input');
611
  let audioLevel = 0;
 
627
  console.log('Selected language:', selectedLanguage);
628
  });
629
 
630
+ // Interpretation mode toggle
631
+ interpretationToggle.addEventListener('click', () => {
632
+ interpretationMode = !interpretationMode;
633
+ interpretationToggle.classList.toggle('active', interpretationMode);
634
+ interpretationLanguageContainer.style.display = interpretationMode ? 'flex' : 'none';
635
+ interpretationInfo.style.display = interpretationMode ? 'block' : 'none';
636
+
637
+ // Disable translation mode when interpretation is enabled
638
+ if (interpretationMode) {
639
+ languageSelect.value = '';
640
+ selectedLanguage = '';
641
+ languageSelect.disabled = true;
642
+ searchToggle.classList.remove('active');
643
+ webSearchEnabled = false;
644
+ searchToggle.style.opacity = '0.5';
645
+ searchToggle.style.pointerEvents = 'none';
646
+ } else {
647
+ languageSelect.disabled = false;
648
+ searchToggle.style.opacity = '1';
649
+ searchToggle.style.pointerEvents = 'auto';
650
+ }
651
+
652
+ console.log('Interpretation mode:', interpretationMode);
653
+ });
654
+
655
+ // Interpretation language selection
656
+ interpretationLanguageSelect.addEventListener('change', () => {
657
+ interpretationLanguage = interpretationLanguageSelect.value;
658
+ console.log('Interpretation language:', interpretationLanguage);
659
+ });
660
+
661
  // System prompt update
662
  systemPromptInput.addEventListener('input', () => {
663
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
 
677
  const message = textInput.value.trim();
678
  if (!message) return;
679
 
680
+ // Don't allow text messages in interpretation mode
681
+ if (interpretationMode) {
682
+ showError('통역 모드에서는 텍스트 입력이 지원되지 않습니다.');
683
+ return;
684
+ }
685
+
686
  // Add user message to chat
687
  addMessage('user', message);
688
  textInput.value = '';
 
892
  webrtc_id: webrtc_id,
893
  web_search_enabled: webSearchEnabled,
894
  target_language: selectedLanguage,
895
+ system_prompt: systemPrompt,
896
+ interpretation_mode: interpretationMode,
897
+ interpretation_language: interpretationLanguage
898
  })
899
  });
900
  const serverResponse = await response.json();
 
1110
 
1111
  class OpenAIHandler(AsyncStreamHandler):
1112
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1113
+ system_prompt: str = "", webrtc_id: str = None,
1114
+ interpretation_mode: bool = False, interpretation_language: str = "") -> None:
1115
  super().__init__(
1116
  expected_layout="mono",
1117
  output_sample_rate=SAMPLE_RATE,
 
1128
  self.web_search_enabled = web_search_enabled
1129
  self.target_language = target_language
1130
  self.system_prompt = system_prompt
1131
+ self.interpretation_mode = interpretation_mode
1132
+ self.interpretation_language = interpretation_language
1133
  print(f"Handler created with web_search_enabled={web_search_enabled}, "
1134
+ f"target_language={target_language}, webrtc_id={webrtc_id}, "
1135
+ f"interpretation_mode={interpretation_mode}, interpretation_language={interpretation_language}")
1136
 
1137
  def copy(self):
1138
  # Get the most recent settings
 
1148
  web_search_enabled=settings.get('web_search_enabled', False),
1149
  target_language=settings.get('target_language', ''),
1150
  system_prompt=settings.get('system_prompt', ''),
1151
+ webrtc_id=recent_id,
1152
+ interpretation_mode=settings.get('interpretation_mode', False),
1153
+ interpretation_language=settings.get('interpretation_language', '')
1154
  )
1155
 
1156
  print(f"Handler.copy() called - creating new handler with default settings")
 
1189
  )
1190
  await self.connection.response.create()
1191
 
1192
+ def get_interpretation_instructions(self):
1193
+ """Get instructions for interpretation mode"""
1194
+ if not self.interpretation_mode or not self.interpretation_language:
1195
+ return ""
1196
+
1197
+ target_language_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
1198
+ return (
1199
+ f"\n\nIMPORTANT: You are now in INTERPRETATION MODE. "
1200
+ f"You must ONLY translate what the user says into {target_language_name} ({self.interpretation_language}). "
1201
+ f"DO NOT generate any responses, opinions, or additional content. "
1202
+ f"Your ONLY task is to translate the user's speech accurately into {target_language_name}. "
1203
+ f"You are a professional interpreter - simply translate what is said, nothing more."
1204
+ )
1205
+
1206
  def get_translation_instructions(self):
1207
  """Get instructions for translation based on target language"""
1208
+ if not self.target_language or self.interpretation_mode:
1209
  return ""
1210
 
1211
  language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
 
1228
  self.web_search_enabled = settings.get('web_search_enabled', False)
1229
  self.target_language = settings.get('target_language', '')
1230
  self.system_prompt = settings.get('system_prompt', '')
1231
+ self.interpretation_mode = settings.get('interpretation_mode', False)
1232
+ self.interpretation_language = settings.get('interpretation_language', '')
1233
  self.webrtc_id = recent_id
1234
  print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
1235
+ f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}, "
1236
+ f"interpretation_mode={self.interpretation_mode}")
1237
 
1238
  print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
1239
+ f"target_language={self.target_language}, interpretation_mode={self.interpretation_mode}")
1240
  self.client = openai.AsyncOpenAI()
1241
 
1242
  # Define the web search function
1243
  tools = []
1244
  base_instructions = self.system_prompt or "You are a helpful assistant."
1245
 
1246
+ # Check if in interpretation mode
1247
+ if self.interpretation_mode:
1248
+ # In interpretation mode, override all instructions
1249
+ interpretation_instructions = self.get_interpretation_instructions()
1250
+ instructions = interpretation_instructions
1251
+ # No tools in interpretation mode
1252
+ tools = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1253
  else:
1254
+ # Normal mode - add translation instructions if language is selected
1255
+ translation_instructions = self.get_translation_instructions()
1256
+
1257
+ if self.web_search_enabled and self.search_client:
1258
+ tools = [{
1259
+ "type": "function",
1260
+ "function": {
1261
+ "name": "web_search",
1262
+ "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
1263
+ "parameters": {
1264
+ "type": "object",
1265
+ "properties": {
1266
+ "query": {
1267
+ "type": "string",
1268
+ "description": "The search query"
1269
+ }
1270
+ },
1271
+ "required": ["query"]
1272
+ }
1273
+ }
1274
+ }]
1275
+ print("Web search function added to tools")
1276
+
1277
+ search_instructions = (
1278
+ "\n\nYou have web search capabilities. "
1279
+ "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
1280
+ "- Weather (날씨, 기온, 비, 눈)\n"
1281
+ "- News (뉴스, 소식)\n"
1282
+ "- Current events (현재, 최근, 오늘, 지금)\n"
1283
+ "- Prices (가격, 환율, 주가)\n"
1284
+ "- Sports scores or results\n"
1285
+ "- Any question about 2024 or 2025\n"
1286
+ "- Any time-sensitive information\n\n"
1287
+ "When in doubt, USE web_search. It's better to search and provide accurate information "
1288
+ "than to guess or use outdated information."
1289
+ )
1290
+ instructions = base_instructions + search_instructions + translation_instructions
1291
+ else:
1292
+ instructions = base_instructions + translation_instructions
1293
 
1294
  async with self.client.beta.realtime.connect(
1295
  model="gpt-4o-mini-realtime-preview-2024-12-17"
 
1302
  "tool_choice": "auto" if tools else "none"
1303
  }
1304
 
1305
+ # Add voice setting based on interpretation or translation language
1306
+ voice_language = self.interpretation_language if self.interpretation_mode else self.target_language
1307
+ if voice_language:
1308
  # Map languages to appropriate voices
1309
  voice_map = {
1310
  "en": "alloy",
 
1315
  "zh": "nova",
1316
  # Default to alloy for other languages
1317
  }
1318
+ session_update["voice"] = voice_map.get(voice_language, "alloy")
1319
 
1320
  await conn.session.update(session=session_update)
1321
  self.connection = conn
 
1329
  if event.type == "response.audio_transcript.done":
1330
  output_data = {
1331
  "event": event,
1332
+ "language": SUPPORTED_LANGUAGES.get(
1333
+ self.interpretation_language if self.interpretation_mode else self.target_language,
1334
+ ""
1335
+ ) if (self.interpretation_language or self.target_language) else ""
1336
  }
1337
  await self.output_queue.put(AdditionalOutputs(output_data))
1338
 
 
1454
  web_search_enabled = body.get("web_search_enabled", False)
1455
  target_language = body.get("target_language", "")
1456
  system_prompt = body.get("system_prompt", "")
1457
+ interpretation_mode = body.get("interpretation_mode", False)
1458
+ interpretation_language = body.get("interpretation_language", "")
1459
 
1460
  print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
1461
+ f"target_language: {target_language}, interpretation_mode: {interpretation_mode}, "
1462
+ f"interpretation_language: {interpretation_language}")
1463
 
1464
  # Store settings with timestamp
1465
  if webrtc_id:
 
1467
  'web_search_enabled': web_search_enabled,
1468
  'target_language': target_language,
1469
  'system_prompt': system_prompt,
1470
+ 'interpretation_mode': interpretation_mode,
1471
+ 'interpretation_language': interpretation_language,
1472
  'timestamp': asyncio.get_event_loop().time()
1473
  }
1474