seawolf2357 committed on
Commit 17b6035 · verified · 1 Parent(s): cedd312

Update app.py

Files changed (1)
  1. app.py +123 -49
app.py CHANGED
@@ -719,6 +719,15 @@ HTML_CONTENT = """<!DOCTYPE html>
                 sendButton.style.display = 'block';
 
                 console.log('Interpretation mode disabled');
+
+                // If connected, restart to apply normal mode
+                if (peerConnection && peerConnection.connectionState === 'connected') {
+                    showError('일반 모드로 전환하기 위해 연결을 다시 시작합니다.');
+                    stop();
+                    setTimeout(() => {
+                        setupWebRTC();
+                    }, 500);
+                }
             }
 
             console.log('Interpretation mode:', interpretationMode);
@@ -747,6 +756,15 @@ HTML_CONTENT = """<!DOCTYPE html>
                 sendButton.style.display = 'none';
 
                 console.log('Interpretation mode enabled with language:', interpretationLanguage);
+
+                // If already connected, restart the connection with new settings
+                if (peerConnection && peerConnection.connectionState === 'connected') {
+                    showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
+                    stop();
+                    setTimeout(() => {
+                        setupWebRTC();
+                    }, 500);
+                }
             }
         });
 
@@ -979,6 +997,17 @@ HTML_CONTENT = """<!DOCTYPE html>
                 updateButtonState();
             });
             webrtc_id = Math.random().toString(36).substring(7);
+
+            // Log current settings before sending
+            console.log('Sending offer with settings:', {
+                webrtc_id: webrtc_id,
+                web_search_enabled: webSearchEnabled,
+                target_language: selectedLanguage,
+                system_prompt: systemPrompt,
+                interpretation_mode: interpretationMode,
+                interpretation_language: interpretationLanguage
+            });
+
             const response = await fetch('/webrtc/offer', {
                 method: 'POST',
                 headers: { 'Content-Type': 'application/json' },
@@ -1006,8 +1035,21 @@ HTML_CONTENT = """<!DOCTYPE html>
             eventSource.addEventListener("output", (event) => {
                 const eventJson = JSON.parse(event.data);
                 let content = eventJson.content;
+
+                // Debug logging for interpretation mode
+                if (interpretationMode) {
+                    console.log('[INTERPRETATION OUTPUT]', {
+                        content: content,
+                        language: eventJson.language,
+                        mode: eventJson.mode,
+                        expectedLanguage: interpretationLanguage
+                    });
+                }
+
                 if (selectedLanguage && eventJson.language) {
                     content += ` <span class="language-info">[${eventJson.language}]</span>`;
+                } else if (interpretationMode && eventJson.language) {
+                    content += ` <span class="language-info">[통역: ${eventJson.language}]</span>`;
                 }
                 addMessage("assistant", content);
             });
@@ -1298,30 +1340,22 @@ class OpenAIHandler(AsyncStreamHandler):
         target_language_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
         target_code = self.interpretation_language
 
-        # Language-specific examples
-        examples = {
-            "en": "Hello, the weather is nice today",
-            "es": "Hola, el clima está agradable hoy",
-            "fr": "Bonjour, il fait beau aujourd'hui",
-            "de": "Hallo, das Wetter ist heute schön",
-            "ja": "こんにちは、今日はいい天気ですね",
-            "zh": "你好,今天天气很好"
-        }
-
-        example_translation = examples.get(target_code, "Hello, the weather is nice today")
-
         return (
-            f"INTERPRETATION MODE - CRITICAL RULES:\n\n"
-            f"1. You are ONLY a translator to {target_language_name} (language code: {target_code}).\n"
-            f"2. NEVER respond in any other language.\n"
-            f"3. NEVER generate conversation or additional content.\n"
-            f"4. ONLY translate what the user says.\n"
-            f"5. STOP immediately after translating.\n\n"
-            f"Example:\n"
-            f"If user says: '안녕하세요, 오늘 날씨가 좋네요'\n"
-            f"You MUST respond ONLY: '{example_translation}'\n\n"
-            f"DO NOT say anything else. DO NOT continue talking.\n"
-            f"Your output language MUST be {target_language_name} ONLY."
+            f"YOU ARE IN TRANSLATION MODE. YOUR ONLY FUNCTION IS TO TRANSLATE.\n\n"
+            f"RULES:\n"
+            f"1. TRANSLATE the input to {target_language_name} ({target_code})\n"
+            f"2. OUTPUT ONLY THE TRANSLATION\n"
+            f"3. DO NOT ANSWER QUESTIONS\n"
+            f"4. DO NOT PROVIDE INFORMATION\n"
+            f"5. DO NOT CONTINUE SPEAKING\n"
+            f"6. NEVER OUTPUT IN ANY OTHER LANGUAGE THAN {target_language_name}\n\n"
+            f"EXAMPLES:\n"
+            f"Input: '안녕하세요' Output: 'Hello' (if English)\n"
+            f"Input: '날씨가 어때요?' Output: 'How is the weather?' (if English)\n"
+            f"Input: '커피 한 잔 주세요' → Output: 'One coffee please' (if English)\n\n"
+            f"YOU ARE NOT AN AI ASSISTANT. YOU ARE A TRANSLATOR.\n"
+            f"TRANSLATE AND STOP. NO ADDITIONAL WORDS.\n"
+            f"OUTPUT LANGUAGE: {target_language_name} ONLY."
         )
 
     def get_translation_instructions(self):
@@ -1368,18 +1402,18 @@ class OpenAIHandler(AsyncStreamHandler):
 
         # Check if in interpretation mode
         if self.interpretation_mode:
-            # In interpretation mode, override all instructions
-            base_instructions = (
-                f"You are a professional interpreter. Your ONLY task is to translate what the user says "
-                f"into {SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)}. "
-                f"Do not add any commentary, do not continue the conversation, do not generate new content. "
-                f"Simply translate what was said and stop."
+            # In interpretation mode, completely override everything
+            instructions = (
+                f"CRITICAL: YOU ARE A TRANSLATION SERVICE.\n"
+                f"TRANSLATE USER INPUT TO {SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)}.\n"
+                f"OUTPUT ONLY THE TRANSLATION. NOTHING ELSE.\n"
+                f"DO NOT ACT AS AN AI. DO NOT ANSWER QUESTIONS.\n"
+                f"JUST TRANSLATE AND STOP."
             )
-            interpretation_instructions = self.get_interpretation_instructions()
-            instructions = base_instructions + "\n\n" + interpretation_instructions
             # No tools in interpretation mode
             tools = []
             print(f"Interpretation mode active - target language: {self.interpretation_language}")
+            print(f"Instructions: {instructions}")
         else:
             # Normal mode - add translation instructions if language is selected
             translation_instructions = self.get_translation_instructions()
@@ -1430,39 +1464,77 @@ class OpenAIHandler(AsyncStreamHandler):
                     "type": "server_vad",
                     "threshold": 0.5,
                     "prefix_padding_ms": 300,
-                    "silence_duration_ms": 500 if self.interpretation_mode else 700
+                    "silence_duration_ms": 100 if self.interpretation_mode else 700  # Even shorter silence
                 },
                 "instructions": instructions,
                 "tools": tools,
-                "tool_choice": "auto" if tools else "none"
+                "tool_choice": "none" if self.interpretation_mode else ("auto" if tools else "none"),
+                "temperature": 0.0 if self.interpretation_mode else 0.7,
+                "max_response_output_tokens": 50 if self.interpretation_mode else 4096  # Very short responses
             }
 
             # Add voice setting based on interpretation or translation language
             voice_language = self.interpretation_language if self.interpretation_mode else self.target_language
-            if voice_language:
-                # Use only alloy voice to avoid language confusion
-                # The model will handle the language based on instructions
+
+            # For interpretation mode, ensure proper settings
+            if self.interpretation_mode and self.interpretation_language:
+                # Force simple translation behavior
                 session_update["voice"] = "alloy"
+                session_update["modalities"] = ["audio", "text"]  # Changed order
 
-                # For interpretation mode, explicitly set the output language
-                if self.interpretation_mode:
-                    session_update["output_audio_format"] = "pcm16"
-
+                # Create a very specific prompt for the target language
+                target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
+
+                # Use very explicit language instructions
+                if self.interpretation_language == "en":
+                    lang_specific = "Respond in English only. 영어로만 답하세요."
+                elif self.interpretation_language == "es":
+                    lang_specific = "Respond in Spanish only. Solo responde en español."
+                elif self.interpretation_language == "fr":
+                    lang_specific = "Respond in French only. Répondez uniquement en français."
+                elif self.interpretation_language == "de":
+                    lang_specific = "Respond in German only. Antworten Sie nur auf Deutsch."
+                elif self.interpretation_language == "ja":
+                    lang_specific = "Respond in Japanese only. 日本語でのみ答えてください。"
+                elif self.interpretation_language == "zh":
+                    lang_specific = "Respond in Chinese only. 只用中文回答。"
+                else:
+                    lang_specific = f"Respond in {target_lang_name} only."
+
+                # Override instructions with ultra-specific directive
+                session_update["instructions"] = (
+                    f"TRANSLATE TO {target_lang_name.upper()}. "
+                    f"{lang_specific} "
+                    f"You are a translation machine. "
+                    f"Input → Translation. Nothing else. "
+                    f"Do not chat. Do not explain. Just translate. "
+                    f"Maximum 20 words per response."
+                )
+
+                # Additional session parameters for interpretation mode
+                session_update["input_audio_transcription"] = {
+                    "model": "whisper-1"
+                }
+
+                print(f"[INTERPRETATION MODE] Target: {self.interpretation_language} ({target_lang_name})")
+                print(f"[INTERPRETATION MODE] Instructions: {session_update['instructions']}")
+                print(f"[INTERPRETATION MODE] Session config: {session_update}")
+            elif voice_language:
+                # Normal translation mode
+                session_update["voice"] = "alloy"
                 print(f"Voice set to: alloy for language: {voice_language}")
 
-            # For interpretation mode, ensure proper language settings
-            if self.interpretation_mode and self.interpretation_language:
-                session_update["modalities"] = ["text", "audio"]
-                session_update["temperature"] = 0.3  # Lower temperature for more accurate translation
-                session_update["max_response_output_tokens"] = 500  # Limit output to prevent long generations
-                print(f"Interpretation session config: voice={session_update.get('voice')}, lang={self.interpretation_language}")
-
             await conn.session.update(session=session_update)
             self.connection = conn
             print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}, "
                   f"interpretation_mode: {self.interpretation_mode}, language: {self.interpretation_language if self.interpretation_mode else self.target_language}")
 
             async for event in self.connection:
+                # Debug logging for interpretation mode
+                if self.interpretation_mode and event.type == "response.audio_transcript.done":
+                    print(f"[INTERPRETATION] Transcript: {event.transcript[:100]}...")
+                    print(f"[INTERPRETATION] Expected language: {self.interpretation_language}")
+
                 # Debug logging for function calls
                 if event.type.startswith("response.function_call"):
                     print(f"Function event: {event.type}")
@@ -1473,7 +1545,8 @@ class OpenAIHandler(AsyncStreamHandler):
                         "language": SUPPORTED_LANGUAGES.get(
                             self.interpretation_language if self.interpretation_mode else self.target_language,
                             ""
-                        ) if (self.interpretation_language or self.target_language) else ""
+                        ) if (self.interpretation_language or self.target_language) else "",
+                        "mode": "interpretation" if self.interpretation_mode else "normal"
                     }
                     await self.output_queue.put(AdditionalOutputs(output_data))
 
@@ -1687,7 +1760,8 @@ async def outputs(webrtc_id: str):
                 data = {
                     "role": "assistant",
                     "content": event.transcript,
-                    "language": output.args[0].get('language', '')
+                    "language": output.args[0].get('language', ''),
+                    "mode": output.args[0].get('mode', 'normal')
                 }
                 yield f"event: output\ndata: {json.dumps(data)}\n\n"
 
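The interpretation-mode settings above end up spread across the initial session_update dict and a later override, so the net payload can be hard to read from the diff alone. The sketch below is a minimal, hypothetical reconstruction (not code from this commit) of roughly what conn.session.update() receives when interpretation mode is active; it assumes a SUPPORTED_LANGUAGES mapping like the one in app.py and simplifies the final instructions string.

# Minimal sketch (assumed, not part of this commit): approximate session payload
# produced by the interpretation-mode branch after its final instructions override.
SUPPORTED_LANGUAGES = {
    "en": "English", "es": "Spanish", "fr": "French",
    "de": "German", "ja": "Japanese", "zh": "Chinese",
}

def build_interpretation_session(interpretation_language: str) -> dict:
    target = SUPPORTED_LANGUAGES.get(interpretation_language, interpretation_language)
    return {
        "turn_detection": {
            "type": "server_vad",
            "threshold": 0.5,
            "prefix_padding_ms": 300,
            "silence_duration_ms": 100,   # much shorter silence window in interpretation mode
        },
        "tools": [],                      # tools are disabled in interpretation mode
        "tool_choice": "none",
        "temperature": 0.0,
        "max_response_output_tokens": 50,
        "voice": "alloy",
        "modalities": ["audio", "text"],
        "input_audio_transcription": {"model": "whisper-1"},
        # This short directive is what finally replaces the earlier, longer instructions.
        "instructions": (
            f"TRANSLATE TO {target.upper()}. Respond in {target} only. "
            f"You are a translation machine. Input → Translation. Nothing else."
        ),
    }

if __name__ == "__main__":
    print(build_interpretation_session("en"))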