seawolf2357 committed on
Commit
c90fc65
·
verified ·
1 Parent(s): a9741e7

Update app-backup4.py

Browse files
Files changed (1) hide show
  1. app-backup4.py +329 -329
app-backup4.py CHANGED
@@ -23,43 +23,15 @@ import gradio as gr
23
  import io
24
  from scipy import signal
25
  import wave
 
 
 
 
26
 
27
  load_dotenv()
28
 
29
  SAMPLE_RATE = 24000
30
-
31
- # Supported languages for OpenAI Realtime API
32
- SUPPORTED_LANGUAGES = {
33
- "ko": "한국어 (Korean)",
34
- "en": "English",
35
- "es": "Español (Spanish)",
36
- "fr": "Français (French)",
37
- "de": "Deutsch (German)",
38
- "it": "Italiano (Italian)",
39
- "pt": "Português (Portuguese)",
40
- "ru": "Русский (Russian)",
41
- "ja": "日本語 (Japanese)",
42
- "zh": "中文 (Chinese)",
43
- "ar": "العربية (Arabic)",
44
- "hi": "हिन्दी (Hindi)",
45
- "nl": "Nederlands (Dutch)",
46
- "pl": "Polski (Polish)",
47
- "tr": "Türkçe (Turkish)",
48
- "vi": "Tiếng Việt (Vietnamese)",
49
- "th": "ไทย (Thai)",
50
- "id": "Bahasa Indonesia",
51
- "sv": "Svenska (Swedish)",
52
- "da": "Dansk (Danish)",
53
- "no": "Norsk (Norwegian)",
54
- "fi": "Suomi (Finnish)",
55
- "he": "עברית (Hebrew)",
56
- "uk": "Українська (Ukrainian)",
57
- "cs": "Čeština (Czech)",
58
- "el": "Ελληνικά (Greek)",
59
- "ro": "Română (Romanian)",
60
- "hu": "Magyar (Hungarian)",
61
- "ms": "Bahasa Melayu (Malay)"
62
- }
63
 
64
  # HTML content embedded as a string
65
  HTML_CONTENT = """<!DOCTYPE html>
@@ -199,21 +171,37 @@ HTML_CONTENT = """<!DOCTYPE html>
199
  .toggle-switch.active .toggle-slider {
200
  transform: translateX(24px);
201
  }
202
- /* Select dropdown */
203
- select {
204
  background-color: var(--card-bg);
205
- color: var(--text-color);
 
206
  border: 1px solid var(--border-color);
207
- padding: 8px 12px;
 
 
 
 
 
 
 
208
  border-radius: 6px;
209
- font-size: 14px;
210
  cursor: pointer;
211
- min-width: 120px;
212
- max-width: 200px;
213
  }
214
- select:focus {
215
- outline: none;
216
- border-color: var(--primary-color);
 
 
 
 
 
 
 
 
 
 
217
  }
218
  /* Text inputs */
219
  .text-input-section {
@@ -519,7 +507,7 @@ HTML_CONTENT = """<!DOCTYPE html>
519
  <div class="main-content">
520
  <div class="sidebar">
521
  <div class="settings-section">
522
- <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정(텍스트 채팅에만 적용)</h3>
523
  <div class="settings-grid">
524
  <div class="setting-item">
525
  <span class="setting-label">웹 검색</span>
@@ -527,41 +515,6 @@ HTML_CONTENT = """<!DOCTYPE html>
527
  <div class="toggle-slider"></div>
528
  </div>
529
  </div>
530
- <div class="setting-item">
531
- <span class="setting-label">다국어 번역 채팅</span>
532
- <select id="language-select">
533
- <option value="">비활성화</option>
534
- <option value="ko">한국어 (Korean)</option>
535
- <option value="en">English</option>
536
- <option value="es">Español (Spanish)</option>
537
- <option value="fr">Français (French)</option>
538
- <option value="de">Deutsch (German)</option>
539
- <option value="it">Italiano (Italian)</option>
540
- <option value="pt">Português (Portuguese)</option>
541
- <option value="ru">Русский (Russian)</option>
542
- <option value="ja">日本語 (Japanese)</option>
543
- <option value="zh">中文 (Chinese)</option>
544
- <option value="ar">العربية (Arabic)</option>
545
- <option value="hi">हिन्दी (Hindi)</option>
546
- <option value="nl">Nederlands (Dutch)</option>
547
- <option value="pl">Polski (Polish)</option>
548
- <option value="tr">Türkçe (Turkish)</option>
549
- <option value="vi">Tiếng Việt (Vietnamese)</option>
550
- <option value="th">ไทย (Thai)</option>
551
- <option value="id">Bahasa Indonesia</option>
552
- <option value="sv">Svenska (Swedish)</option>
553
- <option value="da">Dansk (Danish)</option>
554
- <option value="no">Norsk (Norwegian)</option>
555
- <option value="fi">Suomi (Finnish)</option>
556
- <option value="he">עברית (Hebrew)</option>
557
- <option value="uk">Українська (Ukrainian)</option>
558
- <option value="cs">Čeština (Czech)</option>
559
- <option value="el">Ελληνικά (Greek)</option>
560
- <option value="ro">Română (Romanian)</option>
561
- <option value="hu">Magyar (Hungarian)</option>
562
- <option value="ms">Bahasa Melayu (Malay)</option>
563
- </select>
564
- </div>
565
  </div>
566
  <div class="text-input-section">
567
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
@@ -569,6 +522,11 @@ HTML_CONTENT = """<!DOCTYPE html>
569
  </div>
570
  </div>
571
 
 
 
 
 
 
572
  <div class="controls">
573
  <button id="start-button">대화 시작</button>
574
  </div>
@@ -594,8 +552,8 @@ HTML_CONTENT = """<!DOCTYPE html>
594
  let peerConnection;
595
  let webrtc_id;
596
  let webSearchEnabled = false;
597
- let selectedLanguage = "";
598
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
 
599
  const audioOutput = document.getElementById('audio-output');
600
  const startButton = document.getElementById('start-button');
601
  const sendButton = document.getElementById('send-button');
@@ -603,15 +561,61 @@ HTML_CONTENT = """<!DOCTYPE html>
603
  const statusDot = document.getElementById('status-dot');
604
  const statusText = document.getElementById('status-text');
605
  const searchToggle = document.getElementById('search-toggle');
606
- const languageSelect = document.getElementById('language-select');
607
  const systemPromptInput = document.getElementById('system-prompt');
608
  const textInput = document.getElementById('text-input');
 
609
  let audioLevel = 0;
610
  let animationFrame;
611
  let audioContext, analyser, audioSource;
612
  let dataChannel = null;
613
  let isVoiceActive = false;
614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  // Web search toggle functionality
616
  searchToggle.addEventListener('click', () => {
617
  webSearchEnabled = !webSearchEnabled;
@@ -619,12 +623,6 @@ HTML_CONTENT = """<!DOCTYPE html>
619
  console.log('Web search enabled:', webSearchEnabled);
620
  });
621
 
622
- // Language selection
623
- languageSelect.addEventListener('change', () => {
624
- selectedLanguage = languageSelect.value;
625
- console.log('Selected language:', selectedLanguage);
626
- });
627
-
628
  // System prompt update
629
  systemPromptInput.addEventListener('input', () => {
630
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -664,8 +662,8 @@ HTML_CONTENT = """<!DOCTYPE html>
664
  body: JSON.stringify({
665
  message: message,
666
  web_search_enabled: webSearchEnabled,
667
- target_language: selectedLanguage,
668
- system_prompt: systemPrompt
669
  })
670
  });
671
 
@@ -680,8 +678,8 @@ HTML_CONTENT = """<!DOCTYPE html>
680
  } else {
681
  // Add assistant response
682
  let content = data.response;
683
- if (selectedLanguage && data.language) {
684
- content += ` <span class="language-info">[${data.language}]</span>`;
685
  }
686
  addMessage('assistant', content);
687
  }
@@ -849,8 +847,8 @@ HTML_CONTENT = """<!DOCTYPE html>
849
  console.log('Sending offer with settings:', {
850
  webrtc_id: webrtc_id,
851
  web_search_enabled: webSearchEnabled,
852
- target_language: selectedLanguage,
853
- system_prompt: systemPrompt
854
  });
855
 
856
  const response = await fetch('/webrtc/offer', {
@@ -861,8 +859,8 @@ HTML_CONTENT = """<!DOCTYPE html>
861
  type: peerConnection.localDescription.type,
862
  webrtc_id: webrtc_id,
863
  web_search_enabled: webSearchEnabled,
864
- target_language: selectedLanguage,
865
- system_prompt: systemPrompt
866
  })
867
  });
868
  const serverResponse = await response.json();
@@ -879,8 +877,8 @@ HTML_CONTENT = """<!DOCTYPE html>
879
  const eventJson = JSON.parse(event.data);
880
  let content = eventJson.content;
881
 
882
- if (selectedLanguage && eventJson.language) {
883
- content += ` <span class="language-info">[${eventJson.language}]</span>`;
884
  }
885
  addMessage("assistant", content);
886
  });
@@ -897,7 +895,7 @@ HTML_CONTENT = """<!DOCTYPE html>
897
  stop();
898
  }
899
  }
900
- function addMessage(role, content) {
901
  const messageDiv = document.createElement('div');
902
  messageDiv.classList.add('message', role);
903
 
@@ -908,6 +906,19 @@ HTML_CONTENT = """<!DOCTYPE html>
908
  }
909
  chatMessages.appendChild(messageDiv);
910
  chatMessages.scrollTop = chatMessages.scrollHeight;
 
 
 
 
 
 
 
 
 
 
 
 
 
911
  }
912
 
913
  function stop() {
@@ -1000,6 +1011,8 @@ HTML_CONTENT = """<!DOCTYPE html>
1000
  // Initialize on page load
1001
  window.addEventListener('DOMContentLoaded', () => {
1002
  sendButton.style.display = 'block';
 
 
1003
  });
1004
  </script>
1005
  </body>
@@ -1048,6 +1061,131 @@ class BraveSearchClient:
1048
  return []
1049
 
1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  # Initialize search client globally
1052
  brave_api_key = os.getenv("BSEARCH_API")
1053
  search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
@@ -1059,52 +1197,17 @@ connection_settings = {}
1059
  # Initialize OpenAI client for text chat
1060
  client = openai.AsyncOpenAI()
1061
 
1062
- def get_translation_instructions(target_language: str) -> str:
1063
- """Get instructions for translation based on target language"""
1064
- if not target_language:
1065
- return ""
1066
-
1067
- language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1068
- return (
1069
- f"\n\nIMPORTANT: You must respond in {language_name} ({target_language}). "
1070
- f"Translate all your responses to {language_name}."
1071
- )
1072
 
1073
  def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
1074
  chatbot.append({"role": "assistant", "content": response.transcript})
1075
  return chatbot
1076
 
1077
 
1078
- async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
1079
- system_prompt: str) -> Dict[str, str]:
1080
  """Process text chat using GPT-4o-mini model"""
1081
  try:
1082
- # If target language is set, override system prompt completely
1083
- if target_language:
1084
- language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1085
-
1086
- # Create system prompt in target language
1087
- if target_language == "en":
1088
- base_instructions = f"You are a helpful assistant. You speak ONLY English. Never use Korean or any other language. {system_prompt}"
1089
- user_prefix = "Please respond in English: "
1090
- elif target_language == "ja":
1091
- base_instructions = f"あなたは親切なアシスタントです。日本語のみを話します。韓国語や他の言語は絶対に使用しません。{system_prompt}"
1092
- user_prefix = "日本語で答えてください: "
1093
- elif target_language == "zh":
1094
- base_instructions = f"你是一个乐于助人的助手。你只说中文。绝不使用韩语或其他语言。{system_prompt}"
1095
- user_prefix = "请用中文回答: "
1096
- elif target_language == "es":
1097
- base_instructions = f"Eres un asistente útil. Solo hablas español. Nunca uses coreano u otros idiomas. {system_prompt}"
1098
- user_prefix = "Por favor responde en español: "
1099
- else:
1100
- base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}. {system_prompt}"
1101
- user_prefix = f"Please respond in {language_name}: "
1102
- else:
1103
- base_instructions = system_prompt or "You are a helpful assistant."
1104
- user_prefix = ""
1105
-
1106
  messages = [
1107
- {"role": "system", "content": base_instructions}
1108
  ]
1109
 
1110
  # Handle web search if enabled
@@ -1124,19 +1227,12 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1124
  for i, result in enumerate(search_results[:5], 1):
1125
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1126
 
1127
- # Add search context in target language if set
1128
- if target_language:
1129
- search_instruction = f"Use this search information but respond in {SUPPORTED_LANGUAGES.get(target_language, target_language)} only: "
1130
- else:
1131
- search_instruction = "다음 웹 검색 결과를 참고하여 답변하세요: "
1132
-
1133
  messages.append({
1134
  "role": "system",
1135
- "content": search_instruction + "\n\n" + search_context
1136
  })
1137
 
1138
- # Add user message with language prefix
1139
- messages.append({"role": "user", "content": user_prefix + message})
1140
 
1141
  # Call GPT-4o-mini
1142
  response = await client.chat.completions.create(
@@ -1148,27 +1244,22 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1148
 
1149
  response_text = response.choices[0].message.content
1150
 
1151
- # Final check - remove any Korean if target language is not Korean
1152
- if target_language and target_language != "ko":
1153
- import re
1154
- if re.search(r'[가-힣]', response_text):
1155
- print(f"[TEXT CHAT] WARNING: Korean detected in response for {target_language}")
1156
- # Try again with stronger prompt
1157
- messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
1158
- retry_response = await client.chat.completions.create(
1159
- model="gpt-4.1-mini",
1160
- messages=messages,
1161
- temperature=0.3,
1162
- max_tokens=2000
1163
- )
1164
- response_text = retry_response.choices[0].message.content
1165
 
1166
- print(f"[TEXT CHAT] Target language: {target_language}")
1167
- print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
 
 
1168
 
1169
  return {
1170
  "response": response_text,
1171
- "language": SUPPORTED_LANGUAGES.get(target_language, "") if target_language else ""
1172
  }
1173
 
1174
  except Exception as e:
@@ -1177,8 +1268,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1177
 
1178
 
1179
  class OpenAIHandler(AsyncStreamHandler):
1180
- def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1181
- system_prompt: str = "", webrtc_id: str = None) -> None:
1182
  super().__init__(
1183
  expected_layout="mono",
1184
  output_sample_rate=SAMPLE_RATE,
@@ -1193,11 +1284,10 @@ class OpenAIHandler(AsyncStreamHandler):
1193
  self.current_call_id = None
1194
  self.webrtc_id = webrtc_id
1195
  self.web_search_enabled = web_search_enabled
1196
- self.target_language = target_language
1197
  self.system_prompt = system_prompt
 
1198
 
1199
- print(f"[INIT] Handler created with web_search={web_search_enabled}, "
1200
- f"target_language={target_language}")
1201
 
1202
  def copy(self):
1203
  # Get the most recent settings
@@ -1215,9 +1305,9 @@ class OpenAIHandler(AsyncStreamHandler):
1215
 
1216
  return OpenAIHandler(
1217
  web_search_enabled=settings.get('web_search_enabled', False),
1218
- target_language=settings.get('target_language', ''),
1219
  system_prompt=settings.get('system_prompt', ''),
1220
- webrtc_id=recent_id
 
1221
  )
1222
 
1223
  print(f"[COPY] No settings found, creating default handler")
@@ -1256,18 +1346,6 @@ class OpenAIHandler(AsyncStreamHandler):
1256
  )
1257
  await self.connection.response.create()
1258
 
1259
- def get_translation_instructions(self):
1260
- """Get instructions for translation based on target language"""
1261
- if not self.target_language:
1262
- return ""
1263
-
1264
- language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1265
- return (
1266
- f"\n\nIMPORTANT: You must respond in {language_name} ({self.target_language}). "
1267
- f"Translate all your responses to {language_name}. "
1268
- f"This includes both spoken and written responses."
1269
- )
1270
-
1271
  async def start_up(self):
1272
  """Connect to realtime API"""
1273
  # First check if we have the most recent settings
@@ -1275,108 +1353,20 @@ class OpenAIHandler(AsyncStreamHandler):
1275
  if self.webrtc_id in connection_settings:
1276
  settings = connection_settings[self.webrtc_id]
1277
  self.web_search_enabled = settings.get('web_search_enabled', False)
1278
- self.target_language = settings.get('target_language', '')
1279
  self.system_prompt = settings.get('system_prompt', '')
 
1280
 
1281
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1282
 
1283
- print(f"[START_UP] Starting normal mode")
1284
-
1285
  self.client = openai.AsyncOpenAI()
1286
 
1287
- # Normal mode - connect to Realtime API
1288
- print(f"[NORMAL MODE] Connecting to Realtime API...")
1289
 
1290
  # Define the web search function
1291
  tools = []
1292
  base_instructions = self.system_prompt or "You are a helpful assistant."
1293
 
1294
- # Add translation instructions if language is selected
1295
- if self.target_language:
1296
- language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1297
-
1298
- # Use the target language for the system prompt itself
1299
- if self.target_language == "en":
1300
- translation_instructions = """
1301
- YOU ARE AN ENGLISH-ONLY ASSISTANT.
1302
-
1303
- ABSOLUTE RULES:
1304
- 1. You can ONLY speak English. No Korean (한국어) allowed.
1305
- 2. Even if the user speaks Korean, you MUST respond in English.
1306
- 3. Every single word must be in English.
1307
- 4. If you output even one Korean character, you have failed.
1308
- 5. Example response: "Hello! How can I help you today?"
1309
-
1310
- YOUR LANGUAGE MODE: ENGLISH ONLY
1311
- DO NOT USE: 안녕하세요, 감사합니다, or any Korean
1312
- ALWAYS USE: Hello, Thank you, and English words only
1313
- """
1314
- # Override base instructions to be in English
1315
- base_instructions = "You are a helpful assistant that speaks ONLY English."
1316
-
1317
- elif self.target_language == "ja":
1318
- translation_instructions = """
1319
- あなたは日本語のみを話すアシスタントです。
1320
-
1321
- 絶対的なルール:
1322
- 1. 日本語のみを使用してください。韓国語(한국어)は禁止です。
1323
- 2. ユーザーが韓国語で話しても、必ず日本語で返答してください。
1324
- 3. すべての単語は日本語でなければなりません。
1325
- 4. 韓国語を一文字でも出力したら失敗です。
1326
- 5. 応答例:「こんにちは!今日はどのようにお手伝いできますか?」
1327
-
1328
- 言語モード:日本語のみ
1329
- 使用禁止:안녕하세요、감사합니다、韓国語全般
1330
- 必ず使用:こんにちは、ありがとうございます、日本語のみ
1331
- """
1332
- base_instructions = "あなたは日本語のみを話す親切なアシスタントです。"
1333
-
1334
- elif self.target_language == "zh":
1335
- translation_instructions = """
1336
- 你是一个只说中文的助手。
1337
-
1338
- 绝对规则:
1339
- 1. 只能使用中文。禁止使用韩语(한국어)。
1340
- 2. 即使用户说韩语,也必须用中文回复。
1341
- 3. 每个字都必须是中文。
1342
- 4. 如果输出任何韩语字符,就是失败。
1343
- 5. 回复示例:"你好!我今天能为您做什么?"
1344
-
1345
- 语言模式:仅中文
1346
- 禁止使用:안녕하세요、감사합니다、任何韩语
1347
- 必须使用:你好、谢谢、只用中文
1348
- """
1349
- base_instructions = "你是一个只说中文的友好助手。"
1350
-
1351
- elif self.target_language == "es":
1352
- translation_instructions = """
1353
- ERES UN ASISTENTE QUE SOLO HABLA ESPAÑOL.
1354
-
1355
- REGLAS ABSOLUTAS:
1356
- 1. Solo puedes hablar español. No se permite coreano (한국어).
1357
- 2. Incluso si el usuario habla coreano, DEBES responder en español.
1358
- 3. Cada palabra debe estar en español.
1359
- 4. Si produces aunque sea un carácter coreano, has fallado.
1360
- 5. Respuesta ejemplo: "¡Hola! ¿Cómo puedo ayudarte hoy?"
1361
-
1362
- MODO DE IDIOMA: SOLO ESPAÑOL
1363
- NO USAR: 안녕하세요, 감사합니다, o cualquier coreano
1364
- SIEMPRE USAR: Hola, Gracias, y solo palabras en español
1365
- """
1366
- base_instructions = "Eres un asistente útil que habla SOLO español."
1367
- else:
1368
- translation_instructions = f"""
1369
- YOU MUST ONLY SPEAK {language_name.upper()}.
1370
-
1371
- RULES:
1372
- 1. Output only in {language_name}
1373
- 2. Never use Korean
1374
- 3. Always respond in {language_name}
1375
- """
1376
- base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}."
1377
- else:
1378
- translation_instructions = ""
1379
-
1380
  if self.web_search_enabled and self.search_client:
1381
  tools = [{
1382
  "type": "function",
@@ -1411,23 +1401,9 @@ RULES:
1411
  "than to guess or use outdated information."
1412
  )
1413
 
1414
- # Combine all instructions
1415
- if translation_instructions:
1416
- # Translation instructions already include base_instructions
1417
- instructions = translation_instructions + search_instructions
1418
- else:
1419
- instructions = base_instructions + search_instructions
1420
  else:
1421
- # No web search
1422
- if translation_instructions:
1423
- instructions = translation_instructions
1424
- else:
1425
- instructions = base_instructions
1426
-
1427
- print(f"[NORMAL MODE] Base instructions: {base_instructions[:100]}...")
1428
- print(f"[NORMAL MODE] Translation instructions: {translation_instructions[:200] if translation_instructions else 'None'}...")
1429
- print(f"[NORMAL MODE] Combined instructions length: {len(instructions)}")
1430
- print(f"[NORMAL MODE] Target language: {self.target_language}")
1431
 
1432
  async with self.client.beta.realtime.connect(
1433
  model="gpt-4o-mini-realtime-preview-2024-12-17"
@@ -1441,39 +1417,12 @@ RULES:
1441
  "temperature": 0.7,
1442
  "max_response_output_tokens": 4096,
1443
  "modalities": ["text", "audio"],
1444
- "voice": "alloy" # Default voice
1445
  }
1446
 
1447
- # Use appropriate voice for the language
1448
- if self.target_language:
1449
- # Force language through multiple mechanisms
1450
- # 1. Use voice that's known to work well with the language
1451
- voice_map = {
1452
- "en": "nova", # Nova has clearer English
1453
- "es": "nova", # Nova works for Spanish
1454
- "fr": "shimmer", # Shimmer for French
1455
- "de": "echo", # Echo for German
1456
- "ja": "alloy", # Alloy can do Japanese
1457
- "zh": "alloy", # Alloy can do Chinese
1458
- "ko": "nova", # Nova for Korean
1459
- }
1460
- session_update["voice"] = voice_map.get(self.target_language, "nova")
1461
-
1462
- # 2. Add language to modalities (experimental)
1463
- session_update["modalities"] = ["text", "audio"]
1464
-
1465
- # 3. Set output format
1466
- session_update["output_audio_format"] = "pcm16"
1467
-
1468
- # 4. Add language hint to the system (if supported by API)
1469
- if self.target_language in ["en", "es", "fr", "de", "ja", "zh"]:
1470
- session_update["language"] = self.target_language # Try setting language directly
1471
-
1472
- print(f"[TRANSLATION MODE] Session update: {json.dumps(session_update, indent=2)}")
1473
-
1474
  await conn.session.update(session=session_update)
1475
  self.connection = conn
1476
- print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
1477
 
1478
  async for event in self.connection:
1479
  # Debug logging for function calls
@@ -1482,11 +1431,22 @@ RULES:
1482
 
1483
  if event.type == "response.audio_transcript.done":
1484
  print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
1485
- print(f"[RESPONSE] Expected language: {self.target_language}")
 
 
 
 
 
 
 
 
 
 
 
1486
 
1487
  output_data = {
1488
  "event": event,
1489
- "language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
1490
  }
1491
  await self.output_queue.put(AdditionalOutputs(output_data))
1492
 
@@ -1547,9 +1507,8 @@ RULES:
1547
  self.current_call_id = None
1548
 
1549
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
1550
- # Normal mode - use Realtime API
1551
  if not self.connection:
1552
- print(f"[RECEIVE] No connection in normal mode, skipping")
1553
  return
1554
  try:
1555
  _, array = frame
@@ -1560,7 +1519,6 @@ RULES:
1560
  print(f"Error in receive: {e}")
1561
 
1562
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1563
- # Normal mode
1564
  item = await wait_for_item(self.output_queue)
1565
 
1566
  # Check if it's a dict with text message
@@ -1573,11 +1531,10 @@ RULES:
1573
  async def shutdown(self) -> None:
1574
  print(f"[SHUTDOWN] Called")
1575
 
1576
- # Normal mode - close Realtime API connection
1577
  if self.connection:
1578
  await self.connection.close()
1579
  self.connection = None
1580
- print("[NORMAL MODE] Connection closed")
1581
 
1582
 
1583
  # Create initial handler instance
@@ -1604,6 +1561,12 @@ app = FastAPI()
1604
  # Mount stream
1605
  stream.mount(app)
1606
 
 
 
 
 
 
 
1607
  # Intercept offer to capture settings
1608
  @app.post("/webrtc/offer", include_in_schema=False)
1609
  async def custom_offer(request: Request):
@@ -1612,19 +1575,19 @@ async def custom_offer(request: Request):
1612
 
1613
  webrtc_id = body.get("webrtc_id")
1614
  web_search_enabled = body.get("web_search_enabled", False)
1615
- target_language = body.get("target_language", "")
1616
  system_prompt = body.get("system_prompt", "")
 
1617
 
1618
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
1619
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
1620
- print(f"[OFFER] target_language: {target_language}")
1621
 
1622
  # Store settings with timestamp
1623
  if webrtc_id:
1624
  connection_settings[webrtc_id] = {
1625
  'web_search_enabled': web_search_enabled,
1626
- 'target_language': target_language,
1627
  'system_prompt': system_prompt,
 
1628
  'timestamp': asyncio.get_event_loop().time()
1629
  }
1630
 
@@ -1651,6 +1614,43 @@ async def custom_offer(request: Request):
1651
  return response
1652
 
1653
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1654
  @app.post("/chat/text")
1655
  async def chat_text(request: Request):
1656
  """Handle text chat messages using GPT-4o-mini"""
@@ -1658,14 +1658,14 @@ async def chat_text(request: Request):
1658
  body = await request.json()
1659
  message = body.get("message", "")
1660
  web_search_enabled = body.get("web_search_enabled", False)
1661
- target_language = body.get("target_language", "")
1662
  system_prompt = body.get("system_prompt", "")
 
1663
 
1664
  if not message:
1665
  return {"error": "메시지가 비어있습니다."}
1666
 
1667
  # Process text chat
1668
- result = await process_text_chat(message, web_search_enabled, target_language, system_prompt)
1669
 
1670
  return result
1671
 
@@ -1708,7 +1708,7 @@ async def outputs(webrtc_id: str):
1708
  data = {
1709
  "role": "assistant",
1710
  "content": event_data['event'].transcript,
1711
- "language": event_data.get('language', '')
1712
  }
1713
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
1714
 
 
23
  import io
24
  from scipy import signal
25
  import wave
26
+ import aiosqlite
27
+ from langdetect import detect, LangDetectException
28
+ from datetime import datetime
29
+ import uuid
30
 
31
  load_dotenv()
32
 
33
  SAMPLE_RATE = 24000
34
+ DB_PATH = "chat_history.db"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # HTML content embedded as a string
37
  HTML_CONTENT = """<!DOCTYPE html>
 
171
  .toggle-switch.active .toggle-slider {
172
  transform: translateX(24px);
173
  }
174
+ /* History section */
175
+ .history-section {
176
  background-color: var(--card-bg);
177
+ border-radius: 12px;
178
+ padding: 20px;
179
  border: 1px solid var(--border-color);
180
+ margin-top: 20px;
181
+ max-height: 300px;
182
+ overflow-y: auto;
183
+ }
184
+ .history-item {
185
+ padding: 10px;
186
+ margin-bottom: 10px;
187
+ background-color: var(--dark-bg);
188
  border-radius: 6px;
 
189
  cursor: pointer;
190
+ transition: background-color 0.2s;
 
191
  }
192
+ .history-item:hover {
193
+ background-color: var(--hover-color);
194
+ }
195
+ .history-date {
196
+ font-size: 12px;
197
+ color: #888;
198
+ }
199
+ .history-preview {
200
+ font-size: 14px;
201
+ margin-top: 5px;
202
+ overflow: hidden;
203
+ text-overflow: ellipsis;
204
+ white-space: nowrap;
205
  }
206
  /* Text inputs */
207
  .text-input-section {
 
507
  <div class="main-content">
508
  <div class="sidebar">
509
  <div class="settings-section">
510
+ <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정</h3>
511
  <div class="settings-grid">
512
  <div class="setting-item">
513
  <span class="setting-label">웹 검색</span>
 
515
  <div class="toggle-slider"></div>
516
  </div>
517
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
518
  </div>
519
  <div class="text-input-section">
520
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
 
522
  </div>
523
  </div>
524
 
525
+ <div class="history-section">
526
+ <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화 기록</h3>
527
+ <div id="history-list"></div>
528
+ </div>
529
+
530
  <div class="controls">
531
  <button id="start-button">대화 시작</button>
532
  </div>
 
552
  let peerConnection;
553
  let webrtc_id;
554
  let webSearchEnabled = false;
 
555
  let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
556
+ let currentSessionId = null;
557
  const audioOutput = document.getElementById('audio-output');
558
  const startButton = document.getElementById('start-button');
559
  const sendButton = document.getElementById('send-button');
 
561
  const statusDot = document.getElementById('status-dot');
562
  const statusText = document.getElementById('status-text');
563
  const searchToggle = document.getElementById('search-toggle');
 
564
  const systemPromptInput = document.getElementById('system-prompt');
565
  const textInput = document.getElementById('text-input');
566
+ const historyList = document.getElementById('history-list');
567
  let audioLevel = 0;
568
  let animationFrame;
569
  let audioContext, analyser, audioSource;
570
  let dataChannel = null;
571
  let isVoiceActive = false;
572
 
573
+ // Start new session
574
+ async function startNewSession() {
575
+ const response = await fetch('/session/new', { method: 'POST' });
576
+ const data = await response.json();
577
+ currentSessionId = data.session_id;
578
+ console.log('New session started:', currentSessionId);
579
+ loadHistory();
580
+ }
581
+
582
+ // Load conversation history
583
+ async function loadHistory() {
584
+ try {
585
+ const response = await fetch('/history/recent');
586
+ const conversations = await response.json();
587
+
588
+ historyList.innerHTML = '';
589
+ conversations.forEach(conv => {
590
+ const item = document.createElement('div');
591
+ item.className = 'history-item';
592
+ item.innerHTML = `
593
+ <div class="history-date">${new Date(conv.created_at).toLocaleString()}</div>
594
+ <div class="history-preview">${conv.summary || '대화 시작'}</div>
595
+ `;
596
+ item.onclick = () => loadConversation(conv.id);
597
+ historyList.appendChild(item);
598
+ });
599
+ } catch (error) {
600
+ console.error('Failed to load history:', error);
601
+ }
602
+ }
603
+
604
+ // Load specific conversation
605
+ async function loadConversation(sessionId) {
606
+ try {
607
+ const response = await fetch(`/history/${sessionId}`);
608
+ const messages = await response.json();
609
+
610
+ chatMessages.innerHTML = '';
611
+ messages.forEach(msg => {
612
+ addMessage(msg.role, msg.content, false);
613
+ });
614
+ } catch (error) {
615
+ console.error('Failed to load conversation:', error);
616
+ }
617
+ }
618
+
619
  // Web search toggle functionality
620
  searchToggle.addEventListener('click', () => {
621
  webSearchEnabled = !webSearchEnabled;
 
623
  console.log('Web search enabled:', webSearchEnabled);
624
  });
625
 
 
 
 
 
 
 
626
  // System prompt update
627
  systemPromptInput.addEventListener('input', () => {
628
  systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
 
662
  body: JSON.stringify({
663
  message: message,
664
  web_search_enabled: webSearchEnabled,
665
+ system_prompt: systemPrompt,
666
+ session_id: currentSessionId
667
  })
668
  });
669
 
 
678
  } else {
679
  // Add assistant response
680
  let content = data.response;
681
+ if (data.detected_language) {
682
+ content += ` <span class="language-info">[${data.detected_language}]</span>`;
683
  }
684
  addMessage('assistant', content);
685
  }
 
847
  console.log('Sending offer with settings:', {
848
  webrtc_id: webrtc_id,
849
  web_search_enabled: webSearchEnabled,
850
+ system_prompt: systemPrompt,
851
+ session_id: currentSessionId
852
  });
853
 
854
  const response = await fetch('/webrtc/offer', {
 
859
  type: peerConnection.localDescription.type,
860
  webrtc_id: webrtc_id,
861
  web_search_enabled: webSearchEnabled,
862
+ system_prompt: systemPrompt,
863
+ session_id: currentSessionId
864
  })
865
  });
866
  const serverResponse = await response.json();
 
877
  const eventJson = JSON.parse(event.data);
878
  let content = eventJson.content;
879
 
880
+ if (eventJson.detected_language) {
881
+ content += ` <span class="language-info">[${eventJson.detected_language}]</span>`;
882
  }
883
  addMessage("assistant", content);
884
  });
 
895
  stop();
896
  }
897
  }
898
+ function addMessage(role, content, save = true) {
899
  const messageDiv = document.createElement('div');
900
  messageDiv.classList.add('message', role);
901
 
 
906
  }
907
  chatMessages.appendChild(messageDiv);
908
  chatMessages.scrollTop = chatMessages.scrollHeight;
909
+
910
+ // Save message to database if save flag is true
911
+ if (save && currentSessionId) {
912
+ fetch('/message/save', {
913
+ method: 'POST',
914
+ headers: { 'Content-Type': 'application/json' },
915
+ body: JSON.stringify({
916
+ session_id: currentSessionId,
917
+ role: role,
918
+ content: content
919
+ })
920
+ }).catch(error => console.error('Failed to save message:', error));
921
+ }
922
  }
923
 
924
  function stop() {
 
1011
  // Initialize on page load
1012
  window.addEventListener('DOMContentLoaded', () => {
1013
  sendButton.style.display = 'block';
1014
+ startNewSession();
1015
+ loadHistory();
1016
  });
1017
  </script>
1018
  </body>
 
1061
  return []
1062
 
1063
 
1064
+ # Database helper class
1065
class ChatDatabase:
    """Persistence layer for chat sessions and their messages.

    The class holds no state: every method is a static coroutine that opens
    its own ``aiosqlite`` connection to ``DB_PATH`` and closes it on exit.
    """

    # Content must be longer than this many characters before language
    # detection is attempted.
    _MIN_DETECT_CHARS = 10
    # Maximum number of characters of the first user message kept as the
    # conversation summary (an ellipsis is appended when truncated).
    _SUMMARY_CHARS = 100

    @staticmethod
    def _detect_language(content: str) -> str | None:
        """Return the language code reported by ``detect``, or ``None``.

        ``None`` is returned when the content is empty, not longer than
        ``_MIN_DETECT_CHARS`` characters, or when the detector raises
        ``LangDetectException``.
        """
        if not content or len(content) <= ChatDatabase._MIN_DETECT_CHARS:
            return None
        try:
            return detect(content)
        except LangDetectException:
            return None

    @staticmethod
    async def init():
        """Create the ``conversations`` and ``messages`` tables if missing."""
        async with aiosqlite.connect(DB_PATH) as db:
            await db.execute("""
                CREATE TABLE IF NOT EXISTS conversations (
                    id TEXT PRIMARY KEY,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    summary TEXT
                )
            """)

            # NOTE(review): the foreign key is declared here, but no method in
            # this class issues "PRAGMA foreign_keys = ON", so SQLite is not
            # asked to enforce it on these connections - confirm if intended.
            await db.execute("""
                CREATE TABLE IF NOT EXISTS messages (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    session_id TEXT NOT NULL,
                    role TEXT NOT NULL,
                    content TEXT NOT NULL,
                    detected_language TEXT,
                    timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (session_id) REFERENCES conversations(id)
                )
            """)

            await db.commit()

    @staticmethod
    async def create_session(session_id: str):
        """Insert a new conversation row identified by ``session_id``."""
        async with aiosqlite.connect(DB_PATH) as db:
            await db.execute(
                "INSERT INTO conversations (id) VALUES (?)",
                (session_id,)
            )
            await db.commit()

    @staticmethod
    async def save_message(session_id: str, role: str, content: str):
        """Store one message and refresh its conversation's metadata.

        Besides inserting the message (with its detected language, if any),
        this bumps the conversation's ``updated_at`` and, for the first user
        message of a conversation without a summary, stores the message text
        (truncated to ``_SUMMARY_CHARS`` characters plus "...") as summary.
        """
        detected_language = ChatDatabase._detect_language(content)

        async with aiosqlite.connect(DB_PATH) as db:
            await db.execute(
                """INSERT INTO messages (session_id, role, content, detected_language)
                   VALUES (?, ?, ?, ?)""",
                (session_id, role, content, detected_language)
            )

            # Mark the conversation as recently active so it sorts first in
            # get_recent_conversations().
            await db.execute(
                "UPDATE conversations SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (session_id,)
            )

            # The first user message doubles as the conversation summary.
            if role == "user":
                cursor = await db.execute(
                    "SELECT summary FROM conversations WHERE id = ?",
                    (session_id,)
                )
                row = await cursor.fetchone()
                # row is None when no conversation row exists for this id.
                if row and not row[0]:
                    limit = ChatDatabase._SUMMARY_CHARS
                    summary = content[:limit] + "..." if len(content) > limit else content
                    await db.execute(
                        "UPDATE conversations SET summary = ? WHERE id = ?",
                        (summary, session_id)
                    )

            await db.commit()

    @staticmethod
    async def get_recent_conversations(limit: int = 10):
        """Return up to ``limit`` conversations, most recently updated first.

        Each entry is a dict with ``id``, ``created_at`` and ``summary``; a
        missing summary is replaced by a placeholder string.
        """
        async with aiosqlite.connect(DB_PATH) as db:
            cursor = await db.execute(
                """SELECT id, created_at, summary
                   FROM conversations
                   ORDER BY updated_at DESC
                   LIMIT ?""",
                (limit,)
            )
            rows = await cursor.fetchall()
            return [
                {
                    "id": row[0],
                    "created_at": row[1],
                    "summary": row[2] or "새 대화"
                }
                for row in rows
            ]

    @staticmethod
    async def get_conversation_messages(session_id: str):
        """Return every message of a conversation in the order it was stored.

        Each entry is a dict with ``role``, ``content``, ``detected_language``
        and ``timestamp``.
        """
        async with aiosqlite.connect(DB_PATH) as db:
            # CURRENT_TIMESTAMP has one-second resolution, so messages saved
            # within the same second share a timestamp. The autoincrement id
            # breaks such ties and keeps replay in insertion order.
            cursor = await db.execute(
                """SELECT role, content, detected_language, timestamp
                   FROM messages
                   WHERE session_id = ?
                   ORDER BY timestamp ASC, id ASC""",
                (session_id,)
            )
            rows = await cursor.fetchall()
            return [
                {
                    "role": row[0],
                    "content": row[1],
                    "detected_language": row[2],
                    "timestamp": row[3]
                }
                for row in rows
            ]
1187
+
1188
+
1189
  # Initialize search client globally
1190
  brave_api_key = os.getenv("BSEARCH_API")
1191
  search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
 
1197
  # Initialize OpenAI client for text chat
1198
  client = openai.AsyncOpenAI()
1199
 
 
 
 
 
 
 
 
 
 
 
1200
 
1201
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
    """Append the finished assistant transcript to the chat history.

    The ``chatbot`` list is mutated in place and the same list is returned.
    """
    assistant_turn = {"role": "assistant", "content": response.transcript}
    chatbot.append(assistant_turn)
    return chatbot
1204
 
1205
 
1206
+ async def process_text_chat(message: str, web_search_enabled: bool, system_prompt: str, session_id: str) -> Dict[str, str]:
 
1207
  """Process text chat using GPT-4o-mini model"""
1208
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  messages = [
1210
+ {"role": "system", "content": system_prompt or "You are a helpful assistant."}
1211
  ]
1212
 
1213
  # Handle web search if enabled
 
1227
  for i, result in enumerate(search_results[:5], 1):
1228
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1229
 
 
 
 
 
 
 
1230
  messages.append({
1231
  "role": "system",
1232
+ "content": "다음 검색 결과를 참고하여 답변하세요:\n\n" + search_context
1233
  })
1234
 
1235
+ messages.append({"role": "user", "content": message})
 
1236
 
1237
  # Call GPT-4o-mini
1238
  response = await client.chat.completions.create(
 
1244
 
1245
  response_text = response.choices[0].message.content
1246
 
1247
+ # Detect language
1248
+ detected_language = None
1249
+ try:
1250
+ if response_text and len(response_text) > 10:
1251
+ detected_language = detect(response_text)
1252
+ except:
1253
+ pass
 
 
 
 
 
 
 
1254
 
1255
+ # Save messages to database
1256
+ if session_id:
1257
+ await ChatDatabase.save_message(session_id, "user", message)
1258
+ await ChatDatabase.save_message(session_id, "assistant", response_text)
1259
 
1260
  return {
1261
  "response": response_text,
1262
+ "detected_language": detected_language
1263
  }
1264
 
1265
  except Exception as e:
 
1268
 
1269
 
1270
  class OpenAIHandler(AsyncStreamHandler):
1271
+ def __init__(self, web_search_enabled: bool = False, system_prompt: str = "",
1272
+ webrtc_id: str = None, session_id: str = None) -> None:
1273
  super().__init__(
1274
  expected_layout="mono",
1275
  output_sample_rate=SAMPLE_RATE,
 
1284
  self.current_call_id = None
1285
  self.webrtc_id = webrtc_id
1286
  self.web_search_enabled = web_search_enabled
 
1287
  self.system_prompt = system_prompt
1288
+ self.session_id = session_id
1289
 
1290
+ print(f"[INIT] Handler created with web_search={web_search_enabled}, session_id={session_id}")
 
1291
 
1292
  def copy(self):
1293
  # Get the most recent settings
 
1305
 
1306
  return OpenAIHandler(
1307
  web_search_enabled=settings.get('web_search_enabled', False),
 
1308
  system_prompt=settings.get('system_prompt', ''),
1309
+ webrtc_id=recent_id,
1310
+ session_id=settings.get('session_id')
1311
  )
1312
 
1313
  print(f"[COPY] No settings found, creating default handler")
 
1346
  )
1347
  await self.connection.response.create()
1348
 
 
 
 
 
 
 
 
 
 
 
 
 
1349
  async def start_up(self):
1350
  """Connect to realtime API"""
1351
  # First check if we have the most recent settings
 
1353
  if self.webrtc_id in connection_settings:
1354
  settings = connection_settings[self.webrtc_id]
1355
  self.web_search_enabled = settings.get('web_search_enabled', False)
 
1356
  self.system_prompt = settings.get('system_prompt', '')
1357
+ self.session_id = settings.get('session_id')
1358
 
1359
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1360
 
 
 
1361
  self.client = openai.AsyncOpenAI()
1362
 
1363
+ # Connect to Realtime API
1364
+ print(f"[REALTIME API] Connecting...")
1365
 
1366
  # Define the web search function
1367
  tools = []
1368
  base_instructions = self.system_prompt or "You are a helpful assistant."
1369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1370
  if self.web_search_enabled and self.search_client:
1371
  tools = [{
1372
  "type": "function",
 
1401
  "than to guess or use outdated information."
1402
  )
1403
 
1404
+ instructions = base_instructions + search_instructions
 
 
 
 
 
1405
  else:
1406
+ instructions = base_instructions
 
 
 
 
 
 
 
 
 
1407
 
1408
  async with self.client.beta.realtime.connect(
1409
  model="gpt-4o-mini-realtime-preview-2024-12-17"
 
1417
  "temperature": 0.7,
1418
  "max_response_output_tokens": 4096,
1419
  "modalities": ["text", "audio"],
1420
+ "voice": "alloy"
1421
  }
1422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1423
  await conn.session.update(session=session_update)
1424
  self.connection = conn
1425
+ print(f"Connected with tools: {len(tools)} functions")
1426
 
1427
  async for event in self.connection:
1428
  # Debug logging for function calls
 
1431
 
1432
  if event.type == "response.audio_transcript.done":
1433
  print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
1434
+
1435
+ # Detect language
1436
+ detected_language = None
1437
+ try:
1438
+ if event.transcript and len(event.transcript) > 10:
1439
+ detected_language = detect(event.transcript)
1440
+ except:
1441
+ pass
1442
+
1443
+ # Save to database
1444
+ if self.session_id:
1445
+ await ChatDatabase.save_message(self.session_id, "assistant", event.transcript)
1446
 
1447
  output_data = {
1448
  "event": event,
1449
+ "detected_language": detected_language
1450
  }
1451
  await self.output_queue.put(AdditionalOutputs(output_data))
1452
 
 
1507
  self.current_call_id = None
1508
 
1509
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
 
1510
  if not self.connection:
1511
+ print(f"[RECEIVE] No connection, skipping")
1512
  return
1513
  try:
1514
  _, array = frame
 
1519
  print(f"Error in receive: {e}")
1520
 
1521
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
 
1522
  item = await wait_for_item(self.output_queue)
1523
 
1524
  # Check if it's a dict with text message
 
1531
  async def shutdown(self) -> None:
1532
  print(f"[SHUTDOWN] Called")
1533
 
 
1534
  if self.connection:
1535
  await self.connection.close()
1536
  self.connection = None
1537
+ print("[REALTIME API] Connection closed")
1538
 
1539
 
1540
  # Create initial handler instance
 
1561
  # Mount stream
1562
  stream.mount(app)
1563
 
1564
+ # Initialize database on startup
1565
@app.on_event("startup")
async def startup_event():
    """Run on application startup: make sure the chat-history tables exist."""
    await ChatDatabase.init()
    print("Database initialized")
1569
+
1570
  # Intercept offer to capture settings
1571
  @app.post("/webrtc/offer", include_in_schema=False)
1572
  async def custom_offer(request: Request):
 
1575
 
1576
  webrtc_id = body.get("webrtc_id")
1577
  web_search_enabled = body.get("web_search_enabled", False)
 
1578
  system_prompt = body.get("system_prompt", "")
1579
+ session_id = body.get("session_id")
1580
 
1581
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
1582
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
1583
+ print(f"[OFFER] session_id: {session_id}")
1584
 
1585
  # Store settings with timestamp
1586
  if webrtc_id:
1587
  connection_settings[webrtc_id] = {
1588
  'web_search_enabled': web_search_enabled,
 
1589
  'system_prompt': system_prompt,
1590
+ 'session_id': session_id,
1591
  'timestamp': asyncio.get_event_loop().time()
1592
  }
1593
 
 
1614
  return response
1615
 
1616
 
1617
@app.post("/session/new")
async def create_new_session():
    """Generate a random session id, persist it, and return it to the client."""
    new_id = f"{uuid.uuid4()}"
    await ChatDatabase.create_session(new_id)
    return dict(session_id=new_id)
1623
+
1624
+
1625
@app.post("/message/save")
async def save_message(request: Request):
    """Persist a single chat message posted by the client.

    The request body must be a JSON object whose ``session_id``, ``role`` and
    ``content`` fields are all non-empty strings.

    Returns ``{"status": "ok"}`` after the message is stored, or
    ``{"error": "Missing required fields"}`` when validation fails.
    """
    body = await request.json()

    # A JSON array/number/string body has no fields to read at all.
    if not isinstance(body, dict):
        return {"error": "Missing required fields"}

    session_id = body.get("session_id")
    role = body.get("role")
    content = body.get("content")

    # Truthiness alone is not enough: a numeric or list value would pass and
    # then fail later inside ChatDatabase.save_message (len() / SQL binding).
    if not all(isinstance(value, str) and value for value in (session_id, role, content)):
        return {"error": "Missing required fields"}

    await ChatDatabase.save_message(session_id, role, content)
    return {"status": "ok"}
1638
+
1639
+
1640
@app.get("/history/recent")
async def get_recent_history():
    """Return the most recently updated conversations as a JSON list."""
    return await ChatDatabase.get_recent_conversations()
1645
+
1646
+
1647
@app.get("/history/{session_id}")
async def get_conversation(session_id: str):
    """Return the stored messages of the conversation named in the URL path."""
    return await ChatDatabase.get_conversation_messages(session_id)
1652
+
1653
+
1654
  @app.post("/chat/text")
1655
  async def chat_text(request: Request):
1656
  """Handle text chat messages using GPT-4o-mini"""
 
1658
  body = await request.json()
1659
  message = body.get("message", "")
1660
  web_search_enabled = body.get("web_search_enabled", False)
 
1661
  system_prompt = body.get("system_prompt", "")
1662
+ session_id = body.get("session_id")
1663
 
1664
  if not message:
1665
  return {"error": "메시지가 비어있습니다."}
1666
 
1667
  # Process text chat
1668
+ result = await process_text_chat(message, web_search_enabled, system_prompt, session_id)
1669
 
1670
  return result
1671
 
 
1708
  data = {
1709
  "role": "assistant",
1710
  "content": event_data['event'].transcript,
1711
+ "detected_language": event_data.get('detected_language')
1712
  }
1713
  yield f"event: output\ndata: {json.dumps(data)}\n\n"
1714