seawolf2357 commited on
Commit
4b7d6e9
·
verified ·
1 Parent(s): 6415ee8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +384 -322
app.py CHANGED
@@ -23,6 +23,11 @@ import gradio as gr
23
  import io
24
  from scipy import signal
25
  import wave
 
 
 
 
 
26
 
27
  load_dotenv()
28
 
@@ -61,6 +66,106 @@ SUPPORTED_LANGUAGES = {
61
  "ms": "Bahasa Melayu (Malay)"
62
  }
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # HTML content embedded as a string
65
  HTML_CONTENT = """<!DOCTYPE html>
66
  <html lang="ko">
@@ -147,6 +252,60 @@ HTML_CONTENT = """<!DOCTYPE html>
147
  font-size: 32px;
148
  letter-spacing: 1px;
149
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  /* Settings section */
151
  .settings-section {
152
  background-color: var(--card-bg);
@@ -518,8 +677,14 @@ HTML_CONTENT = """<!DOCTYPE html>
518
 
519
  <div class="main-content">
520
  <div class="sidebar">
 
 
 
 
 
 
521
  <div class="settings-section">
522
- <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정(텍스트 채팅에만 적용)</h3>
523
  <div class="settings-grid">
524
  <div class="setting-item">
525
  <span class="setting-label">웹 검색</span>
@@ -528,9 +693,15 @@ HTML_CONTENT = """<!DOCTYPE html>
528
  </div>
529
  </div>
530
  <div class="setting-item">
531
- <span class="setting-label">다국어 번역 채팅</span>
 
 
 
 
 
 
532
  <select id="language-select">
533
- <option value="">비활성화</option>
534
  <option value="ko">한국어 (Korean)</option>
535
  <option value="en">English</option>
536
  <option value="es">Español (Spanish)</option>
@@ -565,7 +736,7 @@ HTML_CONTENT = """<!DOCTYPE html>
565
  </div>
566
  <div class="text-input-section">
567
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
568
- <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
569
  </div>
570
  </div>
571
 
@@ -594,8 +765,11 @@ HTML_CONTENT = """<!DOCTYPE html>
594
  let peerConnection;
595
  let webrtc_id;
596
  let webSearchEnabled = false;
 
597
  let selectedLanguage = "";
598
- let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
 
 
599
  const audioOutput = document.getElementById('audio-output');
600
  const startButton = document.getElementById('start-button');
601
  const sendButton = document.getElementById('send-button');
@@ -603,15 +777,87 @@ HTML_CONTENT = """<!DOCTYPE html>
603
  const statusDot = document.getElementById('status-dot');
604
  const statusText = document.getElementById('status-text');
605
  const searchToggle = document.getElementById('search-toggle');
 
606
  const languageSelect = document.getElementById('language-select');
607
  const systemPromptInput = document.getElementById('system-prompt');
608
  const textInput = document.getElementById('text-input');
 
 
609
  let audioLevel = 0;
610
  let animationFrame;
611
  let audioContext, analyser, audioSource;
612
  let dataChannel = null;
613
  let isVoiceActive = false;
614
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
615
  // Web search toggle functionality
616
  searchToggle.addEventListener('click', () => {
617
  webSearchEnabled = !webSearchEnabled;
@@ -619,6 +865,14 @@ HTML_CONTENT = """<!DOCTYPE html>
619
  console.log('Web search enabled:', webSearchEnabled);
620
  });
621
 
 
 
 
 
 
 
 
 
622
  // Language selection
623
  languageSelect.addEventListener('change', () => {
624
  selectedLanguage = languageSelect.value;
@@ -627,7 +881,7 @@ HTML_CONTENT = """<!DOCTYPE html>
627
 
628
  // System prompt update
629
  systemPromptInput.addEventListener('input', () => {
630
- systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
631
  });
632
 
633
  // Text input handling
@@ -645,7 +899,7 @@ HTML_CONTENT = """<!DOCTYPE html>
645
  if (!message) return;
646
 
647
  // Add user message to chat
648
- addMessage('user', message);
649
  textInput.value = '';
650
 
651
  // Show sending indicator
@@ -664,8 +918,10 @@ HTML_CONTENT = """<!DOCTYPE html>
664
  body: JSON.stringify({
665
  message: message,
666
  web_search_enabled: webSearchEnabled,
667
- target_language: selectedLanguage,
668
- system_prompt: systemPrompt
 
 
669
  })
670
  });
671
 
@@ -680,10 +936,13 @@ HTML_CONTENT = """<!DOCTYPE html>
680
  } else {
681
  // Add assistant response
682
  let content = data.response;
683
- if (selectedLanguage && data.language) {
684
  content += ` <span class="language-info">[${data.language}]</span>`;
685
  }
686
- addMessage('assistant', content);
 
 
 
687
  }
688
  } catch (error) {
689
  console.error('Error sending text message:', error);
@@ -704,10 +963,11 @@ HTML_CONTENT = """<!DOCTYPE html>
704
  sendButton.style.display = 'none';
705
  } else {
706
  statusText.textContent = '연결 대기 중';
707
- sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
708
  isVoiceActive = false;
709
  }
710
  }
 
711
  function updateButtonState() {
712
  const button = document.getElementById('start-button');
713
  if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
@@ -737,6 +997,7 @@ HTML_CONTENT = """<!DOCTYPE html>
737
  updateStatus('disconnected');
738
  }
739
  }
 
740
  function setupAudioVisualization(stream) {
741
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
742
  analyser = audioContext.createAnalyser();
@@ -771,6 +1032,7 @@ HTML_CONTENT = """<!DOCTYPE html>
771
 
772
  updateAudioLevel();
773
  }
 
774
  function showError(message) {
775
  const toast = document.getElementById('error-toast');
776
  toast.textContent = message;
@@ -780,6 +1042,7 @@ HTML_CONTENT = """<!DOCTYPE html>
780
  toast.style.display = 'none';
781
  }, 5000);
782
  }
 
783
  async function setupWebRTC() {
784
  const config = __RTC_CONFIGURATION__;
785
  peerConnection = new RTCPeerConnection(config);
@@ -792,6 +1055,7 @@ HTML_CONTENT = """<!DOCTYPE html>
792
  toast.style.display = 'none';
793
  }, 5000);
794
  }, 5000);
 
795
  try {
796
  const stream = await navigator.mediaDevices.getUserMedia({
797
  audio: true
@@ -834,6 +1098,7 @@ HTML_CONTENT = """<!DOCTYPE html>
834
  peerConnection.addEventListener("icegatheringstatechange", checkState);
835
  }
836
  });
 
837
  peerConnection.addEventListener('connectionstatechange', () => {
838
  console.log('connectionstatechange', peerConnection.connectionState);
839
  if (peerConnection.connectionState === 'connected') {
@@ -843,15 +1108,8 @@ HTML_CONTENT = """<!DOCTYPE html>
843
  }
844
  updateButtonState();
845
  });
846
- webrtc_id = Math.random().toString(36).substring(7);
847
 
848
- // Log current settings before sending
849
- console.log('Sending offer with settings:', {
850
- webrtc_id: webrtc_id,
851
- web_search_enabled: webSearchEnabled,
852
- target_language: selectedLanguage,
853
- system_prompt: systemPrompt
854
- });
855
 
856
  const response = await fetch('/webrtc/offer', {
857
  method: 'POST',
@@ -861,10 +1119,13 @@ HTML_CONTENT = """<!DOCTYPE html>
861
  type: peerConnection.localDescription.type,
862
  webrtc_id: webrtc_id,
863
  web_search_enabled: webSearchEnabled,
864
- target_language: selectedLanguage,
865
- system_prompt: systemPrompt
 
 
866
  })
867
  });
 
868
  const serverResponse = await response.json();
869
  if (serverResponse.status === 'failed') {
870
  showError(serverResponse.meta.error === 'concurrency_limit_reached'
@@ -873,21 +1134,25 @@ HTML_CONTENT = """<!DOCTYPE html>
873
  stop();
874
  return;
875
  }
 
876
  await peerConnection.setRemoteDescription(serverResponse);
877
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
878
- eventSource.addEventListener("output", (event) => {
879
  const eventJson = JSON.parse(event.data);
880
  let content = eventJson.content;
881
 
882
- if (selectedLanguage && eventJson.language) {
883
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
884
  }
885
- addMessage("assistant", content);
 
 
 
886
  });
887
  eventSource.addEventListener("search", (event) => {
888
  const eventJson = JSON.parse(event.data);
889
  if (eventJson.query) {
890
- addMessage("search-result", `웹 검색 중: "${eventJson.query}"`);
891
  }
892
  });
893
  } catch (err) {
@@ -897,7 +1162,8 @@ HTML_CONTENT = """<!DOCTYPE html>
897
  stop();
898
  }
899
  }
900
- function addMessage(role, content) {
 
901
  const messageDiv = document.createElement('div');
902
  messageDiv.classList.add('message', role);
903
 
@@ -908,18 +1174,30 @@ HTML_CONTENT = """<!DOCTYPE html>
908
  }
909
  chatMessages.appendChild(messageDiv);
910
  chatMessages.scrollTop = chatMessages.scrollHeight;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
  }
912
 
913
  function stop() {
914
  console.log('[STOP] Stopping connection...');
915
 
916
- // Cancel animation frame first
917
  if (animationFrame) {
918
  cancelAnimationFrame(animationFrame);
919
  animationFrame = null;
920
  }
921
 
922
- // Close audio context
923
  if (audioContext) {
924
  audioContext.close();
925
  audioContext = null;
@@ -927,17 +1205,14 @@ HTML_CONTENT = """<!DOCTYPE html>
927
  audioSource = null;
928
  }
929
 
930
- // Close data channel
931
  if (dataChannel) {
932
  dataChannel.close();
933
  dataChannel = null;
934
  }
935
 
936
- // Close peer connection
937
  if (peerConnection) {
938
  console.log('[STOP] Current connection state:', peerConnection.connectionState);
939
 
940
- // Stop all transceivers
941
  if (peerConnection.getTransceivers) {
942
  peerConnection.getTransceivers().forEach(transceiver => {
943
  if (transceiver.stop) {
@@ -946,7 +1221,6 @@ HTML_CONTENT = """<!DOCTYPE html>
946
  });
947
  }
948
 
949
- // Stop all senders
950
  if (peerConnection.getSenders) {
951
  peerConnection.getSenders().forEach(sender => {
952
  if (sender.track) {
@@ -955,7 +1229,6 @@ HTML_CONTENT = """<!DOCTYPE html>
955
  });
956
  }
957
 
958
- // Stop all receivers
959
  if (peerConnection.getReceivers) {
960
  peerConnection.getReceivers().forEach(receiver => {
961
  if (receiver.track) {
@@ -964,28 +1237,22 @@ HTML_CONTENT = """<!DOCTYPE html>
964
  });
965
  }
966
 
967
- // Close the connection
968
  peerConnection.close();
969
-
970
- // Clear the reference
971
  peerConnection = null;
972
 
973
  console.log('[STOP] Connection closed');
974
  }
975
 
976
- // Reset audio level
977
  audioLevel = 0;
978
  isVoiceActive = false;
979
-
980
- // Update UI
981
  updateButtonState();
982
 
983
- // Clear any existing webrtc_id
984
  if (webrtc_id) {
985
  console.log('[STOP] Clearing webrtc_id:', webrtc_id);
986
  webrtc_id = null;
987
  }
988
  }
 
989
  startButton.addEventListener('click', () => {
990
  console.log('clicked');
991
  console.log(peerConnection, peerConnection?.connectionState);
@@ -996,11 +1263,6 @@ HTML_CONTENT = """<!DOCTYPE html>
996
  stop();
997
  }
998
  });
999
-
1000
- // Initialize on page load
1001
- window.addEventListener('DOMContentLoaded', () => {
1002
- sendButton.style.display = 'block';
1003
- });
1004
  </script>
1005
  </body>
1006
 
@@ -1051,7 +1313,6 @@ class BraveSearchClient:
1051
  # Initialize search client globally
1052
  brave_api_key = os.getenv("BSEARCH_API")
1053
  search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
1054
- print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
1055
 
1056
  # Store connection settings
1057
  connection_settings = {}
@@ -1076,32 +1337,25 @@ def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEve
1076
 
1077
 
1078
  async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
1079
- system_prompt: str) -> Dict[str, str]:
1080
  """Process text chat using GPT-4o-mini model"""
1081
  try:
1082
- # If target language is set, override system prompt completely
1083
- if target_language:
1084
- language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
1085
-
1086
- # Create system prompt in target language
1087
- if target_language == "en":
1088
- base_instructions = f"You are a helpful assistant. You speak ONLY English. Never use Korean or any other language. {system_prompt}"
1089
- user_prefix = "Please respond in English: "
1090
- elif target_language == "ja":
1091
- base_instructions = f"あなたは親切なアシスタントです。日本語のみを話します。韓国語や他の言語は絶対に使用しません。{system_prompt}"
1092
- user_prefix = "日本語で答えてください: "
1093
- elif target_language == "zh":
1094
- base_instructions = f"你是一个乐于助人的助手。你只说中文。绝不使用韩语或其他语言。{system_prompt}"
1095
- user_prefix = "请用中文回答: "
1096
- elif target_language == "es":
1097
- base_instructions = f"Eres un asistente útil. Solo hablas español. Nunca uses coreano u otros idiomas. {system_prompt}"
1098
- user_prefix = "Por favor responde en español: "
1099
  else:
1100
- base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}. {system_prompt}"
1101
- user_prefix = f"Please respond in {language_name}: "
 
 
 
 
 
 
1102
  else:
1103
- base_instructions = system_prompt or "You are a helpful assistant."
1104
- user_prefix = ""
1105
 
1106
  messages = [
1107
  {"role": "system", "content": base_instructions}
@@ -1109,7 +1363,6 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1109
 
1110
  # Handle web search if enabled
1111
  if web_search_enabled and search_client:
1112
- # Check if the message requires web search
1113
  search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
1114
  "오늘", "지금", "가격", "환율", "주가", "weather", "news",
1115
  "current", "today", "price", "2024", "2025"]
@@ -1117,26 +1370,18 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1117
  should_search = any(keyword in message.lower() for keyword in search_keywords)
1118
 
1119
  if should_search:
1120
- # Perform web search
1121
  search_results = await search_client.search(message)
1122
  if search_results:
1123
  search_context = "웹 검색 결과:\n\n"
1124
  for i, result in enumerate(search_results[:5], 1):
1125
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1126
 
1127
- # Add search context in target language if set
1128
- if target_language:
1129
- search_instruction = f"Use this search information but respond in {SUPPORTED_LANGUAGES.get(target_language, target_language)} only: "
1130
- else:
1131
- search_instruction = "다음 웹 검색 결과를 참고하여 답변하세요: "
1132
-
1133
  messages.append({
1134
  "role": "system",
1135
- "content": search_instruction + "\n\n" + search_context
1136
  })
1137
 
1138
- # Add user message with language prefix
1139
- messages.append({"role": "user", "content": user_prefix + message})
1140
 
1141
  # Call GPT-4o-mini
1142
  response = await client.chat.completions.create(
@@ -1148,23 +1393,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1148
 
1149
  response_text = response.choices[0].message.content
1150
 
1151
- # Final check - remove any Korean if target language is not Korean
1152
- if target_language and target_language != "ko":
1153
- import re
1154
- if re.search(r'[가-힣]', response_text):
1155
- print(f"[TEXT CHAT] WARNING: Korean detected in response for {target_language}")
1156
- # Try again with stronger prompt
1157
- messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
1158
- retry_response = await client.chat.completions.create(
1159
- model="gpt-4.1-mini",
1160
- messages=messages,
1161
- temperature=0.3,
1162
- max_tokens=2000
1163
- )
1164
- response_text = retry_response.choices[0].message.content
1165
-
1166
- print(f"[TEXT CHAT] Target language: {target_language}")
1167
- print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
1168
 
1169
  return {
1170
  "response": response_text,
@@ -1178,7 +1408,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1178
 
1179
  class OpenAIHandler(AsyncStreamHandler):
1180
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1181
- system_prompt: str = "", webrtc_id: str = None) -> None:
 
1182
  super().__init__(
1183
  expected_layout="mono",
1184
  output_sample_rate=SAMPLE_RATE,
@@ -1195,14 +1426,11 @@ class OpenAIHandler(AsyncStreamHandler):
1195
  self.web_search_enabled = web_search_enabled
1196
  self.target_language = target_language
1197
  self.system_prompt = system_prompt
1198
-
1199
- print(f"[INIT] Handler created with web_search={web_search_enabled}, "
1200
- f"target_language={target_language}")
1201
 
1202
  def copy(self):
1203
- # Get the most recent settings
1204
  if connection_settings:
1205
- # Get the most recent webrtc_id
1206
  recent_ids = sorted(connection_settings.keys(),
1207
  key=lambda k: connection_settings[k].get('timestamp', 0),
1208
  reverse=True)
@@ -1210,17 +1438,15 @@ class OpenAIHandler(AsyncStreamHandler):
1210
  recent_id = recent_ids[0]
1211
  settings = connection_settings[recent_id]
1212
 
1213
- # Log the settings being copied
1214
- print(f"[COPY] Copying settings from {recent_id}:")
1215
-
1216
  return OpenAIHandler(
1217
  web_search_enabled=settings.get('web_search_enabled', False),
1218
  target_language=settings.get('target_language', ''),
1219
  system_prompt=settings.get('system_prompt', ''),
1220
- webrtc_id=recent_id
 
 
1221
  )
1222
 
1223
- print(f"[COPY] No settings found, creating default handler")
1224
  return OpenAIHandler(web_search_enabled=False)
1225
 
1226
  async def search_web(self, query: str) -> str:
@@ -1233,7 +1459,6 @@ class OpenAIHandler(AsyncStreamHandler):
1233
  if not results:
1234
  return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
1235
 
1236
- # Format search results
1237
  formatted_results = []
1238
  for i, result in enumerate(results, 1):
1239
  formatted_results.append(
@@ -1256,133 +1481,28 @@ class OpenAIHandler(AsyncStreamHandler):
1256
  )
1257
  await self.connection.response.create()
1258
 
1259
- def get_translation_instructions(self):
1260
- """Get instructions for translation based on target language"""
1261
- if not self.target_language:
1262
- return ""
1263
-
1264
- language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1265
- return (
1266
- f"\n\nIMPORTANT: You must respond in {language_name} ({self.target_language}). "
1267
- f"Translate all your responses to {language_name}. "
1268
- f"This includes both spoken and written responses."
1269
- )
1270
-
1271
  async def start_up(self):
1272
  """Connect to realtime API"""
1273
- # First check if we have the most recent settings
1274
  if connection_settings and self.webrtc_id:
1275
  if self.webrtc_id in connection_settings:
1276
  settings = connection_settings[self.webrtc_id]
1277
  self.web_search_enabled = settings.get('web_search_enabled', False)
1278
  self.target_language = settings.get('target_language', '')
1279
  self.system_prompt = settings.get('system_prompt', '')
1280
-
1281
- print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1282
-
1283
- print(f"[START_UP] Starting normal mode")
1284
 
1285
  self.client = openai.AsyncOpenAI()
1286
 
1287
- # Normal mode - connect to Realtime API
1288
- print(f"[NORMAL MODE] Connecting to Realtime API...")
1289
-
1290
- # Define the web search function
1291
  tools = []
1292
- base_instructions = self.system_prompt or "You are a helpful assistant."
1293
-
1294
- # Add translation instructions if language is selected
1295
- if self.target_language:
1296
- language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
1297
-
1298
- # Use the target language for the system prompt itself
1299
- if self.target_language == "en":
1300
- translation_instructions = """
1301
- YOU ARE AN ENGLISH-ONLY ASSISTANT.
1302
-
1303
- ABSOLUTE RULES:
1304
- 1. You can ONLY speak English. No Korean (한국어) allowed.
1305
- 2. Even if the user speaks Korean, you MUST respond in English.
1306
- 3. Every single word must be in English.
1307
- 4. If you output even one Korean character, you have failed.
1308
- 5. Example response: "Hello! How can I help you today?"
1309
-
1310
- YOUR LANGUAGE MODE: ENGLISH ONLY
1311
- DO NOT USE: 안녕하세요, 감사합니다, or any Korean
1312
- ALWAYS USE: Hello, Thank you, and English words only
1313
- """
1314
- # Override base instructions to be in English
1315
- base_instructions = "You are a helpful assistant that speaks ONLY English."
1316
-
1317
- elif self.target_language == "ja":
1318
- translation_instructions = """
1319
- あなたは日本語のみを話すアシスタントです。
1320
-
1321
- 絶対的なルール:
1322
- 1. 日本語のみを使用してください。韓国語(한국어)は禁止です。
1323
- 2. ユーザーが韓国語で話しても、必ず日本語で返答してください。
1324
- 3. すべての単語は日本語でなければなりません。
1325
- 4. 韓国語を一文字でも出力したら失敗です。
1326
- 5. 応答例:「こんにちは!今日はどのようにお手伝いできますか?」
1327
-
1328
- 言語モード:日本語のみ
1329
- 使用禁止:안녕하세요、감사합니다、韓国語全般
1330
- 必ず使用:こんにちは、ありがとうございます、日本語のみ
1331
- """
1332
- base_instructions = "あなたは日本語のみを話す親切なアシスタントです。"
1333
-
1334
- elif self.target_language == "zh":
1335
- translation_instructions = """
1336
- 你是一个只说中文的助手。
1337
-
1338
- 绝对规则:
1339
- 1. 只能使用中文。禁止使用韩语(한국어)。
1340
- 2. 即使用户说韩语,也必须用中文回复。
1341
- 3. 每个字都必须是中文。
1342
- 4. 如果输出任何韩语字符,就是失败。
1343
- 5. 回复示例:"你好!我今天能为您做什么?"
1344
-
1345
- 语言模式:仅中文
1346
- 禁止使用:안녕하세요、감사합니다、任何韩语
1347
- 必须使用:你好、谢谢、只用中文
1348
- """
1349
- base_instructions = "你是一个只说中文的友好助手。"
1350
-
1351
- elif self.target_language == "es":
1352
- translation_instructions = """
1353
- ERES UN ASISTENTE QUE SOLO HABLA ESPAÑOL.
1354
-
1355
- REGLAS ABSOLUTAS:
1356
- 1. Solo puedes hablar español. No se permite coreano (한국어).
1357
- 2. Incluso si el usuario habla coreano, DEBES responder en español.
1358
- 3. Cada palabra debe estar en español.
1359
- 4. Si produces aunque sea un carácter coreano, has fallado.
1360
- 5. Respuesta ejemplo: "¡Hola! ¿Cómo puedo ayudarte hoy?"
1361
-
1362
- MODO DE IDIOMA: SOLO ESPAÑOL
1363
- NO USAR: 안녕하세요, 감사합니다, o cualquier coreano
1364
- SIEMPRE USAR: Hola, Gracias, y solo palabras en español
1365
- """
1366
- base_instructions = "Eres un asistente útil que habla SOLO español."
1367
- else:
1368
- translation_instructions = f"""
1369
- YOU MUST ONLY SPEAK {language_name.upper()}.
1370
-
1371
- RULES:
1372
- 1. Output only in {language_name}
1373
- 2. Never use Korean
1374
- 3. Always respond in {language_name}
1375
- """
1376
- base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}."
1377
- else:
1378
- translation_instructions = ""
1379
 
1380
  if self.web_search_enabled and self.search_client:
1381
  tools = [{
1382
  "type": "function",
1383
  "function": {
1384
  "name": "web_search",
1385
- "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
1386
  "parameters": {
1387
  "type": "object",
1388
  "properties": {
@@ -1395,94 +1515,29 @@ RULES:
1395
  }
1396
  }
1397
  }]
1398
- print("Web search function added to tools")
1399
-
1400
- search_instructions = (
1401
- "\n\nYou have web search capabilities. "
1402
- "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
1403
- "- Weather (날씨, 기온, 비, 눈)\n"
1404
- "- News (뉴스, 소식)\n"
1405
- "- Current events (현재, 최근, 오늘, 지금)\n"
1406
- "- Prices (가격, 환율, 주가)\n"
1407
- "- Sports scores or results\n"
1408
- "- Any question about 2024 or 2025\n"
1409
- "- Any time-sensitive information\n\n"
1410
- "When in doubt, USE web_search. It's better to search and provide accurate information "
1411
- "than to guess or use outdated information."
1412
- )
1413
-
1414
- # Combine all instructions
1415
- if translation_instructions:
1416
- # Translation instructions already include base_instructions
1417
- instructions = translation_instructions + search_instructions
1418
- else:
1419
- instructions = base_instructions + search_instructions
1420
- else:
1421
- # No web search
1422
- if translation_instructions:
1423
- instructions = translation_instructions
1424
- else:
1425
- instructions = base_instructions
1426
-
1427
- print(f"[NORMAL MODE] Base instructions: {base_instructions[:100]}...")
1428
- print(f"[NORMAL MODE] Translation instructions: {translation_instructions[:200] if translation_instructions else 'None'}...")
1429
- print(f"[NORMAL MODE] Combined instructions length: {len(instructions)}")
1430
- print(f"[NORMAL MODE] Target language: {self.target_language}")
1431
 
1432
  async with self.client.beta.realtime.connect(
1433
  model="gpt-4o-mini-realtime-preview-2024-12-17"
1434
  ) as conn:
1435
- # Update session with tools
1436
  session_update = {
1437
  "turn_detection": {"type": "server_vad"},
1438
- "instructions": instructions,
1439
  "tools": tools,
1440
  "tool_choice": "auto" if tools else "none",
1441
  "temperature": 0.7,
1442
  "max_response_output_tokens": 4096,
1443
  "modalities": ["text", "audio"],
1444
- "voice": "alloy" # Default voice
1445
  }
1446
 
1447
- # Use appropriate voice for the language
1448
- if self.target_language:
1449
- # Force language through multiple mechanisms
1450
- # 1. Use voice that's known to work well with the language
1451
- voice_map = {
1452
- "en": "nova", # Nova has clearer English
1453
- "es": "nova", # Nova works for Spanish
1454
- "fr": "shimmer", # Shimmer for French
1455
- "de": "echo", # Echo for German
1456
- "ja": "alloy", # Alloy can do Japanese
1457
- "zh": "alloy", # Alloy can do Chinese
1458
- "ko": "nova", # Nova for Korean
1459
- }
1460
- session_update["voice"] = voice_map.get(self.target_language, "nova")
1461
-
1462
- # 2. Add language to modalities (experimental)
1463
- session_update["modalities"] = ["text", "audio"]
1464
-
1465
- # 3. Set output format
1466
- session_update["output_audio_format"] = "pcm16"
1467
-
1468
- # 4. Add language hint to the system (if supported by API)
1469
- if self.target_language in ["en", "es", "fr", "de", "ja", "zh"]:
1470
- session_update["language"] = self.target_language # Try setting language directly
1471
-
1472
- print(f"[TRANSLATION MODE] Session update: {json.dumps(session_update, indent=2)}")
1473
-
1474
  await conn.session.update(session=session_update)
1475
  self.connection = conn
1476
- print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
1477
 
1478
  async for event in self.connection:
1479
- # Debug logging for function calls
1480
- if event.type.startswith("response.function_call"):
1481
- print(f"Function event: {event.type}")
1482
-
1483
  if event.type == "response.audio_transcript.done":
1484
- print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
1485
- print(f"[RESPONSE] Expected language: {self.target_language}")
 
1486
 
1487
  output_data = {
1488
  "event": event,
@@ -1502,7 +1557,6 @@ RULES:
1502
 
1503
  # Handle function calls
1504
  elif event.type == "response.function_call_arguments.start":
1505
- print(f"Function call started")
1506
  self.function_call_in_progress = True
1507
  self.current_function_args = ""
1508
  self.current_call_id = getattr(event, 'call_id', None)
@@ -1513,22 +1567,17 @@ RULES:
1513
 
1514
  elif event.type == "response.function_call_arguments.done":
1515
  if self.function_call_in_progress:
1516
- print(f"Function call done, args: {self.current_function_args}")
1517
  try:
1518
  args = json.loads(self.current_function_args)
1519
  query = args.get("query", "")
1520
 
1521
- # Emit search event to client
1522
  await self.output_queue.put(AdditionalOutputs({
1523
  "type": "search",
1524
  "query": query
1525
  }))
1526
 
1527
- # Perform the search
1528
  search_results = await self.search_web(query)
1529
- print(f"Search results length: {len(search_results)}")
1530
 
1531
- # Send function result back to the model
1532
  if self.connection and self.current_call_id:
1533
  await self.connection.conversation.item.create(
1534
  item={
@@ -1547,9 +1596,7 @@ RULES:
1547
  self.current_call_id = None
1548
 
1549
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
1550
- # Normal mode - use Realtime API
1551
  if not self.connection:
1552
- print(f"[RECEIVE] No connection in normal mode, skipping")
1553
  return
1554
  try:
1555
  _, array = frame
@@ -1560,10 +1607,8 @@ RULES:
1560
  print(f"Error in receive: {e}")
1561
 
1562
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1563
- # Normal mode
1564
  item = await wait_for_item(self.output_queue)
1565
 
1566
- # Check if it's a dict with text message
1567
  if isinstance(item, dict) and item.get('type') == 'text_message':
1568
  await self.process_text_message(item['content'])
1569
  return None
@@ -1571,13 +1616,9 @@ RULES:
1571
  return item
1572
 
1573
  async def shutdown(self) -> None:
1574
- print(f"[SHUTDOWN] Called")
1575
-
1576
- # Normal mode - close Realtime API connection
1577
  if self.connection:
1578
  await self.connection.close()
1579
  self.connection = None
1580
- print("[NORMAL MODE] Connection closed")
1581
 
1582
 
1583
  # Create initial handler instance
@@ -1588,7 +1629,7 @@ chatbot = gr.Chatbot(type="messages")
1588
 
1589
  # Create stream with handler instance
1590
  stream = Stream(
1591
- handler, # Pass instance, not factory
1592
  mode="send-receive",
1593
  modality="audio",
1594
  additional_inputs=[chatbot],
@@ -1601,6 +1642,11 @@ stream = Stream(
1601
 
1602
  app = FastAPI()
1603
 
 
 
 
 
 
1604
  # Mount stream
1605
  stream.mount(app)
1606
 
@@ -1614,22 +1660,18 @@ async def custom_offer(request: Request):
1614
  web_search_enabled = body.get("web_search_enabled", False)
1615
  target_language = body.get("target_language", "")
1616
  system_prompt = body.get("system_prompt", "")
 
 
1617
 
1618
- print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
1619
- print(f"[OFFER] web_search_enabled: {web_search_enabled}")
1620
- print(f"[OFFER] target_language: {target_language}")
1621
-
1622
- # Store settings with timestamp
1623
  if webrtc_id:
1624
  connection_settings[webrtc_id] = {
1625
  'web_search_enabled': web_search_enabled,
1626
  'target_language': target_language,
1627
  'system_prompt': system_prompt,
 
 
1628
  'timestamp': asyncio.get_event_loop().time()
1629
  }
1630
-
1631
- print(f"[OFFER] Stored settings for {webrtc_id}:")
1632
- print(f"[OFFER] {connection_settings[webrtc_id]}")
1633
 
1634
  # Remove our custom route temporarily
1635
  custom_route = None
@@ -1638,16 +1680,11 @@ async def custom_offer(request: Request):
1638
  custom_route = app.routes.pop(i)
1639
  break
1640
 
1641
- # Forward to stream's offer handler
1642
- print(f"[OFFER] Forwarding to stream.offer()")
1643
  response = await stream.offer(body)
1644
 
1645
- # Re-add our custom route
1646
  if custom_route:
1647
  app.routes.insert(0, custom_route)
1648
 
1649
- print(f"[OFFER] Response status: {response.get('status', 'unknown') if isinstance(response, dict) else 'OK'}")
1650
-
1651
  return response
1652
 
1653
 
@@ -1660,12 +1697,14 @@ async def chat_text(request: Request):
1660
  web_search_enabled = body.get("web_search_enabled", False)
1661
  target_language = body.get("target_language", "")
1662
  system_prompt = body.get("system_prompt", "")
 
 
1663
 
1664
  if not message:
1665
  return {"error": "메시지가 비어있습니다."}
1666
 
1667
- # Process text chat
1668
- result = await process_text_chat(message, web_search_enabled, target_language, system_prompt)
1669
 
1670
  return result
1671
 
@@ -1674,16 +1713,41 @@ async def chat_text(request: Request):
1674
  return {"error": "채팅 처리 중 오류가 발생했습니다."}
1675
 
1676
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1677
  @app.post("/text_message/{webrtc_id}")
1678
  async def receive_text_message(webrtc_id: str, request: Request):
1679
  """Receive text message from client"""
1680
  body = await request.json()
1681
  message = body.get("content", "")
1682
 
1683
- # Find the handler for this connection
1684
  if webrtc_id in stream.handlers:
1685
  handler = stream.handlers[webrtc_id]
1686
- # Queue the text message for processing
1687
  await handler.output_queue.put({
1688
  'type': 'text_message',
1689
  'content': message
@@ -1698,10 +1762,8 @@ async def outputs(webrtc_id: str):
1698
  async def output_stream():
1699
  async for output in stream.output_stream(webrtc_id):
1700
  if hasattr(output, 'args') and output.args:
1701
- # Check if it's a search event
1702
  if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
1703
  yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
1704
- # Regular transcript event with language info
1705
  elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
1706
  event_data = output.args[0]
1707
  if 'event' in event_data and hasattr(event_data['event'], 'transcript'):
 
23
  import io
24
  from scipy import signal
25
  import wave
26
+ from datetime import datetime
27
+ import sqlite3
28
+ import aiosqlite
29
+ from langdetect import detect, LangDetectException
30
+ import uuid
31
 
32
  load_dotenv()
33
 
 
66
  "ms": "Bahasa Melayu (Malay)"
67
  }
68
 
69
+ # Database setup
70
+ DB_PATH = "chat_history.db"
71
+
72
async def init_db():
    """Create the chat-history SQLite schema if it does not exist.

    Safe to call on every startup: all DDL uses IF NOT EXISTS.
    Tables:
      - conversations: one row per chat session (id is a client-generated UUID).
      - messages: ordered messages belonging to a conversation.
    """
    async with aiosqlite.connect(DB_PATH) as db:
        await db.execute("""
            CREATE TABLE IF NOT EXISTS conversations (
                id TEXT PRIMARY KEY,
                title TEXT NOT NULL,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        await db.execute("""
            CREATE TABLE IF NOT EXISTS messages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                conversation_id TEXT NOT NULL,
                role TEXT NOT NULL,
                content TEXT NOT NULL,
                language TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (conversation_id) REFERENCES conversations (id)
            )
        """)

        # Index so per-conversation lookups (get_conversation_messages)
        # don't scan the whole messages table as history grows.
        await db.execute(
            "CREATE INDEX IF NOT EXISTS idx_messages_conversation_id "
            "ON messages (conversation_id)"
        )

        await db.commit()
97
+
98
async def save_message(conversation_id: str, role: str, content: str, language: str | None = None):
    """Persist one chat message and create/refresh its conversation row.

    Args:
        conversation_id: Client-generated conversation UUID.
        role: "user" or "assistant".
        content: Message text (plain text; HTML is stripped client-side).
        language: Optional detected/target language code.
    """
    async with aiosqlite.connect(DB_PATH) as db:
        # Conversation title is derived from the first message; the UPSERT
        # below only uses it on INSERT, so later messages never overwrite it.
        title = content[:50] + "..." if len(content) > 50 else content

        # Single atomic UPSERT replaces the old SELECT-then-INSERT/UPDATE,
        # which could race when two messages for a new conversation arrived
        # concurrently (e.g. the user message and the assistant transcript).
        await db.execute(
            """
            INSERT INTO conversations (id, title) VALUES (?, ?)
            ON CONFLICT(id) DO UPDATE SET updated_at = CURRENT_TIMESTAMP
            """,
            (conversation_id, title),
        )

        await db.execute(
            "INSERT INTO messages (conversation_id, role, content, language) VALUES (?, ?, ?, ?)",
            (conversation_id, role, content, language),
        )

        await db.commit()
129
+
130
async def get_conversations():
    """Return every conversation as a plain dict, most recently updated first."""
    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            "SELECT * FROM conversations ORDER BY updated_at DESC"
        )
        rows = await cursor.fetchall()
        # Convert sqlite Row objects to dicts so they JSON-serialize cleanly.
        return [dict(row) for row in rows]
139
+
140
async def get_conversation_messages(conversation_id: str):
    """Return all messages of one conversation as dicts, in insertion order.

    Orders by the AUTOINCREMENT primary key rather than created_at:
    CURRENT_TIMESTAMP has one-second resolution, so two messages saved
    within the same second (common for user message + assistant reply)
    would otherwise have no deterministic order.
    """
    async with aiosqlite.connect(DB_PATH) as db:
        db.row_factory = aiosqlite.Row
        cursor = await db.execute(
            "SELECT * FROM messages WHERE conversation_id = ? ORDER BY id",
            (conversation_id,),
        )
        messages = await cursor.fetchall()
        return [dict(msg) for msg in messages]
150
+
151
def detect_language(text: str) -> str:
    """Best-effort language detection, constrained to SUPPORTED_LANGUAGES.

    Returns the langdetect code when it is one of the supported languages,
    otherwise falls back to Korean ('ko'). Detection failures (empty or
    ambiguous text raise LangDetectException) also fall back to 'ko'.
    """
    try:
        lang = detect(text)
    except LangDetectException:
        return 'ko'
    # 'ko' and 'en' are themselves keys of SUPPORTED_LANGUAGES, so one
    # membership test replaces the old redundant if/elif chain.
    return lang if lang in SUPPORTED_LANGUAGES else 'ko'
168
+
169
  # HTML content embedded as a string
170
  HTML_CONTENT = """<!DOCTYPE html>
171
  <html lang="ko">
 
252
  font-size: 32px;
253
  letter-spacing: 1px;
254
  }
255
+ /* History section */
256
+ .history-section {
257
+ background-color: var(--card-bg);
258
+ border-radius: 12px;
259
+ padding: 20px;
260
+ border: 1px solid var(--border-color);
261
+ overflow-y: auto;
262
+ flex-shrink: 0;
263
+ max-height: 300px;
264
+ }
265
+ .history-item {
266
+ padding: 10px;
267
+ margin-bottom: 8px;
268
+ background-color: var(--dark-bg);
269
+ border-radius: 6px;
270
+ cursor: pointer;
271
+ transition: all 0.2s;
272
+ display: flex;
273
+ justify-content: space-between;
274
+ align-items: center;
275
+ }
276
+ .history-item:hover {
277
+ background-color: var(--hover-color);
278
+ transform: translateX(5px);
279
+ }
280
+ .history-item-title {
281
+ font-size: 14px;
282
+ color: var(--text-color);
283
+ overflow: hidden;
284
+ text-overflow: ellipsis;
285
+ white-space: nowrap;
286
+ flex-grow: 1;
287
+ }
288
+ .history-item-date {
289
+ font-size: 12px;
290
+ color: #888;
291
+ margin-left: 10px;
292
+ }
293
+ .new-chat-button {
294
+ width: 100%;
295
+ background: linear-gradient(135deg, #2ecc71, #27ae60);
296
+ color: white;
297
+ border: none;
298
+ padding: 10px 20px;
299
+ font-size: 14px;
300
+ border-radius: 6px;
301
+ cursor: pointer;
302
+ margin-bottom: 10px;
303
+ transition: all 0.3s;
304
+ }
305
+ .new-chat-button:hover {
306
+ background: linear-gradient(135deg, #27ae60, #229954);
307
+ transform: translateY(-2px);
308
+ }
309
  /* Settings section */
310
  .settings-section {
311
  background-color: var(--card-bg);
 
677
 
678
  <div class="main-content">
679
  <div class="sidebar">
680
+ <div class="history-section">
681
+ <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화 기록</h3>
682
+ <button class="new-chat-button" onclick="startNewChat()">+ 새 대화</button>
683
+ <div id="history-list"></div>
684
+ </div>
685
+
686
  <div class="settings-section">
687
+ <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정</h3>
688
  <div class="settings-grid">
689
  <div class="setting-item">
690
  <span class="setting-label">웹 검색</span>
 
693
  </div>
694
  </div>
695
  <div class="setting-item">
696
+ <span class="setting-label">자동 언어 감지</span>
697
+ <div id="auto-lang-toggle" class="toggle-switch active">
698
+ <div class="toggle-slider"></div>
699
+ </div>
700
+ </div>
701
+ <div class="setting-item">
702
+ <span class="setting-label">번역 언어</span>
703
  <select id="language-select">
704
+ <option value="">자동 감지</option>
705
  <option value="ko">한국어 (Korean)</option>
706
  <option value="en">English</option>
707
  <option value="es">Español (Spanish)</option>
 
736
  </div>
737
  <div class="text-input-section">
738
  <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
739
+ <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">당신은 친절하고 도움이 되는 AI 어시스턴트입니다. 사용자의 요청에 정확하고 유용한 답변을 제공합니다.</textarea>
740
  </div>
741
  </div>
742
 
 
765
  let peerConnection;
766
  let webrtc_id;
767
  let webSearchEnabled = false;
768
+ let autoLanguageDetection = true;
769
  let selectedLanguage = "";
770
+ let systemPrompt = "당신은 친절하고 도움이 되는 AI 어시스턴트입니다. 사용자의 요청에 정확하고 유용한 답변을 제공합니다.";
771
+ let currentConversationId = null;
772
+
773
  const audioOutput = document.getElementById('audio-output');
774
  const startButton = document.getElementById('start-button');
775
  const sendButton = document.getElementById('send-button');
 
777
  const statusDot = document.getElementById('status-dot');
778
  const statusText = document.getElementById('status-text');
779
  const searchToggle = document.getElementById('search-toggle');
780
+ const autoLangToggle = document.getElementById('auto-lang-toggle');
781
  const languageSelect = document.getElementById('language-select');
782
  const systemPromptInput = document.getElementById('system-prompt');
783
  const textInput = document.getElementById('text-input');
784
+ const historyList = document.getElementById('history-list');
785
+
786
  let audioLevel = 0;
787
  let animationFrame;
788
  let audioContext, analyser, audioSource;
789
  let dataChannel = null;
790
  let isVoiceActive = false;
791
 
792
+ // Initialize
793
+ window.addEventListener('DOMContentLoaded', async () => {
794
+ sendButton.style.display = 'block';
795
+ await loadHistory();
796
+ startNewChat();
797
+ });
798
+
799
+ // Start new chat
800
+ function startNewChat() {
801
+ currentConversationId = generateUUID();
802
+ chatMessages.innerHTML = '';
803
+ console.log('Started new conversation:', currentConversationId);
804
+ }
805
+
806
+ // Generate UUID
807
+ function generateUUID() {
808
+ return 'xxxx-xxxx-4xxx-yxxx-xxxx'.replace(/[xy]/g, function(c) {
809
+ const r = Math.random() * 16 | 0;
810
+ const v = c === 'x' ? r : (r & 0x3 | 0x8);
811
+ return v.toString(16);
812
+ });
813
+ }
814
+
815
+ // Load conversation history
816
// Fetch the conversation list and rebuild the sidebar history panel.
async function loadHistory() {
    try {
        const response = await fetch('/conversations');
        // fetch() only rejects on network errors; surface HTTP errors too,
        // otherwise a 500 would be parsed as JSON and fail confusingly.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const conversations = await response.json();

        historyList.innerHTML = '';
        conversations.forEach(conv => {
            const item = document.createElement('div');
            item.className = 'history-item';
            item.onclick = () => loadConversation(conv.id);

            const title = document.createElement('div');
            title.className = 'history-item-title';
            // textContent (not innerHTML): titles come from user messages,
            // so this avoids HTML injection into the sidebar.
            title.textContent = conv.title;

            const date = document.createElement('div');
            date.className = 'history-item-date';
            date.textContent = new Date(conv.updated_at).toLocaleDateString('ko-KR');

            item.appendChild(title);
            item.appendChild(date);
            historyList.appendChild(item);
        });
    } catch (error) {
        console.error('Failed to load history:', error);
    }
}
843
+
844
+ // Load a specific conversation
845
// Load a stored conversation into the chat pane and make it current.
async function loadConversation(conversationId) {
    try {
        const response = await fetch(`/conversation/${conversationId}`);
        // Surface HTTP errors; fetch() only rejects on network failures.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }
        const messages = await response.json();

        currentConversationId = conversationId;
        chatMessages.innerHTML = '';

        // Re-render without re-saving (save=false): these rows already
        // exist in the database.
        messages.forEach(msg => {
            addMessage(msg.role, msg.content, false);
        });
    } catch (error) {
        console.error('Failed to load conversation:', error);
    }
}
860
+
861
  // Web search toggle functionality
862
  searchToggle.addEventListener('click', () => {
863
  webSearchEnabled = !webSearchEnabled;
 
865
  console.log('Web search enabled:', webSearchEnabled);
866
  });
867
 
868
+ // Auto language detection toggle
869
+ autoLangToggle.addEventListener('click', () => {
870
+ autoLanguageDetection = !autoLanguageDetection;
871
+ autoLangToggle.classList.toggle('active', autoLanguageDetection);
872
+ languageSelect.disabled = autoLanguageDetection;
873
+ console.log('Auto language detection:', autoLanguageDetection);
874
+ });
875
+
876
  // Language selection
877
  languageSelect.addEventListener('change', () => {
878
  selectedLanguage = languageSelect.value;
 
881
 
882
  // System prompt update
883
  systemPromptInput.addEventListener('input', () => {
884
+ systemPrompt = systemPromptInput.value || "당신은 친절하고 도움이 되는 AI 어시스턴트입니다.";
885
  });
886
 
887
  // Text input handling
 
899
  if (!message) return;
900
 
901
  // Add user message to chat
902
+ addMessage('user', message, true);
903
  textInput.value = '';
904
 
905
  // Show sending indicator
 
918
  body: JSON.stringify({
919
  message: message,
920
  web_search_enabled: webSearchEnabled,
921
+ target_language: autoLanguageDetection ? '' : selectedLanguage,
922
+ system_prompt: systemPrompt,
923
+ conversation_id: currentConversationId,
924
+ auto_detect: autoLanguageDetection
925
  })
926
  });
927
 
 
936
  } else {
937
  // Add assistant response
938
  let content = data.response;
939
+ if (data.language) {
940
  content += ` <span class="language-info">[${data.language}]</span>`;
941
  }
942
+ addMessage('assistant', content, true);
943
+
944
+ // Refresh history
945
+ await loadHistory();
946
  }
947
  } catch (error) {
948
  console.error('Error sending text message:', error);
 
963
  sendButton.style.display = 'none';
964
  } else {
965
  statusText.textContent = '연결 대기 중';
966
+ sendButton.style.display = 'block';
967
  isVoiceActive = false;
968
  }
969
  }
970
+
971
  function updateButtonState() {
972
  const button = document.getElementById('start-button');
973
  if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
 
997
  updateStatus('disconnected');
998
  }
999
  }
1000
+
1001
  function setupAudioVisualization(stream) {
1002
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
1003
  analyser = audioContext.createAnalyser();
 
1032
 
1033
  updateAudioLevel();
1034
  }
1035
+
1036
  function showError(message) {
1037
  const toast = document.getElementById('error-toast');
1038
  toast.textContent = message;
 
1042
  toast.style.display = 'none';
1043
  }, 5000);
1044
  }
1045
+
1046
  async function setupWebRTC() {
1047
  const config = __RTC_CONFIGURATION__;
1048
  peerConnection = new RTCPeerConnection(config);
 
1055
  toast.style.display = 'none';
1056
  }, 5000);
1057
  }, 5000);
1058
+
1059
  try {
1060
  const stream = await navigator.mediaDevices.getUserMedia({
1061
  audio: true
 
1098
  peerConnection.addEventListener("icegatheringstatechange", checkState);
1099
  }
1100
  });
1101
+
1102
  peerConnection.addEventListener('connectionstatechange', () => {
1103
  console.log('connectionstatechange', peerConnection.connectionState);
1104
  if (peerConnection.connectionState === 'connected') {
 
1108
  }
1109
  updateButtonState();
1110
  });
 
1111
 
1112
+ webrtc_id = Math.random().toString(36).substring(7);
 
 
 
 
 
 
1113
 
1114
  const response = await fetch('/webrtc/offer', {
1115
  method: 'POST',
 
1119
  type: peerConnection.localDescription.type,
1120
  webrtc_id: webrtc_id,
1121
  web_search_enabled: webSearchEnabled,
1122
+ target_language: autoLanguageDetection ? '' : selectedLanguage,
1123
+ system_prompt: systemPrompt,
1124
+ conversation_id: currentConversationId,
1125
+ auto_detect: autoLanguageDetection
1126
  })
1127
  });
1128
+
1129
  const serverResponse = await response.json();
1130
  if (serverResponse.status === 'failed') {
1131
  showError(serverResponse.meta.error === 'concurrency_limit_reached'
 
1134
  stop();
1135
  return;
1136
  }
1137
+
1138
  await peerConnection.setRemoteDescription(serverResponse);
1139
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
1140
+ eventSource.addEventListener("output", async (event) => {
1141
  const eventJson = JSON.parse(event.data);
1142
  let content = eventJson.content;
1143
 
1144
+ if (eventJson.language) {
1145
  content += ` <span class="language-info">[${eventJson.language}]</span>`;
1146
  }
1147
+ addMessage("assistant", content, true);
1148
+
1149
+ // Refresh history after receiving a response
1150
+ await loadHistory();
1151
  });
1152
  eventSource.addEventListener("search", (event) => {
1153
  const eventJson = JSON.parse(event.data);
1154
  if (eventJson.query) {
1155
+ addMessage("search-result", `웹 검색 중: "${eventJson.query}"`, true);
1156
  }
1157
  });
1158
  } catch (err) {
 
1162
  stop();
1163
  }
1164
  }
1165
+
1166
+ function addMessage(role, content, save = false) {
1167
  const messageDiv = document.createElement('div');
1168
  messageDiv.classList.add('message', role);
1169
 
 
1174
  }
1175
  chatMessages.appendChild(messageDiv);
1176
  chatMessages.scrollTop = chatMessages.scrollHeight;
1177
+
1178
+ // Save to database if needed
1179
+ if (save && currentConversationId && role !== 'search-result') {
1180
+ fetch('/message', {
1181
+ method: 'POST',
1182
+ headers: { 'Content-Type': 'application/json' },
1183
+ body: JSON.stringify({
1184
+ conversation_id: currentConversationId,
1185
+ role: role,
1186
+ content: content.replace(/<[^>]*>/g, ''), // Remove HTML tags
1187
+ language: ''
1188
+ })
1189
+ }).catch(error => console.error('Failed to save message:', error));
1190
+ }
1191
  }
1192
 
1193
  function stop() {
1194
  console.log('[STOP] Stopping connection...');
1195
 
 
1196
  if (animationFrame) {
1197
  cancelAnimationFrame(animationFrame);
1198
  animationFrame = null;
1199
  }
1200
 
 
1201
  if (audioContext) {
1202
  audioContext.close();
1203
  audioContext = null;
 
1205
  audioSource = null;
1206
  }
1207
 
 
1208
  if (dataChannel) {
1209
  dataChannel.close();
1210
  dataChannel = null;
1211
  }
1212
 
 
1213
  if (peerConnection) {
1214
  console.log('[STOP] Current connection state:', peerConnection.connectionState);
1215
 
 
1216
  if (peerConnection.getTransceivers) {
1217
  peerConnection.getTransceivers().forEach(transceiver => {
1218
  if (transceiver.stop) {
 
1221
  });
1222
  }
1223
 
 
1224
  if (peerConnection.getSenders) {
1225
  peerConnection.getSenders().forEach(sender => {
1226
  if (sender.track) {
 
1229
  });
1230
  }
1231
 
 
1232
  if (peerConnection.getReceivers) {
1233
  peerConnection.getReceivers().forEach(receiver => {
1234
  if (receiver.track) {
 
1237
  });
1238
  }
1239
 
 
1240
  peerConnection.close();
 
 
1241
  peerConnection = null;
1242
 
1243
  console.log('[STOP] Connection closed');
1244
  }
1245
 
 
1246
  audioLevel = 0;
1247
  isVoiceActive = false;
 
 
1248
  updateButtonState();
1249
 
 
1250
  if (webrtc_id) {
1251
  console.log('[STOP] Clearing webrtc_id:', webrtc_id);
1252
  webrtc_id = null;
1253
  }
1254
  }
1255
+
1256
  startButton.addEventListener('click', () => {
1257
  console.log('clicked');
1258
  console.log(peerConnection, peerConnection?.connectionState);
 
1263
  stop();
1264
  }
1265
  });
 
 
 
 
 
1266
  </script>
1267
  </body>
1268
 
 
1313
  # Initialize search client globally
1314
  brave_api_key = os.getenv("BSEARCH_API")
1315
  search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
 
1316
 
1317
  # Store connection settings
1318
  connection_settings = {}
 
1337
 
1338
 
1339
  async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
1340
+ system_prompt: str, conversation_id: str, auto_detect: bool) -> Dict[str, str]:
1341
  """Process text chat using GPT-4o-mini model"""
1342
  try:
1343
+ # Auto-detect language if enabled
1344
+ if auto_detect:
1345
+ detected_lang = detect_language(message)
1346
+ if detected_lang in ['en', 'ko']:
1347
+ target_language = detected_lang
 
 
 
 
 
 
 
 
 
 
 
 
1348
  else:
1349
+ target_language = 'ko' # Default to Korean
1350
+
1351
+ # Save user message
1352
+ await save_message(conversation_id, "user", message, target_language)
1353
+
1354
+ # Prepare system prompt based on language
1355
+ if target_language == 'en':
1356
+ base_instructions = f"You are a helpful assistant. {system_prompt}"
1357
  else:
1358
+ base_instructions = system_prompt or "당신은 친절하고 도움이 되는 AI 어시스턴트입니다."
 
1359
 
1360
  messages = [
1361
  {"role": "system", "content": base_instructions}
 
1363
 
1364
  # Handle web search if enabled
1365
  if web_search_enabled and search_client:
 
1366
  search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
1367
  "오늘", "지금", "가격", "환율", "주가", "weather", "news",
1368
  "current", "today", "price", "2024", "2025"]
 
1370
  should_search = any(keyword in message.lower() for keyword in search_keywords)
1371
 
1372
  if should_search:
 
1373
  search_results = await search_client.search(message)
1374
  if search_results:
1375
  search_context = "웹 검색 결과:\n\n"
1376
  for i, result in enumerate(search_results[:5], 1):
1377
  search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
1378
 
 
 
 
 
 
 
1379
  messages.append({
1380
  "role": "system",
1381
+ "content": "다음 검색 결과를 참고하여 답변하세요: \n\n" + search_context
1382
  })
1383
 
1384
+ messages.append({"role": "user", "content": message})
 
1385
 
1386
  # Call GPT-4o-mini
1387
  response = await client.chat.completions.create(
 
1393
 
1394
  response_text = response.choices[0].message.content
1395
 
1396
+ # Save assistant response
1397
+ await save_message(conversation_id, "assistant", response_text, target_language)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1398
 
1399
  return {
1400
  "response": response_text,
 
1408
 
1409
  class OpenAIHandler(AsyncStreamHandler):
1410
  def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1411
+ system_prompt: str = "", webrtc_id: str = None, conversation_id: str = None,
1412
+ auto_detect: bool = True) -> None:
1413
  super().__init__(
1414
  expected_layout="mono",
1415
  output_sample_rate=SAMPLE_RATE,
 
1426
  self.web_search_enabled = web_search_enabled
1427
  self.target_language = target_language
1428
  self.system_prompt = system_prompt
1429
+ self.conversation_id = conversation_id
1430
+ self.auto_detect = auto_detect
 
1431
 
1432
  def copy(self):
 
1433
  if connection_settings:
 
1434
  recent_ids = sorted(connection_settings.keys(),
1435
  key=lambda k: connection_settings[k].get('timestamp', 0),
1436
  reverse=True)
 
1438
  recent_id = recent_ids[0]
1439
  settings = connection_settings[recent_id]
1440
 
 
 
 
1441
  return OpenAIHandler(
1442
  web_search_enabled=settings.get('web_search_enabled', False),
1443
  target_language=settings.get('target_language', ''),
1444
  system_prompt=settings.get('system_prompt', ''),
1445
+ webrtc_id=recent_id,
1446
+ conversation_id=settings.get('conversation_id'),
1447
+ auto_detect=settings.get('auto_detect', True)
1448
  )
1449
 
 
1450
  return OpenAIHandler(web_search_enabled=False)
1451
 
1452
  async def search_web(self, query: str) -> str:
 
1459
  if not results:
1460
  return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
1461
 
 
1462
  formatted_results = []
1463
  for i, result in enumerate(results, 1):
1464
  formatted_results.append(
 
1481
  )
1482
  await self.connection.response.create()
1483
 
 
 
 
 
 
 
 
 
 
 
 
 
1484
  async def start_up(self):
1485
  """Connect to realtime API"""
 
1486
  if connection_settings and self.webrtc_id:
1487
  if self.webrtc_id in connection_settings:
1488
  settings = connection_settings[self.webrtc_id]
1489
  self.web_search_enabled = settings.get('web_search_enabled', False)
1490
  self.target_language = settings.get('target_language', '')
1491
  self.system_prompt = settings.get('system_prompt', '')
1492
+ self.conversation_id = settings.get('conversation_id')
1493
+ self.auto_detect = settings.get('auto_detect', True)
 
 
1494
 
1495
  self.client = openai.AsyncOpenAI()
1496
 
 
 
 
 
1497
  tools = []
1498
+ base_instructions = self.system_prompt or "당신은 친절하고 도움이 되는 AI 어시스턴트입니다."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1499
 
1500
  if self.web_search_enabled and self.search_client:
1501
  tools = [{
1502
  "type": "function",
1503
  "function": {
1504
  "name": "web_search",
1505
+ "description": "Search the web for current information.",
1506
  "parameters": {
1507
  "type": "object",
1508
  "properties": {
 
1515
  }
1516
  }
1517
  }]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1518
 
1519
  async with self.client.beta.realtime.connect(
1520
  model="gpt-4o-mini-realtime-preview-2024-12-17"
1521
  ) as conn:
 
1522
  session_update = {
1523
  "turn_detection": {"type": "server_vad"},
1524
+ "instructions": base_instructions,
1525
  "tools": tools,
1526
  "tool_choice": "auto" if tools else "none",
1527
  "temperature": 0.7,
1528
  "max_response_output_tokens": 4096,
1529
  "modalities": ["text", "audio"],
1530
+ "voice": "nova"
1531
  }
1532
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1533
  await conn.session.update(session=session_update)
1534
  self.connection = conn
 
1535
 
1536
  async for event in self.connection:
 
 
 
 
1537
  if event.type == "response.audio_transcript.done":
1538
+ # Save the transcript
1539
+ if self.conversation_id:
1540
+ await save_message(self.conversation_id, "assistant", event.transcript)
1541
 
1542
  output_data = {
1543
  "event": event,
 
1557
 
1558
  # Handle function calls
1559
  elif event.type == "response.function_call_arguments.start":
 
1560
  self.function_call_in_progress = True
1561
  self.current_function_args = ""
1562
  self.current_call_id = getattr(event, 'call_id', None)
 
1567
 
1568
  elif event.type == "response.function_call_arguments.done":
1569
  if self.function_call_in_progress:
 
1570
  try:
1571
  args = json.loads(self.current_function_args)
1572
  query = args.get("query", "")
1573
 
 
1574
  await self.output_queue.put(AdditionalOutputs({
1575
  "type": "search",
1576
  "query": query
1577
  }))
1578
 
 
1579
  search_results = await self.search_web(query)
 
1580
 
 
1581
  if self.connection and self.current_call_id:
1582
  await self.connection.conversation.item.create(
1583
  item={
 
1596
  self.current_call_id = None
1597
 
1598
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
 
1599
  if not self.connection:
 
1600
  return
1601
  try:
1602
  _, array = frame
 
1607
  print(f"Error in receive: {e}")
1608
 
1609
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
 
1610
  item = await wait_for_item(self.output_queue)
1611
 
 
1612
  if isinstance(item, dict) and item.get('type') == 'text_message':
1613
  await self.process_text_message(item['content'])
1614
  return None
 
1616
  return item
1617
 
1618
  async def shutdown(self) -> None:
 
 
 
1619
  if self.connection:
1620
  await self.connection.close()
1621
  self.connection = None
 
1622
 
1623
 
1624
  # Create initial handler instance
 
1629
 
1630
  # Create stream with handler instance
1631
  stream = Stream(
1632
+ handler,
1633
  mode="send-receive",
1634
  modality="audio",
1635
  additional_inputs=[chatbot],
 
1642
 
1643
  app = FastAPI()
1644
 
1645
# Initialize the chat-history database before serving requests.
# NOTE(review): @app.on_event is deprecated in recent FastAPI in favor of
# lifespan handlers — works today; consider migrating on a FastAPI upgrade.
@app.on_event("startup")
async def startup_event():
    """Create the SQLite schema (idempotent) when the app starts."""
    await init_db()
1649
+
1650
  # Mount stream
1651
  stream.mount(app)
1652
 
 
1660
  web_search_enabled = body.get("web_search_enabled", False)
1661
  target_language = body.get("target_language", "")
1662
  system_prompt = body.get("system_prompt", "")
1663
+ conversation_id = body.get("conversation_id")
1664
+ auto_detect = body.get("auto_detect", True)
1665
 
 
 
 
 
 
1666
  if webrtc_id:
1667
  connection_settings[webrtc_id] = {
1668
  'web_search_enabled': web_search_enabled,
1669
  'target_language': target_language,
1670
  'system_prompt': system_prompt,
1671
+ 'conversation_id': conversation_id,
1672
+ 'auto_detect': auto_detect,
1673
  'timestamp': asyncio.get_event_loop().time()
1674
  }
 
 
 
1675
 
1676
  # Remove our custom route temporarily
1677
  custom_route = None
 
1680
  custom_route = app.routes.pop(i)
1681
  break
1682
 
 
 
1683
  response = await stream.offer(body)
1684
 
 
1685
  if custom_route:
1686
  app.routes.insert(0, custom_route)
1687
 
 
 
1688
  return response
1689
 
1690
 
 
1697
  web_search_enabled = body.get("web_search_enabled", False)
1698
  target_language = body.get("target_language", "")
1699
  system_prompt = body.get("system_prompt", "")
1700
+ conversation_id = body.get("conversation_id")
1701
+ auto_detect = body.get("auto_detect", True)
1702
 
1703
  if not message:
1704
  return {"error": "메시지가 비어있습니다."}
1705
 
1706
+ result = await process_text_chat(message, web_search_enabled, target_language,
1707
+ system_prompt, conversation_id, auto_detect)
1708
 
1709
  return result
1710
 
 
1713
  return {"error": "채팅 처리 중 오류가 발생했습니다."}
1714
 
1715
 
1716
+ @app.get("/conversations")
1717
+ async def get_conversations_endpoint():
1718
+ """Get all conversations"""
1719
+ conversations = await get_conversations()
1720
+ return conversations
1721
+
1722
+
1723
+ @app.get("/conversation/{conversation_id}")
1724
+ async def get_conversation_endpoint(conversation_id: str):
1725
+ """Get messages for a specific conversation"""
1726
+ messages = await get_conversation_messages(conversation_id)
1727
+ return messages
1728
+
1729
+
1730
+ @app.post("/message")
1731
+ async def save_message_endpoint(request: Request):
1732
+ """Save a message"""
1733
+ body = await request.json()
1734
+ await save_message(
1735
+ body["conversation_id"],
1736
+ body["role"],
1737
+ body["content"],
1738
+ body.get("language")
1739
+ )
1740
+ return {"status": "ok"}
1741
+
1742
+
1743
  @app.post("/text_message/{webrtc_id}")
1744
  async def receive_text_message(webrtc_id: str, request: Request):
1745
  """Receive text message from client"""
1746
  body = await request.json()
1747
  message = body.get("content", "")
1748
 
 
1749
  if webrtc_id in stream.handlers:
1750
  handler = stream.handlers[webrtc_id]
 
1751
  await handler.output_queue.put({
1752
  'type': 'text_message',
1753
  'content': message
 
1762
  async def output_stream():
1763
  async for output in stream.output_stream(webrtc_id):
1764
  if hasattr(output, 'args') and output.args:
 
1765
  if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
1766
  yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
 
1767
  elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
1768
  event_data = output.args[0]
1769
  if 'event' in event_data and hasattr(event_data['event'], 'transcript'):