seawolf2357 committed on
Commit
73eebb3
·
verified ·
1 Parent(s): 1283bb7

Update app-backup4.py

Browse files
Files changed (1) hide show
  1. app-backup4.py +599 -152
app-backup4.py CHANGED
@@ -31,7 +31,18 @@ import uuid
31
  load_dotenv()
32
 
33
  SAMPLE_RATE = 24000
34
- DB_PATH = "chat_history.db"
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # HTML content embedded as a string
37
  HTML_CONTENT = """<!DOCTYPE html>
@@ -40,7 +51,7 @@ HTML_CONTENT = """<!DOCTYPE html>
40
  <head>
41
  <meta charset="UTF-8">
42
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
43
- <title>Mouth of 'MOUSE'</title>
44
  <style>
45
  :root {
46
  --primary-color: #6f42c1;
@@ -50,6 +61,7 @@ HTML_CONTENT = """<!DOCTYPE html>
50
  --text-color: #f8f9fa;
51
  --border-color: #333;
52
  --hover-color: #8a5cf6;
 
53
  }
54
  body {
55
  font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, sans-serif;
@@ -126,7 +138,6 @@ HTML_CONTENT = """<!DOCTYPE html>
126
  padding: 20px;
127
  border: 1px solid var(--border-color);
128
  overflow-y: auto;
129
- flex-grow: 1;
130
  }
131
  .settings-grid {
132
  display: flex;
@@ -171,14 +182,40 @@ HTML_CONTENT = """<!DOCTYPE html>
171
  .toggle-switch.active .toggle-slider {
172
  transform: translateX(24px);
173
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  /* History section */
175
  .history-section {
176
  background-color: var(--card-bg);
177
  border-radius: 12px;
178
  padding: 20px;
179
  border: 1px solid var(--border-color);
180
- margin-top: 20px;
181
- max-height: 300px;
182
  overflow-y: auto;
183
  }
184
  .history-item {
@@ -292,6 +329,13 @@ HTML_CONTENT = """<!DOCTYPE html>
292
  padding: 10px;
293
  margin-bottom: 10px;
294
  }
 
 
 
 
 
 
 
295
  .language-info {
296
  font-size: 12px;
297
  color: #888;
@@ -359,6 +403,14 @@ HTML_CONTENT = """<!DOCTYPE html>
359
  #send-button:hover {
360
  background: linear-gradient(135deg, #27ae60, #229954);
361
  }
 
 
 
 
 
 
 
 
362
  #audio-output {
363
  display: none;
364
  }
@@ -420,6 +472,10 @@ HTML_CONTENT = """<!DOCTYPE html>
420
  background-color: #ff9800;
421
  color: white;
422
  }
 
 
 
 
423
  .status-indicator {
424
  display: inline-flex;
425
  align-items: center;
@@ -454,34 +510,17 @@ HTML_CONTENT = """<!DOCTYPE html>
454
  opacity: 0.6;
455
  }
456
  }
457
- .mouse-logo {
458
- position: relative;
459
  width: 40px;
460
  height: 40px;
461
- }
462
- .mouse-ears {
463
- position: absolute;
464
- width: 15px;
465
- height: 15px;
466
- background-color: var(--primary-color);
467
- border-radius: 50%;
468
- }
469
- .mouse-ear-left {
470
- top: 0;
471
- left: 5px;
472
- }
473
- .mouse-ear-right {
474
- top: 0;
475
- right: 5px;
476
- }
477
- .mouse-face {
478
- position: absolute;
479
- top: 10px;
480
- left: 5px;
481
- width: 30px;
482
- height: 30px;
483
- background-color: var(--secondary-color);
484
  border-radius: 50%;
 
 
 
 
 
 
485
  }
486
  </style>
487
  </head>
@@ -491,12 +530,8 @@ HTML_CONTENT = """<!DOCTYPE html>
491
  <div class="container">
492
  <div class="header">
493
  <div class="logo">
494
- <div class="mouse-logo">
495
- <div class="mouse-ears mouse-ear-left"></div>
496
- <div class="mouse-ears mouse-ear-right"></div>
497
- <div class="mouse-face"></div>
498
- </div>
499
- <h1>MOUSE 음성 챗</h1>
500
  </div>
501
  <div class="status-indicator">
502
  <div id="status-dot" class="status-dot disconnected"></div>
@@ -517,11 +552,16 @@ HTML_CONTENT = """<!DOCTYPE html>
517
  </div>
518
  </div>
519
  <div class="text-input-section">
520
- <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
521
- <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
522
  </div>
523
  </div>
524
 
 
 
 
 
 
525
  <div class="history-section">
526
  <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화 기록</h3>
527
  <div id="history-list"></div>
@@ -529,6 +569,7 @@ HTML_CONTENT = """<!DOCTYPE html>
529
 
530
  <div class="controls">
531
  <button id="start-button">대화 시작</button>
 
532
  </div>
533
  </div>
534
 
@@ -552,24 +593,44 @@ HTML_CONTENT = """<!DOCTYPE html>
552
  let peerConnection;
553
  let webrtc_id;
554
  let webSearchEnabled = false;
555
- let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
556
  let currentSessionId = null;
 
 
557
  const audioOutput = document.getElementById('audio-output');
558
  const startButton = document.getElementById('start-button');
 
559
  const sendButton = document.getElementById('send-button');
560
  const chatMessages = document.getElementById('chat-messages');
561
  const statusDot = document.getElementById('status-dot');
562
  const statusText = document.getElementById('status-text');
563
  const searchToggle = document.getElementById('search-toggle');
564
- const systemPromptInput = document.getElementById('system-prompt');
565
  const textInput = document.getElementById('text-input');
566
  const historyList = document.getElementById('history-list');
 
 
 
567
  let audioLevel = 0;
568
  let animationFrame;
569
  let audioContext, analyser, audioSource;
570
  let dataChannel = null;
571
  let isVoiceActive = false;
572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  // Start new session
574
  async function startNewSession() {
575
  const response = await fetch('/session/new', { method: 'POST' });
@@ -577,6 +638,62 @@ HTML_CONTENT = """<!DOCTYPE html>
577
  currentSessionId = data.session_id;
578
  console.log('New session started:', currentSessionId);
579
  loadHistory();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
  }
581
 
582
  // Load conversation history
@@ -623,11 +740,6 @@ HTML_CONTENT = """<!DOCTYPE html>
623
  console.log('Web search enabled:', webSearchEnabled);
624
  });
625
 
626
- // System prompt update
627
- systemPromptInput.addEventListener('input', () => {
628
- systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
629
- });
630
-
631
  // Text input handling
632
  textInput.addEventListener('keypress', (e) => {
633
  if (e.key === 'Enter' && !e.shiftKey) {
@@ -637,11 +749,19 @@ HTML_CONTENT = """<!DOCTYPE html>
637
  });
638
 
639
  sendButton.addEventListener('click', sendTextMessage);
 
640
 
641
  async function sendTextMessage() {
642
  const message = textInput.value.trim();
643
  if (!message) return;
644
 
 
 
 
 
 
 
 
645
  // Add user message to chat
646
  addMessage('user', message);
647
  textInput.value = '';
@@ -662,8 +782,9 @@ HTML_CONTENT = """<!DOCTYPE html>
662
  body: JSON.stringify({
663
  message: message,
664
  web_search_enabled: webSearchEnabled,
665
- system_prompt: systemPrompt,
666
- session_id: currentSessionId
 
667
  })
668
  });
669
 
@@ -696,16 +817,34 @@ HTML_CONTENT = """<!DOCTYPE html>
696
  if (state === 'connected') {
697
  statusText.textContent = '연결됨';
698
  sendButton.style.display = 'block';
 
699
  isVoiceActive = true;
700
  } else if (state === 'connecting') {
701
  statusText.textContent = '연결 중...';
702
  sendButton.style.display = 'none';
 
703
  } else {
704
  statusText.textContent = '연결 대기 중';
705
- sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
 
706
  isVoiceActive = false;
707
  }
708
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  function updateButtonState() {
710
  const button = document.getElementById('start-button');
711
  if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
@@ -735,6 +874,7 @@ HTML_CONTENT = """<!DOCTYPE html>
735
  updateStatus('disconnected');
736
  }
737
  }
 
738
  function setupAudioVisualization(stream) {
739
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
740
  analyser = audioContext.createAnalyser();
@@ -769,27 +909,14 @@ HTML_CONTENT = """<!DOCTYPE html>
769
 
770
  updateAudioLevel();
771
  }
772
- function showError(message) {
773
- const toast = document.getElementById('error-toast');
774
- toast.textContent = message;
775
- toast.className = 'toast error';
776
- toast.style.display = 'block';
777
- setTimeout(() => {
778
- toast.style.display = 'none';
779
- }, 5000);
780
- }
781
  async function setupWebRTC() {
782
  const config = __RTC_CONFIGURATION__;
783
  peerConnection = new RTCPeerConnection(config);
784
  const timeoutId = setTimeout(() => {
785
- const toast = document.getElementById('error-toast');
786
- toast.textContent = "연결이 평소보다 오래 걸리고 있습니다. VPN을 사용 중이신가요?";
787
- toast.className = 'toast warning';
788
- toast.style.display = 'block';
789
- setTimeout(() => {
790
- toast.style.display = 'none';
791
- }, 5000);
792
  }, 5000);
 
793
  try {
794
  const stream = await navigator.mediaDevices.getUserMedia({
795
  audio: true
@@ -832,6 +959,7 @@ HTML_CONTENT = """<!DOCTYPE html>
832
  peerConnection.addEventListener("icegatheringstatechange", checkState);
833
  }
834
  });
 
835
  peerConnection.addEventListener('connectionstatechange', () => {
836
  console.log('connectionstatechange', peerConnection.connectionState);
837
  if (peerConnection.connectionState === 'connected') {
@@ -841,15 +969,8 @@ HTML_CONTENT = """<!DOCTYPE html>
841
  }
842
  updateButtonState();
843
  });
844
- webrtc_id = Math.random().toString(36).substring(7);
845
 
846
- // Log current settings before sending
847
- console.log('Sending offer with settings:', {
848
- webrtc_id: webrtc_id,
849
- web_search_enabled: webSearchEnabled,
850
- system_prompt: systemPrompt,
851
- session_id: currentSessionId
852
- });
853
 
854
  const response = await fetch('/webrtc/offer', {
855
  method: 'POST',
@@ -859,10 +980,12 @@ HTML_CONTENT = """<!DOCTYPE html>
859
  type: peerConnection.localDescription.type,
860
  webrtc_id: webrtc_id,
861
  web_search_enabled: webSearchEnabled,
862
- system_prompt: systemPrompt,
863
- session_id: currentSessionId
 
864
  })
865
  });
 
866
  const serverResponse = await response.json();
867
  if (serverResponse.status === 'failed') {
868
  showError(serverResponse.meta.error === 'concurrency_limit_reached'
@@ -871,6 +994,7 @@ HTML_CONTENT = """<!DOCTYPE html>
871
  stop();
872
  return;
873
  }
 
874
  await peerConnection.setRemoteDescription(serverResponse);
875
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
876
  eventSource.addEventListener("output", (event) => {
@@ -895,6 +1019,7 @@ HTML_CONTENT = """<!DOCTYPE html>
895
  stop();
896
  }
897
  }
 
898
  function addMessage(role, content, save = true) {
899
  const messageDiv = document.createElement('div');
900
  messageDiv.classList.add('message', role);
@@ -908,7 +1033,7 @@ HTML_CONTENT = """<!DOCTYPE html>
908
  chatMessages.scrollTop = chatMessages.scrollHeight;
909
 
910
  // Save message to database if save flag is true
911
- if (save && currentSessionId) {
912
  fetch('/message/save', {
913
  method: 'POST',
914
  headers: { 'Content-Type': 'application/json' },
@@ -997,6 +1122,7 @@ HTML_CONTENT = """<!DOCTYPE html>
997
  webrtc_id = null;
998
  }
999
  }
 
1000
  startButton.addEventListener('click', () => {
1001
  console.log('clicked');
1002
  console.log(peerConnection, peerConnection?.connectionState);
@@ -1011,8 +1137,10 @@ HTML_CONTENT = """<!DOCTYPE html>
1011
  // Initialize on page load
1012
  window.addEventListener('DOMContentLoaded', () => {
1013
  sendButton.style.display = 'block';
 
1014
  startNewSession();
1015
  loadHistory();
 
1016
  });
1017
  </script>
1018
  </body>
@@ -1062,13 +1190,14 @@ class BraveSearchClient:
1062
 
1063
 
1064
  # Database helper class
1065
- class ChatDatabase:
1066
- """Database manager for chat history"""
1067
 
1068
  @staticmethod
1069
  async def init():
1070
  """Initialize database tables"""
1071
  async with aiosqlite.connect(DB_PATH) as db:
 
1072
  await db.execute("""
1073
  CREATE TABLE IF NOT EXISTS conversations (
1074
  id TEXT PRIMARY KEY,
@@ -1078,6 +1207,7 @@ class ChatDatabase:
1078
  )
1079
  """)
1080
 
 
1081
  await db.execute("""
1082
  CREATE TABLE IF NOT EXISTS messages (
1083
  id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -1090,6 +1220,24 @@ class ChatDatabase:
1090
  )
1091
  """)
1092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1093
  await db.commit()
1094
 
1095
  @staticmethod
@@ -1105,13 +1253,18 @@ class ChatDatabase:
1105
  @staticmethod
1106
  async def save_message(session_id: str, role: str, content: str):
1107
  """Save a message to the database"""
 
 
 
 
 
1108
  # Detect language
1109
  detected_language = None
1110
  try:
1111
- if content and len(content) > 10: # Only detect for substantial content
1112
  detected_language = detect(content)
1113
- except LangDetectException:
1114
- pass
1115
 
1116
  async with aiosqlite.connect(DB_PATH) as db:
1117
  await db.execute(
@@ -1133,7 +1286,7 @@ class ChatDatabase:
1133
  (session_id,)
1134
  )
1135
  row = await cursor.fetchone()
1136
- if row and not row[0]: # If no summary exists
1137
  summary = content[:100] + "..." if len(content) > 100 else content
1138
  await db.execute(
1139
  "UPDATE conversations SET summary = ? WHERE id = ?",
@@ -1184,6 +1337,139 @@ class ChatDatabase:
1184
  }
1185
  for row in rows
1186
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
 
1188
 
1189
  # Initialize search client globally
@@ -1203,16 +1489,54 @@ def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEve
1203
  return chatbot
1204
 
1205
 
1206
- async def process_text_chat(message: str, web_search_enabled: bool, system_prompt: str, session_id: str) -> Dict[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1207
  """Process text chat using GPT-4o-mini model"""
1208
  try:
1209
- messages = [
1210
- {"role": "system", "content": system_prompt or "You are a helpful assistant."}
1211
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1212
 
1213
  # Handle web search if enabled
1214
- if web_search_enabled and search_client:
1215
- # Check if the message requires web search
1216
  search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
1217
  "오늘", "지금", "가격", "환율", "주가", "weather", "news",
1218
  "current", "today", "price", "2024", "2025"]
@@ -1220,7 +1544,6 @@ async def process_text_chat(message: str, web_search_enabled: bool, system_promp
1220
  should_search = any(keyword in message.lower() for keyword in search_keywords)
1221
 
1222
  if should_search:
1223
- # Perform web search
1224
  search_results = await search_client.search(message)
1225
  if search_results:
1226
  search_context = "웹 검색 결과:\n\n"
@@ -1254,8 +1577,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, system_promp
1254
 
1255
  # Save messages to database
1256
  if session_id:
1257
- await ChatDatabase.save_message(session_id, "user", message)
1258
- await ChatDatabase.save_message(session_id, "assistant", response_text)
1259
 
1260
  return {
1261
  "response": response_text,
@@ -1268,8 +1591,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, system_promp
1268
 
1269
 
1270
  class OpenAIHandler(AsyncStreamHandler):
1271
- def __init__(self, web_search_enabled: bool = False, system_prompt: str = "",
1272
- webrtc_id: str = None, session_id: str = None) -> None:
1273
  super().__init__(
1274
  expected_layout="mono",
1275
  output_sample_rate=SAMPLE_RATE,
@@ -1284,15 +1607,16 @@ class OpenAIHandler(AsyncStreamHandler):
1284
  self.current_call_id = None
1285
  self.webrtc_id = webrtc_id
1286
  self.web_search_enabled = web_search_enabled
1287
- self.system_prompt = system_prompt
1288
  self.session_id = session_id
 
 
 
 
1289
 
1290
- print(f"[INIT] Handler created with web_search={web_search_enabled}, session_id={session_id}")
1291
 
1292
  def copy(self):
1293
- # Get the most recent settings
1294
  if connection_settings:
1295
- # Get the most recent webrtc_id
1296
  recent_ids = sorted(connection_settings.keys(),
1297
  key=lambda k: connection_settings[k].get('timestamp', 0),
1298
  reverse=True)
@@ -1300,14 +1624,14 @@ class OpenAIHandler(AsyncStreamHandler):
1300
  recent_id = recent_ids[0]
1301
  settings = connection_settings[recent_id]
1302
 
1303
- # Log the settings being copied
1304
  print(f"[COPY] Copying settings from {recent_id}:")
1305
 
1306
  return OpenAIHandler(
1307
  web_search_enabled=settings.get('web_search_enabled', False),
1308
- system_prompt=settings.get('system_prompt', ''),
1309
  webrtc_id=recent_id,
1310
- session_id=settings.get('session_id')
 
 
1311
  )
1312
 
1313
  print(f"[COPY] No settings found, creating default handler")
@@ -1323,7 +1647,6 @@ class OpenAIHandler(AsyncStreamHandler):
1323
  if not results:
1324
  return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
1325
 
1326
- # Format search results
1327
  formatted_results = []
1328
  for i, result in enumerate(results, 1):
1329
  formatted_results.append(
@@ -1348,25 +1671,34 @@ class OpenAIHandler(AsyncStreamHandler):
1348
 
1349
  async def start_up(self):
1350
  """Connect to realtime API"""
1351
- # First check if we have the most recent settings
1352
  if connection_settings and self.webrtc_id:
1353
  if self.webrtc_id in connection_settings:
1354
  settings = connection_settings[self.webrtc_id]
1355
  self.web_search_enabled = settings.get('web_search_enabled', False)
1356
- self.system_prompt = settings.get('system_prompt', '')
1357
  self.session_id = settings.get('session_id')
 
 
1358
 
1359
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1360
 
1361
  self.client = openai.AsyncOpenAI()
1362
 
1363
- # Connect to Realtime API
1364
  print(f"[REALTIME API] Connecting...")
1365
 
 
 
 
 
 
 
 
 
 
 
 
 
1366
  # Define the web search function
1367
  tools = []
1368
- base_instructions = self.system_prompt or "You are a helpful assistant."
1369
-
1370
  if self.web_search_enabled and self.search_client:
1371
  tools = [{
1372
  "type": "function",
@@ -1385,20 +1717,10 @@ class OpenAIHandler(AsyncStreamHandler):
1385
  }
1386
  }
1387
  }]
1388
- print("Web search function added to tools")
1389
 
1390
  search_instructions = (
1391
  "\n\nYou have web search capabilities. "
1392
- "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
1393
- "- Weather (날씨, 기온, 비, 눈)\n"
1394
- "- News (뉴스, 소식)\n"
1395
- "- Current events (현재, 최근, 오늘, 지금)\n"
1396
- "- Prices (가격, 환율, 주가)\n"
1397
- "- Sports scores or results\n"
1398
- "- Any question about 2024 or 2025\n"
1399
- "- Any time-sensitive information\n\n"
1400
- "When in doubt, USE web_search. It's better to search and provide accurate information "
1401
- "than to guess or use outdated information."
1402
  )
1403
 
1404
  instructions = base_instructions + search_instructions
@@ -1408,9 +1730,13 @@ class OpenAIHandler(AsyncStreamHandler):
1408
  async with self.client.beta.realtime.connect(
1409
  model="gpt-4o-mini-realtime-preview-2024-12-17"
1410
  ) as conn:
1411
- # Update session with tools
1412
  session_update = {
1413
- "turn_detection": {"type": "server_vad"},
 
 
 
 
 
1414
  "instructions": instructions,
1415
  "tools": tools,
1416
  "tool_choice": "auto" if tools else "none",
@@ -1420,29 +1746,84 @@ class OpenAIHandler(AsyncStreamHandler):
1420
  "voice": "alloy"
1421
  }
1422
 
1423
- await conn.session.update(session=session_update)
1424
- self.connection = conn
1425
- print(f"Connected with tools: {len(tools)} functions")
 
 
 
 
 
1426
 
1427
  async for event in self.connection:
1428
- # Debug logging for function calls
1429
- if event.type.startswith("response.function_call"):
1430
- print(f"Function event: {event.type}")
 
1431
 
1432
- if event.type == "response.audio_transcript.done":
1433
- print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1434
 
1435
  # Detect language
1436
  detected_language = None
1437
  try:
1438
  if event.transcript and len(event.transcript) > 10:
1439
  detected_language = detect(event.transcript)
1440
- except:
1441
- pass
1442
 
1443
  # Save to database
1444
- if self.session_id:
1445
- await ChatDatabase.save_message(self.session_id, "assistant", event.transcript)
1446
 
1447
  output_data = {
1448
  "event": event,
@@ -1450,15 +1831,31 @@ class OpenAIHandler(AsyncStreamHandler):
1450
  }
1451
  await self.output_queue.put(AdditionalOutputs(output_data))
1452
 
 
 
 
 
 
 
1453
  elif event.type == "response.audio.delta":
1454
- await self.output_queue.put(
1455
- (
1456
- self.output_sample_rate,
1457
- np.frombuffer(
1458
- base64.b64decode(event.delta), dtype=np.int16
1459
- ).reshape(1, -1),
1460
- ),
1461
- )
 
 
 
 
 
 
 
 
 
 
1462
 
1463
  # Handle function calls
1464
  elif event.type == "response.function_call_arguments.start":
@@ -1511,7 +1908,15 @@ class OpenAIHandler(AsyncStreamHandler):
1511
  print(f"[RECEIVE] No connection, skipping")
1512
  return
1513
  try:
 
 
 
 
1514
  _, array = frame
 
 
 
 
1515
  array = array.squeeze()
1516
  audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
1517
  await self.connection.input_audio_buffer.append(audio=audio_message)
@@ -1521,7 +1926,6 @@ class OpenAIHandler(AsyncStreamHandler):
1521
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1522
  item = await wait_for_item(self.output_queue)
1523
 
1524
- # Check if it's a dict with text message
1525
  if isinstance(item, dict) and item.get('type') == 'text_message':
1526
  await self.process_text_message(item['content'])
1527
  return None
@@ -1545,7 +1949,7 @@ chatbot = gr.Chatbot(type="messages")
1545
 
1546
  # Create stream with handler instance
1547
  stream = Stream(
1548
- handler, # Pass instance, not factory
1549
  mode="send-receive",
1550
  modality="audio",
1551
  additional_inputs=[chatbot],
@@ -1564,8 +1968,26 @@ stream.mount(app)
1564
  # Initialize database on startup
1565
  @app.on_event("startup")
1566
  async def startup_event():
1567
- await ChatDatabase.init()
1568
- print("Database initialized")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1569
 
1570
  # Intercept offer to capture settings
1571
  @app.post("/webrtc/offer", include_in_schema=False)
@@ -1575,24 +1997,26 @@ async def custom_offer(request: Request):
1575
 
1576
  webrtc_id = body.get("webrtc_id")
1577
  web_search_enabled = body.get("web_search_enabled", False)
1578
- system_prompt = body.get("system_prompt", "")
1579
  session_id = body.get("session_id")
 
 
1580
 
1581
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
1582
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
1583
  print(f"[OFFER] session_id: {session_id}")
 
1584
 
1585
  # Store settings with timestamp
1586
  if webrtc_id:
1587
  connection_settings[webrtc_id] = {
1588
  'web_search_enabled': web_search_enabled,
1589
- 'system_prompt': system_prompt,
1590
  'session_id': session_id,
 
 
1591
  'timestamp': asyncio.get_event_loop().time()
1592
  }
1593
 
1594
- print(f"[OFFER] Stored settings for {webrtc_id}:")
1595
- print(f"[OFFER] {connection_settings[webrtc_id]}")
1596
 
1597
  # Remove our custom route temporarily
1598
  custom_route = None
@@ -1618,10 +2042,25 @@ async def custom_offer(request: Request):
1618
  async def create_new_session():
1619
  """Create a new chat session"""
1620
  session_id = str(uuid.uuid4())
1621
- await ChatDatabase.create_session(session_id)
1622
  return {"session_id": session_id}
1623
 
1624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1625
  @app.post("/message/save")
1626
  async def save_message(request: Request):
1627
  """Save a message to the database"""
@@ -1633,24 +2072,31 @@ async def save_message(request: Request):
1633
  if not all([session_id, role, content]):
1634
  return {"error": "Missing required fields"}
1635
 
1636
- await ChatDatabase.save_message(session_id, role, content)
1637
  return {"status": "ok"}
1638
 
1639
 
1640
  @app.get("/history/recent")
1641
  async def get_recent_history():
1642
  """Get recent conversation history"""
1643
- conversations = await ChatDatabase.get_recent_conversations()
1644
  return conversations
1645
 
1646
 
1647
  @app.get("/history/{session_id}")
1648
  async def get_conversation(session_id: str):
1649
  """Get messages for a specific conversation"""
1650
- messages = await ChatDatabase.get_conversation_messages(session_id)
1651
  return messages
1652
 
1653
 
 
 
 
 
 
 
 
1654
  @app.post("/chat/text")
1655
  async def chat_text(request: Request):
1656
  """Handle text chat messages using GPT-4o-mini"""
@@ -1658,14 +2104,15 @@ async def chat_text(request: Request):
1658
  body = await request.json()
1659
  message = body.get("message", "")
1660
  web_search_enabled = body.get("web_search_enabled", False)
1661
- system_prompt = body.get("system_prompt", "")
1662
  session_id = body.get("session_id")
 
 
1663
 
1664
  if not message:
1665
  return {"error": "메시지가 비어있습니다."}
1666
 
1667
  # Process text chat
1668
- result = await process_text_chat(message, web_search_enabled, system_prompt, session_id)
1669
 
1670
  return result
1671
 
 
31
  load_dotenv()
32
 
33
  SAMPLE_RATE = 24000
34
+
35
# Choose where the SQLite database lives.
#
# Hugging Face Spaces mount persistent storage at /data. Use it only when it
# is a directory this process can write to; a plain existence check would also
# accept a regular file or a read-only mount, and the makedirs/connect calls
# below would then fail instead of falling back to the local directory.
_HF_PERSISTENT_DIR = "/data"

if os.path.isdir(_HF_PERSISTENT_DIR) and os.access(_HF_PERSISTENT_DIR, os.W_OK):
    PERSISTENT_DIR = _HF_PERSISTENT_DIR
else:
    # Local/development fallback, relative to the working directory.
    PERSISTENT_DIR = "./data"

# Safe in both cases: a no-op for an existing directory, creates ./data otherwise.
os.makedirs(PERSISTENT_DIR, exist_ok=True)
DB_PATH = os.path.join(PERSISTENT_DIR, "personal_assistant.db")
print(f"Using persistent directory: {PERSISTENT_DIR}")
print(f"Database path: {DB_PATH}")
46
 
47
  # HTML content embedded as a string
48
  HTML_CONTENT = """<!DOCTYPE html>
 
51
  <head>
52
  <meta charset="UTF-8">
53
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
54
+ <title>Personal AI Assistant</title>
55
  <style>
56
  :root {
57
  --primary-color: #6f42c1;
 
61
  --text-color: #f8f9fa;
62
  --border-color: #333;
63
  --hover-color: #8a5cf6;
64
+ --memory-color: #4a9eff;
65
  }
66
  body {
67
  font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, sans-serif;
 
138
  padding: 20px;
139
  border: 1px solid var(--border-color);
140
  overflow-y: auto;
 
141
  }
142
  .settings-grid {
143
  display: flex;
 
182
  .toggle-switch.active .toggle-slider {
183
  transform: translateX(24px);
184
  }
185
+ /* Memory section */
186
+ .memory-section {
187
+ background-color: var(--card-bg);
188
+ border-radius: 12px;
189
+ padding: 20px;
190
+ border: 1px solid var(--border-color);
191
+ max-height: 300px;
192
+ overflow-y: auto;
193
+ }
194
+ .memory-item {
195
+ padding: 10px;
196
+ margin-bottom: 10px;
197
+ background: linear-gradient(135deg, rgba(74, 158, 255, 0.1), rgba(111, 66, 193, 0.1));
198
+ border-radius: 6px;
199
+ border-left: 3px solid var(--memory-color);
200
+ }
201
+ .memory-category {
202
+ font-size: 12px;
203
+ color: var(--memory-color);
204
+ font-weight: bold;
205
+ text-transform: uppercase;
206
+ margin-bottom: 5px;
207
+ }
208
+ .memory-content {
209
+ font-size: 14px;
210
+ color: var(--text-color);
211
+ }
212
  /* History section */
213
  .history-section {
214
  background-color: var(--card-bg);
215
  border-radius: 12px;
216
  padding: 20px;
217
  border: 1px solid var(--border-color);
218
+ max-height: 200px;
 
219
  overflow-y: auto;
220
  }
221
  .history-item {
 
329
  padding: 10px;
330
  margin-bottom: 10px;
331
  }
332
+ .message.memory-update {
333
+ background: linear-gradient(135deg, rgba(74, 158, 255, 0.2), rgba(111, 66, 193, 0.2));
334
+ font-size: 13px;
335
+ padding: 8px 12px;
336
+ margin-bottom: 10px;
337
+ border-left: 3px solid var(--memory-color);
338
+ }
339
  .language-info {
340
  font-size: 12px;
341
  color: #888;
 
403
  #send-button:hover {
404
  background: linear-gradient(135deg, #27ae60, #229954);
405
  }
406
+ #end-session-button {
407
+ background: linear-gradient(135deg, #4a9eff, #3a7ed8);
408
+ padding: 8px 16px;
409
+ font-size: 13px;
410
+ }
411
+ #end-session-button:hover {
412
+ background: linear-gradient(135deg, #3a7ed8, #2a5eb8);
413
+ }
414
  #audio-output {
415
  display: none;
416
  }
 
472
  background-color: #ff9800;
473
  color: white;
474
  }
475
+ .toast.success {
476
+ background-color: #4caf50;
477
+ color: white;
478
+ }
479
  .status-indicator {
480
  display: inline-flex;
481
  align-items: center;
 
510
  opacity: 0.6;
511
  }
512
  }
513
+ .user-avatar {
 
514
  width: 40px;
515
  height: 40px;
516
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  border-radius: 50%;
518
+ display: flex;
519
+ align-items: center;
520
+ justify-content: center;
521
+ font-size: 20px;
522
+ font-weight: bold;
523
+ color: white;
524
  }
525
  </style>
526
  </head>
 
530
  <div class="container">
531
  <div class="header">
532
  <div class="logo">
533
+ <div class="user-avatar" id="user-avatar">👤</div>
534
+ <h1>Personal AI Assistant</h1>
 
 
 
 
535
  </div>
536
  <div class="status-indicator">
537
  <div id="status-dot" class="status-dot disconnected"></div>
 
552
  </div>
553
  </div>
554
  <div class="text-input-section">
555
+ <label for="user-name" class="setting-label">사용자 이름:</label>
556
+ <input type="text" id="user-name" placeholder="이름을 입력하세요..." />
557
  </div>
558
  </div>
559
 
560
+ <div class="memory-section">
561
+ <h3 style="margin: 0 0 15px 0; color: var(--memory-color);">기억된 정보</h3>
562
+ <div id="memory-list"></div>
563
+ </div>
564
+
565
  <div class="history-section">
566
  <h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화 기록</h3>
567
  <div id="history-list"></div>
 
569
 
570
  <div class="controls">
571
  <button id="start-button">대화 시작</button>
572
+ <button id="end-session-button" style="display: none;">기억 업데이트</button>
573
  </div>
574
  </div>
575
 
 
593
  let peerConnection;
594
  let webrtc_id;
595
  let webSearchEnabled = false;
 
596
  let currentSessionId = null;
597
+ let userName = localStorage.getItem('userName') || '';
598
+ let userMemories = {};
599
  const audioOutput = document.getElementById('audio-output');
600
  const startButton = document.getElementById('start-button');
601
+ const endSessionButton = document.getElementById('end-session-button');
602
  const sendButton = document.getElementById('send-button');
603
  const chatMessages = document.getElementById('chat-messages');
604
  const statusDot = document.getElementById('status-dot');
605
  const statusText = document.getElementById('status-text');
606
  const searchToggle = document.getElementById('search-toggle');
 
607
  const textInput = document.getElementById('text-input');
608
  const historyList = document.getElementById('history-list');
609
+ const memoryList = document.getElementById('memory-list');
610
+ const userNameInput = document.getElementById('user-name');
611
+ const userAvatar = document.getElementById('user-avatar');
612
  let audioLevel = 0;
613
  let animationFrame;
614
  let audioContext, analyser, audioSource;
615
  let dataChannel = null;
616
  let isVoiceActive = false;
617
 
618
// Initialize user name

// Show the first visible character of the name in the avatar, or the
// default silhouette when the name is empty or whitespace-only.
function updateUserAvatar(name) {
    const trimmed = (name || '').trim();
    // Array.from iterates by code point, so a name that starts with a
    // surrogate-pair character is not cut in half the way charAt(0) would.
    userAvatar.textContent = trimmed
        ? Array.from(trimmed)[0].toUpperCase()
        : '👤';
}

userNameInput.value = userName;
updateUserAvatar(userName);

userNameInput.addEventListener('input', () => {
    userName = userNameInput.value;
    // Update the UI first so a storage failure cannot leave it stale.
    updateUserAvatar(userName);
    try {
        localStorage.setItem('userName', userName);
    } catch (error) {
        // Storage can be unavailable or full; the name still works for
        // this page load, it just will not be remembered.
        console.warn('Could not persist user name:', error);
    }
});
633
+
634
  // Start new session
635
  async function startNewSession() {
636
  const response = await fetch('/session/new', { method: 'POST' });
 
638
  currentSessionId = data.session_id;
639
  console.log('New session started:', currentSessionId);
640
  loadHistory();
641
+ loadMemories();
642
+ }
643
+
644
// Load memories
// Fetches every stored memory from GET /memory/all, rebuilds the
// category -> contents map in userMemories and re-renders the memory list.
// Failures are logged and leave the previous state untouched.
async function loadMemories() {
    try {
        const response = await fetch('/memory/all');
        if (!response.ok) {
            throw new Error('Memory request failed with status ' + response.status);
        }

        const memories = await response.json();
        if (!Array.isArray(memories)) {
            throw new Error('Unexpected memory payload');
        }

        const grouped = {};
        const fragment = document.createDocumentFragment();

        memories.forEach(memory => {
            if (!grouped[memory.category]) {
                grouped[memory.category] = [];
            }
            grouped[memory.category].push(memory.content);

            const item = document.createElement('div');
            item.className = 'memory-item';

            // Memory text is extracted from conversations, so it is inserted
            // as text (never as markup) to prevent HTML/script injection.
            const categoryEl = document.createElement('div');
            categoryEl.className = 'memory-category';
            categoryEl.textContent = memory.category;

            const contentEl = document.createElement('div');
            contentEl.className = 'memory-content';
            contentEl.textContent = memory.content;

            item.appendChild(categoryEl);
            item.appendChild(contentEl);
            fragment.appendChild(item);
        });

        // Swap in the new state only after the whole payload was processed.
        userMemories = grouped;
        memoryList.innerHTML = '';
        memoryList.appendChild(fragment);

        console.log('Loaded memories:', userMemories);
    } catch (error) {
        console.error('Failed to load memories:', error);
    }
}
673
+
674
// End session and update memories
// Asks the server to extract memories from the current session, then reloads
// the memory panel and opens a fresh session. Does nothing without a session.
async function endSession() {
    if (!currentSessionId) return;

    try {
        addMessage('memory-update', '대화 내용을 분석하여 기억을 업데이트하고 있습니다...');

        const response = await fetch('/session/end', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ session_id: currentSessionId })
        });

        const result = await response.json();
        // The server reports problems as { error: ... }; surface them through
        // the catch block instead of silently ignoring them.
        if (!response.ok || result.status !== 'ok') {
            throw new Error(result.error || ('Unexpected response status ' + response.status));
        }

        showToast('기억이 성공적으로 업데이트되었습니다.', 'success');
        await loadMemories();
        // A failure to open the next session is a separate problem from the
        // memory update, so it is logged rather than reported as one.
        startNewSession().catch(error => {
            console.error('Failed to start new session:', error);
        });
    } catch (error) {
        console.error('Failed to end session:', error);
        showError('기억 업데이트 중 오류가 발생했습니다.');
    }
}
698
 
699
  // Load conversation history
 
740
  console.log('Web search enabled:', webSearchEnabled);
741
  });
742
 
 
 
 
 
 
743
  // Text input handling
744
  textInput.addEventListener('keypress', (e) => {
745
  if (e.key === 'Enter' && !e.shiftKey) {
 
749
  });
750
 
751
  sendButton.addEventListener('click', sendTextMessage);
752
+ endSessionButton.addEventListener('click', endSession);
753
 
754
  async function sendTextMessage() {
755
  const message = textInput.value.trim();
756
  if (!message) return;
757
 
758
+ // Check for stop words
759
+ const stopWords = ["중단", "그만", "스톱", "stop", "닥쳐", "멈춰", "중지"];
760
+ if (stopWords.some(word => message.toLowerCase().includes(word))) {
761
+ addMessage('assistant', '대화를 중단합니다.');
762
+ return;
763
+ }
764
+
765
  // Add user message to chat
766
  addMessage('user', message);
767
  textInput.value = '';
 
782
  body: JSON.stringify({
783
  message: message,
784
  web_search_enabled: webSearchEnabled,
785
+ session_id: currentSessionId,
786
+ user_name: userName,
787
+ memories: userMemories
788
  })
789
  });
790
 
 
817
  if (state === 'connected') {
818
  statusText.textContent = '연결됨';
819
  sendButton.style.display = 'block';
820
+ endSessionButton.style.display = 'block';
821
  isVoiceActive = true;
822
  } else if (state === 'connecting') {
823
  statusText.textContent = '연결 중...';
824
  sendButton.style.display = 'none';
825
+ endSessionButton.style.display = 'none';
826
  } else {
827
  statusText.textContent = '연결 대기 중';
828
+ sendButton.style.display = 'block';
829
+ endSessionButton.style.display = 'block';
830
  isVoiceActive = false;
831
  }
832
  }
833
+
834
// Show a transient notification in the #error-toast element.
//   message: text to display.
//   type:    CSS modifier class added next to "toast" (default 'info').
// The toast hides itself after five seconds.
function showToast(message, type = 'info') {
    const toast = document.getElementById('error-toast');
    toast.textContent = message;
    toast.className = `toast ${type}`;
    toast.style.display = 'block';

    // Cancel the pending hide of a previous toast; otherwise its timer would
    // hide this newer message before its own five seconds are over.
    // The handle lives on the function object so no extra global is needed.
    clearTimeout(showToast.hideTimer);
    showToast.hideTimer = setTimeout(() => {
        toast.style.display = 'none';
    }, 5000);
}

// Convenience wrapper: show a toast with the 'error' style.
function showError(message) {
    showToast(message, 'error');
}
847
+
848
  function updateButtonState() {
849
  const button = document.getElementById('start-button');
850
  if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
 
874
  updateStatus('disconnected');
875
  }
876
  }
877
+
878
  function setupAudioVisualization(stream) {
879
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
880
  analyser = audioContext.createAnalyser();
 
909
 
910
  updateAudioLevel();
911
  }
912
+
 
 
 
 
 
 
 
 
913
  async function setupWebRTC() {
914
  const config = __RTC_CONFIGURATION__;
915
  peerConnection = new RTCPeerConnection(config);
916
  const timeoutId = setTimeout(() => {
917
+ showToast("연결이 평소보다 오래 걸리고 있습니다. VPN을 사용 중이신가요?", 'warning');
 
 
 
 
 
 
918
  }, 5000);
919
+
920
  try {
921
  const stream = await navigator.mediaDevices.getUserMedia({
922
  audio: true
 
959
  peerConnection.addEventListener("icegatheringstatechange", checkState);
960
  }
961
  });
962
+
963
  peerConnection.addEventListener('connectionstatechange', () => {
964
  console.log('connectionstatechange', peerConnection.connectionState);
965
  if (peerConnection.connectionState === 'connected') {
 
969
  }
970
  updateButtonState();
971
  });
 
972
 
973
+ webrtc_id = Math.random().toString(36).substring(7);
 
 
 
 
 
 
974
 
975
  const response = await fetch('/webrtc/offer', {
976
  method: 'POST',
 
980
  type: peerConnection.localDescription.type,
981
  webrtc_id: webrtc_id,
982
  web_search_enabled: webSearchEnabled,
983
+ session_id: currentSessionId,
984
+ user_name: userName,
985
+ memories: userMemories
986
  })
987
  });
988
+
989
  const serverResponse = await response.json();
990
  if (serverResponse.status === 'failed') {
991
  showError(serverResponse.meta.error === 'concurrency_limit_reached'
 
994
  stop();
995
  return;
996
  }
997
+
998
  await peerConnection.setRemoteDescription(serverResponse);
999
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
1000
  eventSource.addEventListener("output", (event) => {
 
1019
  stop();
1020
  }
1021
  }
1022
+
1023
  function addMessage(role, content, save = true) {
1024
  const messageDiv = document.createElement('div');
1025
  messageDiv.classList.add('message', role);
 
1033
  chatMessages.scrollTop = chatMessages.scrollHeight;
1034
 
1035
  // Save message to database if save flag is true
1036
+ if (save && currentSessionId && role !== 'memory-update' && role !== 'search-result') {
1037
  fetch('/message/save', {
1038
  method: 'POST',
1039
  headers: { 'Content-Type': 'application/json' },
 
1122
  webrtc_id = null;
1123
  }
1124
  }
1125
+
1126
  startButton.addEventListener('click', () => {
1127
  console.log('clicked');
1128
  console.log(peerConnection, peerConnection?.connectionState);
 
1137
// Initialize on page load
// Shows the send and end-session buttons, opens a fresh session, then loads
// the history and memory panels.
window.addEventListener('DOMContentLoaded', () => {
    sendButton.style.display = 'block';
    endSessionButton.style.display = 'block';
    // NOTE(review): startNewSession() itself calls loadHistory() and
    // loadMemories() once the session exists, so the two calls below request
    // the same data a second time - confirm whether that is intended.
    startNewSession();
    loadHistory();
    loadMemories();
});
1145
  </script>
1146
  </body>
 
1190
 
1191
 
1192
  # Database helper class
1193
+ class PersonalAssistantDB:
1194
+ """Database manager for personal assistant"""
1195
 
1196
  @staticmethod
1197
  async def init():
1198
  """Initialize database tables"""
1199
  async with aiosqlite.connect(DB_PATH) as db:
1200
+ # Conversations table
1201
  await db.execute("""
1202
  CREATE TABLE IF NOT EXISTS conversations (
1203
  id TEXT PRIMARY KEY,
 
1207
  )
1208
  """)
1209
 
1210
+ # Messages table
1211
  await db.execute("""
1212
  CREATE TABLE IF NOT EXISTS messages (
1213
  id INTEGER PRIMARY KEY AUTOINCREMENT,
 
1220
  )
1221
  """)
1222
 
1223
+ # User memories table - stores personal information
1224
+ await db.execute("""
1225
+ CREATE TABLE IF NOT EXISTS user_memories (
1226
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1227
+ category TEXT NOT NULL,
1228
+ content TEXT NOT NULL,
1229
+ confidence REAL DEFAULT 1.0,
1230
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1231
+ updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
1232
+ source_session_id TEXT,
1233
+ FOREIGN KEY (source_session_id) REFERENCES conversations(id)
1234
+ )
1235
+ """)
1236
+
1237
+ # Create indexes for better performance
1238
+ await db.execute("CREATE INDEX IF NOT EXISTS idx_memories_category ON user_memories(category)")
1239
+ await db.execute("CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id)")
1240
+
1241
  await db.commit()
1242
 
1243
  @staticmethod
 
1253
  @staticmethod
1254
  async def save_message(session_id: str, role: str, content: str):
1255
  """Save a message to the database"""
1256
+ # Check for None or empty content
1257
+ if not content:
1258
+ print(f"[SAVE_MESSAGE] Empty content for {role} message, skipping")
1259
+ return
1260
+
1261
  # Detect language
1262
  detected_language = None
1263
  try:
1264
+ if content and len(content) > 10:
1265
  detected_language = detect(content)
1266
+ except (LangDetectException, Exception) as e:
1267
+ print(f"Language detection error: {e}")
1268
 
1269
  async with aiosqlite.connect(DB_PATH) as db:
1270
  await db.execute(
 
1286
  (session_id,)
1287
  )
1288
  row = await cursor.fetchone()
1289
+ if row and not row[0]:
1290
  summary = content[:100] + "..." if len(content) > 100 else content
1291
  await db.execute(
1292
  "UPDATE conversations SET summary = ? WHERE id = ?",
 
1337
  }
1338
  for row in rows
1339
  ]
1340
+
1341
@staticmethod
async def save_memory(category: str, content: str, session_id: str | None = None, confidence: float = 1.0):
    """Save or update a user memory.

    A memory in the same category whose content contains the first 20
    characters of ``content`` is treated as the same memory and overwritten;
    otherwise a new row is inserted.

    Args:
        category: Memory category (for example ``"preferences"``).
        content: Memory text. Empty content is ignored.
        session_id: Session the memory came from, stored as
            ``source_session_id``.
        confidence: Confidence score stored with the memory.
    """
    if not content:
        # An empty prefix would turn the pattern into '%%', which matches and
        # would overwrite an arbitrary memory of this category.
        return

    # Escape LIKE metacharacters so the prefix is matched literally; the
    # backslash itself must be escaped first.
    prefix = content[:20]
    for special in ("\\", "%", "_"):
        prefix = prefix.replace(special, "\\" + special)

    async with aiosqlite.connect(DB_PATH) as db:
        # Check if similar memory exists
        cursor = await db.execute(
            """SELECT id, content FROM user_memories
            WHERE category = ? AND content LIKE ? ESCAPE '\\'
            LIMIT 1""",
            (category, f"%{prefix}%")
        )
        existing = await cursor.fetchone()

        if existing:
            # Update existing memory
            await db.execute(
                """UPDATE user_memories
                SET content = ?, confidence = ?, updated_at = CURRENT_TIMESTAMP,
                source_session_id = ?
                WHERE id = ?""",
                (content, confidence, session_id, existing[0])
            )
        else:
            # Insert new memory
            await db.execute(
                """INSERT INTO user_memories (category, content, confidence, source_session_id)
                VALUES (?, ?, ?, ?)""",
                (category, content, confidence, session_id)
            )

        await db.commit()
1372
+
1373
@staticmethod
async def get_all_memories():
    """Return every stored memory as a list of dicts.

    Each dict has the keys ``category``, ``content``, ``confidence`` and
    ``updated_at``. Rows are ordered by category, most recently updated first
    within each category.
    """
    columns = ("category", "content", "confidence", "updated_at")
    async with aiosqlite.connect(DB_PATH) as db:
        cursor = await db.execute(
            """SELECT category, content, confidence, updated_at
            FROM user_memories
            ORDER BY category, updated_at DESC"""
        )
        records = await cursor.fetchall()
        # Pair each selected column name with the value at the same position.
        return [dict(zip(columns, record)) for record in records]
1392
+
1393
@staticmethod
async def extract_and_save_memories(session_id: str):
    """Extract memories from a conversation and save them.

    Loads the session's messages, asks the chat model to return personal
    facts as a JSON array, and stores every well-formed entry through
    ``save_memory``. The operation is best-effort: any failure is logged and
    swallowed so that ending a session never raises.

    Args:
        session_id: Id of the conversation to analyse.
    """
    import re

    # Get all messages from the session
    messages = await PersonalAssistantDB.get_conversation_messages(session_id)

    if not messages:
        return

    # Prepare conversation text for analysis
    conversation_text = "\n".join(
        f"{msg['role']}: {msg['content']}"
        for msg in messages if msg.get('content')
    )
    if not conversation_text:
        # Every message was empty; there is nothing to send to the model.
        return

    # Use GPT to extract memories
    client = openai.AsyncOpenAI()

    try:
        response = await client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=[
                {
                    "role": "system",
                    "content": """You are a memory extraction system. Extract personal information from conversations.

Categories to extract:
- personal_info: 이름, 나이, 성별, 직업, 거주지
- preferences: 좋아하는 것, 싫어하는 것, 취향
- important_dates: 생일, 기념일, 중요한 날짜
- relationships: 가족, 친구, 동료 관계
- hobbies: 취미, 관심사
- health: 건강 상태, 알레르기, 의료 정보
- goals: 목표, 계획, 꿈
- routines: 일상, 습관, 루틴
- work: 직장, 업무, 프로젝트
- education: 학력, 전공, 학습

Return as JSON array with format:
[
{
"category": "category_name",
"content": "extracted information in Korean",
"confidence": 0.0-1.0
}
]

Only extract clear, factual information. Do not make assumptions."""
                },
                {
                    "role": "user",
                    "content": f"Extract memories from this conversation:\n\n{conversation_text}"
                }
            ],
            temperature=0.3,
            max_tokens=2000
        )

        # The message content may be None; treat that as an empty answer.
        memories_text = response.choices[0].message.content or ""

        # Extract JSON from response (the model may wrap it in prose or fences)
        json_match = re.search(r'\[.*\]', memories_text, re.DOTALL)
        if not json_match:
            print(f"No memories found in model response for session {session_id}")
            return

        memories = json.loads(json_match.group())

        saved_count = 0
        for memory in memories:
            # Skip malformed entries instead of aborting the whole batch.
            if not isinstance(memory, dict):
                continue
            content = memory.get('content')
            if not isinstance(content, str) or len(content) <= 5:
                continue

            try:
                confidence = float(memory.get('confidence', 0.8))
            except (TypeError, ValueError):
                confidence = 0.8
            # Keep the score inside the documented 0.0-1.0 range.
            confidence = min(1.0, max(0.0, confidence))

            await PersonalAssistantDB.save_memory(
                # The category column is NOT NULL, so fall back for null/empty.
                category=str(memory.get('category') or 'general'),
                content=content,
                session_id=session_id,
                confidence=confidence
            )
            saved_count += 1

        print(f"Extracted and saved {saved_count} memories from session {session_id}")

    except Exception as e:
        # Best-effort boundary: log and continue so session end never fails.
        print(f"Error extracting memories: {e}")
1473
 
1474
 
1475
  # Initialize search client globally
 
1489
  return chatbot
1490
 
1491
 
1492
def format_memories_for_prompt(memories: Dict[str, List[str]]) -> str:
    """Format memories for inclusion in a system prompt.

    Args:
        memories: Mapping of category name to a list of remembered items.
            The value comes from the client request, so anything that is not
            a dict, any category whose value is not a list, and any empty
            item are ignored.

    Returns:
        A text block that starts with the memory header and has one
        ``[category]`` section per non-empty category, or ``""`` when there
        is nothing to include.
    """
    if not memories or not isinstance(memories, dict):
        return ""

    sections = []
    for category, items in memories.items():
        if not isinstance(items, list):
            continue
        bullets = [f"- {item}\n" for item in items if item]
        # Skip categories without usable items so no empty section is emitted.
        if bullets:
            sections.append(f"\n[{category}]\n" + "".join(bullets))

    if not sections:
        # Avoid sending a bare header with no content to the model.
        return ""

    return "\n\n=== 기억된 정보 ===\n" + "".join(sections)
1506
+
1507
+
1508
+ async def process_text_chat(message: str, web_search_enabled: bool, session_id: str,
1509
+ user_name: str = "", memories: Dict = None) -> Dict[str, str]:
1510
  """Process text chat using GPT-4o-mini model"""
1511
  try:
1512
+ # Check for empty or None message
1513
+ if not message:
1514
+ return {"error": "메시지가 비어있습니다."}
1515
+
1516
+ # Check for stop words
1517
+ stop_words = ["중단", "그만", "스톱", "stop", "닥쳐", "멈춰", "중지"]
1518
+ if any(word in message.lower() for word in stop_words):
1519
+ return {
1520
+ "response": "대화를 중단합니다.",
1521
+ "detected_language": "ko"
1522
+ }
1523
+
1524
+ # Build system prompt with memories
1525
+ base_prompt = f"""You are a personal AI assistant for {user_name if user_name else 'the user'}.
1526
+ You remember all previous conversations and personal information about the user.
1527
+ Be friendly, helpful, and personalized in your responses.
1528
+ Always use the information you remember to make conversations more personal and relevant.
1529
+ IMPORTANT: Give only ONE response. Do not repeat or give multiple answers."""
1530
+
1531
+ # Add memories to prompt
1532
+ if memories:
1533
+ memory_text = format_memories_for_prompt(memories)
1534
+ base_prompt += memory_text
1535
+
1536
+ messages = [{"role": "system", "content": base_prompt}]
1537
 
1538
  # Handle web search if enabled
1539
+ if web_search_enabled and search_client and message:
 
1540
  search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
1541
  "오늘", "지금", "가격", "환율", "주가", "weather", "news",
1542
  "current", "today", "price", "2024", "2025"]
 
1544
  should_search = any(keyword in message.lower() for keyword in search_keywords)
1545
 
1546
  if should_search:
 
1547
  search_results = await search_client.search(message)
1548
  if search_results:
1549
  search_context = "웹 검색 결과:\n\n"
 
1577
 
1578
  # Save messages to database
1579
  if session_id:
1580
+ await PersonalAssistantDB.save_message(session_id, "user", message)
1581
+ await PersonalAssistantDB.save_message(session_id, "assistant", response_text)
1582
 
1583
  return {
1584
  "response": response_text,
 
1591
 
1592
 
1593
  class OpenAIHandler(AsyncStreamHandler):
1594
+ def __init__(self, web_search_enabled: bool = False, webrtc_id: str = None,
1595
+ session_id: str = None, user_name: str = "", memories: Dict = None) -> None:
1596
  super().__init__(
1597
  expected_layout="mono",
1598
  output_sample_rate=SAMPLE_RATE,
 
1607
  self.current_call_id = None
1608
  self.webrtc_id = webrtc_id
1609
  self.web_search_enabled = web_search_enabled
 
1610
  self.session_id = session_id
1611
+ self.user_name = user_name
1612
+ self.memories = memories or {}
1613
+ self.is_responding = False # Track if already responding
1614
+ self.should_stop = False # Track if conversation should stop
1615
 
1616
+ print(f"[INIT] Handler created with web_search={web_search_enabled}, session_id={session_id}, user={user_name}")
1617
 
1618
  def copy(self):
 
1619
  if connection_settings:
 
1620
  recent_ids = sorted(connection_settings.keys(),
1621
  key=lambda k: connection_settings[k].get('timestamp', 0),
1622
  reverse=True)
 
1624
  recent_id = recent_ids[0]
1625
  settings = connection_settings[recent_id]
1626
 
 
1627
  print(f"[COPY] Copying settings from {recent_id}:")
1628
 
1629
  return OpenAIHandler(
1630
  web_search_enabled=settings.get('web_search_enabled', False),
 
1631
  webrtc_id=recent_id,
1632
+ session_id=settings.get('session_id'),
1633
+ user_name=settings.get('user_name', ''),
1634
+ memories=settings.get('memories', {})
1635
  )
1636
 
1637
  print(f"[COPY] No settings found, creating default handler")
 
1647
  if not results:
1648
  return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
1649
 
 
1650
  formatted_results = []
1651
  for i, result in enumerate(results, 1):
1652
  formatted_results.append(
 
1671
 
1672
  async def start_up(self):
1673
  """Connect to realtime API"""
 
1674
  if connection_settings and self.webrtc_id:
1675
  if self.webrtc_id in connection_settings:
1676
  settings = connection_settings[self.webrtc_id]
1677
  self.web_search_enabled = settings.get('web_search_enabled', False)
 
1678
  self.session_id = settings.get('session_id')
1679
+ self.user_name = settings.get('user_name', '')
1680
+ self.memories = settings.get('memories', {})
1681
 
1682
  print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
1683
 
1684
  self.client = openai.AsyncOpenAI()
1685
 
 
1686
  print(f"[REALTIME API] Connecting...")
1687
 
1688
+ # Build system prompt with memories
1689
+ base_instructions = f"""You are a personal AI assistant for {self.user_name if self.user_name else 'the user'}.
1690
+ You remember all previous conversations and personal information about the user.
1691
+ Be friendly, helpful, and personalized in your responses.
1692
+ Always use the information you remember to make conversations more personal and relevant.
1693
+ IMPORTANT: Give only ONE response per user input. Do not repeat yourself or give multiple answers."""
1694
+
1695
+ # Add memories to prompt
1696
+ if self.memories:
1697
+ memory_text = format_memories_for_prompt(self.memories)
1698
+ base_instructions += memory_text
1699
+
1700
  # Define the web search function
1701
  tools = []
 
 
1702
  if self.web_search_enabled and self.search_client:
1703
  tools = [{
1704
  "type": "function",
 
1717
  }
1718
  }
1719
  }]
 
1720
 
1721
  search_instructions = (
1722
  "\n\nYou have web search capabilities. "
1723
+ "Use web_search for current information like weather, news, prices, etc."
 
 
 
 
 
 
 
 
 
1724
  )
1725
 
1726
  instructions = base_instructions + search_instructions
 
1730
  async with self.client.beta.realtime.connect(
1731
  model="gpt-4o-mini-realtime-preview-2024-12-17"
1732
  ) as conn:
 
1733
  session_update = {
1734
+ "turn_detection": {
1735
+ "type": "server_vad",
1736
+ "threshold": 0.5,
1737
+ "prefix_padding_ms": 300,
1738
+ "silence_duration_ms": 200
1739
+ },
1740
  "instructions": instructions,
1741
  "tools": tools,
1742
  "tool_choice": "auto" if tools else "none",
 
1746
  "voice": "alloy"
1747
  }
1748
 
1749
+ try:
1750
+ await conn.session.update(session=session_update)
1751
+ self.connection = conn
1752
+ print(f"Connected with tools: {len(tools)} functions")
1753
+ print(f"Session update successful")
1754
+ except Exception as e:
1755
+ print(f"Error updating session: {e}")
1756
+ raise
1757
 
1758
  async for event in self.connection:
1759
+ # Debug log for all events
1760
+ if hasattr(event, 'type'):
1761
+ if event.type not in ["response.audio.delta", "response.audio.done"]:
1762
+ print(f"[EVENT] Type: {event.type}")
1763
 
1764
+ # Handle user input audio transcription
1765
+ if event.type == "conversation.item.input_audio_transcription.completed":
1766
+ if hasattr(event, 'transcript') and event.transcript:
1767
+ user_text = event.transcript.lower()
1768
+ stop_words = ["중단", "그만", "스톱", "stop", "닥쳐", "멈춰", "중지"]
1769
+
1770
+ if any(word in user_text for word in stop_words):
1771
+ print(f"[STOP DETECTED] User said: {event.transcript}")
1772
+ self.should_stop = True
1773
+ if self.connection:
1774
+ try:
1775
+ await self.connection.response.cancel()
1776
+ except:
1777
+ pass
1778
+ continue
1779
+
1780
+ # Save user message to database
1781
+ if self.session_id:
1782
+ await PersonalAssistantDB.save_message(self.session_id, "user", event.transcript)
1783
+
1784
+ # Handle user transcription for stop detection (alternative event)
1785
+ elif event.type == "conversation.item.created":
1786
+ if hasattr(event, 'item') and hasattr(event.item, 'role') and event.item.role == "user":
1787
+ if hasattr(event.item, 'content') and event.item.content:
1788
+ for content_item in event.item.content:
1789
+ if hasattr(content_item, 'transcript') and content_item.transcript:
1790
+ user_text = content_item.transcript.lower()
1791
+ stop_words = ["중단", "그만", "스톱", "stop", "닥쳐", "멈춰", "중지"]
1792
+
1793
+ if any(word in user_text for word in stop_words):
1794
+ print(f"[STOP DETECTED] User said: {content_item.transcript}")
1795
+ self.should_stop = True
1796
+ if self.connection:
1797
+ try:
1798
+ await self.connection.response.cancel()
1799
+ except:
1800
+ pass
1801
+ continue
1802
+
1803
+ # Save user message to database
1804
+ if self.session_id:
1805
+ await PersonalAssistantDB.save_message(self.session_id, "user", content_item.transcript)
1806
+
1807
+ elif event.type == "response.audio_transcript.done":
1808
+ # Prevent multiple responses
1809
+ if self.is_responding:
1810
+ print("[DUPLICATE RESPONSE] Skipping duplicate response")
1811
+ continue
1812
+
1813
+ self.is_responding = True
1814
+ print(f"[RESPONSE] Transcript: {event.transcript[:100] if event.transcript else 'None'}...")
1815
 
1816
  # Detect language
1817
  detected_language = None
1818
  try:
1819
  if event.transcript and len(event.transcript) > 10:
1820
  detected_language = detect(event.transcript)
1821
+ except Exception as e:
1822
+ print(f"Language detection error: {e}")
1823
 
1824
  # Save to database
1825
+ if self.session_id and event.transcript:
1826
+ await PersonalAssistantDB.save_message(self.session_id, "assistant", event.transcript)
1827
 
1828
  output_data = {
1829
  "event": event,
 
1831
  }
1832
  await self.output_queue.put(AdditionalOutputs(output_data))
1833
 
1834
+ elif event.type == "response.done":
1835
+ # Reset responding flag when response is complete
1836
+ self.is_responding = False
1837
+ self.should_stop = False
1838
+ print("[RESPONSE DONE] Response completed")
1839
+
1840
  elif event.type == "response.audio.delta":
1841
+ # Check if we should stop
1842
+ if self.should_stop:
1843
+ continue
1844
+
1845
+ if hasattr(event, 'delta'):
1846
+ await self.output_queue.put(
1847
+ (
1848
+ self.output_sample_rate,
1849
+ np.frombuffer(
1850
+ base64.b64decode(event.delta), dtype=np.int16
1851
+ ).reshape(1, -1),
1852
+ ),
1853
+ )
1854
+
1855
+ # Handle errors
1856
+ elif event.type == "error":
1857
+ print(f"[ERROR] {event}")
1858
+ self.is_responding = False
1859
 
1860
  # Handle function calls
1861
  elif event.type == "response.function_call_arguments.start":
 
1908
  print(f"[RECEIVE] No connection, skipping")
1909
  return
1910
  try:
1911
+ if frame is None or len(frame) < 2:
1912
+ print(f"[RECEIVE] Invalid frame")
1913
+ return
1914
+
1915
  _, array = frame
1916
+ if array is None:
1917
+ print(f"[RECEIVE] Null array")
1918
+ return
1919
+
1920
  array = array.squeeze()
1921
  audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
1922
  await self.connection.input_audio_buffer.append(audio=audio_message)
 
1926
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1927
  item = await wait_for_item(self.output_queue)
1928
 
 
1929
  if isinstance(item, dict) and item.get('type') == 'text_message':
1930
  await self.process_text_message(item['content'])
1931
  return None
 
1949
 
1950
  # Create stream with handler instance
1951
  stream = Stream(
1952
+ handler,
1953
  mode="send-receive",
1954
  modality="audio",
1955
  additional_inputs=[chatbot],
 
1968
# Initialize database on startup
@app.on_event("startup")
async def startup_event():
    """Create the database tables when the application starts.

    Runs ``PersonalAssistantDB.init()`` and prints diagnostics about the
    database location. If anything in that first attempt raises, the
    persistent directory is created and initialisation is retried once; an
    error from the retry propagates to the caller.
    """
    try:
        await PersonalAssistantDB.init()
        print(f"Database initialized at: {DB_PATH}")
        print(f"Persistent directory: {PERSISTENT_DIR}")
        print(f"DB file exists: {os.path.exists(DB_PATH)}")

        # Check if we're in Hugging Face Space
        if os.path.exists("/data"):
            print("Running in Hugging Face Space with persistent storage")
            # List files in persistent directory
            try:
                files = os.listdir(PERSISTENT_DIR)
                print(f"Files in persistent directory: {files}")
            except Exception as e:
                # Listing is diagnostic only, so a failure is just logged.
                print(f"Error listing files: {e}")
    except Exception as e:
        print(f"Error during startup: {e}")
        # Try to create directory if it doesn't exist
        # NOTE(review): this handler also runs when a diagnostic line above
        # fails after a successful init; init() is then simply run again.
        os.makedirs(PERSISTENT_DIR, exist_ok=True)
        await PersonalAssistantDB.init()
1991
 
1992
  # Intercept offer to capture settings
1993
  @app.post("/webrtc/offer", include_in_schema=False)
 
1997
 
1998
  webrtc_id = body.get("webrtc_id")
1999
  web_search_enabled = body.get("web_search_enabled", False)
 
2000
  session_id = body.get("session_id")
2001
+ user_name = body.get("user_name", "")
2002
+ memories = body.get("memories", {})
2003
 
2004
  print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
2005
  print(f"[OFFER] web_search_enabled: {web_search_enabled}")
2006
  print(f"[OFFER] session_id: {session_id}")
2007
+ print(f"[OFFER] user_name: {user_name}")
2008
 
2009
  # Store settings with timestamp
2010
  if webrtc_id:
2011
  connection_settings[webrtc_id] = {
2012
  'web_search_enabled': web_search_enabled,
 
2013
  'session_id': session_id,
2014
+ 'user_name': user_name,
2015
+ 'memories': memories,
2016
  'timestamp': asyncio.get_event_loop().time()
2017
  }
2018
 
2019
+ print(f"[OFFER] Stored settings for {webrtc_id}")
 
2020
 
2021
  # Remove our custom route temporarily
2022
  custom_route = None
 
2042
  async def create_new_session():
2043
  """Create a new chat session"""
2044
  session_id = str(uuid.uuid4())
2045
+ await PersonalAssistantDB.create_session(session_id)
2046
  return {"session_id": session_id}
2047
 
2048
 
2049
@app.post("/session/end")
async def end_session(request: Request):
    """Close a session and store the memories extracted from it.

    Expects a JSON body with a ``session_id`` field. Returns
    ``{"status": "ok"}`` after extraction has run, or an ``error`` payload
    when no session id was supplied.
    """
    payload = await request.json()
    target_session = payload.get("session_id")

    if target_session:
        # Extract and save memories from the conversation
        await PersonalAssistantDB.extract_and_save_memories(target_session)
        return {"status": "ok"}

    return {"error": "session_id required"}
2062
+
2063
+
2064
  @app.post("/message/save")
2065
  async def save_message(request: Request):
2066
  """Save a message to the database"""
 
2072
  if not all([session_id, role, content]):
2073
  return {"error": "Missing required fields"}
2074
 
2075
+ await PersonalAssistantDB.save_message(session_id, role, content)
2076
  return {"status": "ok"}
2077
 
2078
 
2079
@app.get("/history/recent")
async def get_recent_history():
    """Return the most recent conversations as stored by the database layer."""
    return await PersonalAssistantDB.get_recent_conversations()
2084
 
2085
 
2086
@app.get("/history/{session_id}")
async def get_conversation(session_id: str):
    """Return the messages that belong to the conversation ``session_id``."""
    return await PersonalAssistantDB.get_conversation_messages(session_id)
2091
 
2092
 
2093
@app.get("/memory/all")
async def get_all_memories():
    """Return every stored user memory."""
    return await PersonalAssistantDB.get_all_memories()
2098
+
2099
+
2100
  @app.post("/chat/text")
2101
  async def chat_text(request: Request):
2102
  """Handle text chat messages using GPT-4o-mini"""
 
2104
  body = await request.json()
2105
  message = body.get("message", "")
2106
  web_search_enabled = body.get("web_search_enabled", False)
 
2107
  session_id = body.get("session_id")
2108
+ user_name = body.get("user_name", "")
2109
+ memories = body.get("memories", {})
2110
 
2111
  if not message:
2112
  return {"error": "메시지가 비어있습니다."}
2113
 
2114
  # Process text chat
2115
+ result = await process_text_chat(message, web_search_enabled, session_id, user_name, memories)
2116
 
2117
  return result
2118