seawolf2357 committed on
Commit 1cff142 · verified · 1 Parent(s): e9bd082

Update app.py

Files changed (1):
1. app.py +97 -337
app.py CHANGED
@@ -20,12 +20,6 @@ from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
 import httpx
 from typing import Optional, List, Dict
 import gradio as gr
-import logging
-from datetime import datetime
-
-# logging setup
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 
 load_dotenv()
 
@@ -389,9 +383,6 @@ HTML_CONTENT = """<!DOCTYPE html>
        let peerConnection;
        let webrtc_id;
        let webSearchEnabled = false;
-       let reconnectAttempts = 0;
-       let heartbeatInterval;
-       let connectionHealthInterval;
        const audioOutput = document.getElementById('audio-output');
        const startButton = document.getElementById('start-button');
        const chatMessages = document.getElementById('chat-messages');
@@ -419,7 +410,6 @@ HTML_CONTENT = """<!DOCTYPE html>
                statusText.textContent = '연결 대기 중';
            }
        }
-
        function updateButtonState() {
            const button = document.getElementById('start-button');
            if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
@@ -449,7 +439,6 @@ HTML_CONTENT = """<!DOCTYPE html>
                updateStatus('disconnected');
            }
        }
-
        function setupAudioVisualization(stream) {
            audioContext = new (window.AudioContext || window.webkitAudioContext)();
            analyser = audioContext.createAnalyser();
@@ -484,7 +473,6 @@ HTML_CONTENT = """<!DOCTYPE html>
 
            updateAudioLevel();
        }
-
        function showError(message) {
            const toast = document.getElementById('error-toast');
            toast.textContent = message;
@@ -494,46 +482,9 @@ HTML_CONTENT = """<!DOCTYPE html>
                toast.style.display = 'none';
            }, 5000);
        }
-
-       // connection health monitoring
-       function startConnectionHealthCheck() {
-           if (connectionHealthInterval) {
-               clearInterval(connectionHealthInterval);
-           }
-
-           connectionHealthInterval = setInterval(() => {
-               if (peerConnection) {
-                   const state = peerConnection.connectionState;
-                   const iceState = peerConnection.iceConnectionState;
-                   console.log(`Connection state: ${state}, ICE state: ${iceState}`);
-
-                   if (state === 'failed' || state === 'closed' || iceState === 'failed') {
-                       console.log('Connection lost, attempting to reconnect...');
-                       handleConnectionLoss();
-                   }
-               }
-           }, 3000); // check every 3 seconds
-       }
-
-       // handle connection loss
-       function handleConnectionLoss() {
-           if (reconnectAttempts < 3) {
-               reconnectAttempts++;
-               showError(`연결이 끊어졌습니다. 재연결 시도 중... (${reconnectAttempts}/3)`);
-               stop();
-               setTimeout(() => {
-                   setupWebRTC();
-               }, 2000);
-           } else {
-               showError('연결을 복구할 수 없습니다. 새로고침 후 다시 시도해주세요.');
-               stop();
-           }
-       }
-
        async function setupWebRTC() {
            const config = __RTC_CONFIGURATION__;
            peerConnection = new RTCPeerConnection(config);
-
            const timeoutId = setTimeout(() => {
                const toast = document.getElementById('error-toast');
                toast.textContent = "연결이 평소보다 오래 걸리고 있습니다. VPN을 사용 중이신가요?";
@@ -543,62 +494,29 @@ HTML_CONTENT = """<!DOCTYPE html>
                    toast.style.display = 'none';
                }, 5000);
            }, 5000);
-
            try {
                const stream = await navigator.mediaDevices.getUserMedia({
-                   audio: {
-                       echoCancellation: true,   // echo cancellation
-                       noiseSuppression: true,   // noise suppression
-                       autoGainControl: true,    // automatic gain control
-                       sampleRate: 24000
-                   }
+                   audio: true
                });
                setupAudioVisualization(stream);
                stream.getTracks().forEach(track => {
                    peerConnection.addTrack(track, stream);
                });
-
                peerConnection.addEventListener('track', (evt) => {
                    if (audioOutput.srcObject !== evt.streams[0]) {
                        audioOutput.srcObject = evt.streams[0];
-                       // lower the autoplay volume to avoid feedback
-                       audioOutput.volume = 0.8;
-                       audioOutput.play().catch(e => {
-                           console.error('Audio play error:', e);
-                       });
+                       audioOutput.play();
                    }
                });
-
                const dataChannel = peerConnection.createDataChannel('text');
-
-               // send heartbeat messages
-               dataChannel.onopen = () => {
-                   console.log('Data channel opened');
-                   if (heartbeatInterval) clearInterval(heartbeatInterval);
-                   heartbeatInterval = setInterval(() => {
-                       if (dataChannel.readyState === 'open') {
-                           dataChannel.send(JSON.stringify({ type: 'heartbeat' }));
-                       }
-                   }, 30000); // heartbeat every 30 seconds
-               };
-
                dataChannel.onmessage = (event) => {
                    const eventJson = JSON.parse(event.data);
                    if (eventJson.type === "error") {
                        showError(eventJson.message);
-                   } else if (eventJson.type === "connection_lost") {
-                       handleConnectionLoss();
                    }
                };
-
-               dataChannel.onclose = () => {
-                   console.log('Data channel closed');
-                   if (heartbeatInterval) clearInterval(heartbeatInterval);
-               };
-
                const offer = await peerConnection.createOffer();
                await peerConnection.setLocalDescription(offer);
-
                await new Promise((resolve) => {
                    if (peerConnection.iceGatheringState === "complete") {
                        resolve();
@@ -612,31 +530,15 @@ HTML_CONTENT = """<!DOCTYPE html>
                        peerConnection.addEventListener("icegatheringstatechange", checkState);
                    }
                });
-
-               // monitor every connection state event
                peerConnection.addEventListener('connectionstatechange', () => {
                    console.log('connectionstatechange', peerConnection.connectionState);
                    if (peerConnection.connectionState === 'connected') {
                        clearTimeout(timeoutId);
                        const toast = document.getElementById('error-toast');
                        toast.style.display = 'none';
-                       reconnectAttempts = 0;
-                       startConnectionHealthCheck();
-                   } else if (peerConnection.connectionState === 'failed') {
-                       handleConnectionLoss();
                    }
                    updateButtonState();
                });
-
-               peerConnection.addEventListener('iceconnectionstatechange', () => {
-                   console.log('ICE connection state:', peerConnection.iceConnectionState);
-                   if (peerConnection.iceConnectionState === 'disconnected') {
-                       showError('네트워크 연결이 불안정합니다');
-                   } else if (peerConnection.iceConnectionState === 'failed') {
-                       handleConnectionLoss();
-                   }
-               });
-
                webrtc_id = Math.random().toString(36).substring(7);
                const response = await fetch('/webrtc/offer', {
                    method: 'POST',
@@ -648,7 +550,6 @@ HTML_CONTENT = """<!DOCTYPE html>
                        web_search_enabled: webSearchEnabled
                    })
                });
-
                const serverResponse = await response.json();
                if (serverResponse.status === 'failed') {
                    showError(serverResponse.meta.error === 'concurrency_limit_reached'
@@ -657,27 +558,18 @@ HTML_CONTENT = """<!DOCTYPE html>
                    stop();
                    return;
                }
-
                await peerConnection.setRemoteDescription(serverResponse);
-
                const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
                eventSource.addEventListener("output", (event) => {
                    const eventJson = JSON.parse(event.data);
                    addMessage("assistant", eventJson.content);
                });
-
                eventSource.addEventListener("search", (event) => {
                    const eventJson = JSON.parse(event.data);
                    if (eventJson.query) {
                        addMessage("search-result", `웹 검색 중: "${eventJson.query}"`);
                    }
                });
-
-               eventSource.addEventListener("error", (event) => {
-                   console.error('EventSource error:', event);
-                   handleConnectionLoss();
-               });
-
            } catch (err) {
                clearTimeout(timeoutId);
                console.error('Error setting up WebRTC:', err);
@@ -685,34 +577,14 @@ HTML_CONTENT = """<!DOCTYPE html>
                stop();
            }
        }
-
-       let lastMessageContent = '';
-       let lastMessageTime = 0;
-
        function addMessage(role, content) {
-           // suppress duplicate messages
-           const now = Date.now();
-           if (content === lastMessageContent && (now - lastMessageTime) < 1000) {
-               return; // ignore an identical message within 1 second
-           }
-
-           lastMessageContent = content;
-           lastMessageTime = now;
-
            const messageDiv = document.createElement('div');
            messageDiv.classList.add('message', role);
            messageDiv.textContent = content;
            chatMessages.appendChild(messageDiv);
            chatMessages.scrollTop = chatMessages.scrollHeight;
        }
-
        function stop() {
-           if (heartbeatInterval) {
-               clearInterval(heartbeatInterval);
-           }
-           if (connectionHealthInterval) {
-               clearInterval(connectionHealthInterval);
-           }
            if (animationFrame) {
                cancelAnimationFrame(animationFrame);
            }
@@ -741,7 +613,6 @@ HTML_CONTENT = """<!DOCTYPE html>
            updateButtonState();
            audioLevel = 0;
        }
-
        startButton.addEventListener('click', () => {
            console.log('clicked');
            console.log(peerConnection, peerConnection?.connectionState);
@@ -795,14 +666,14 @@ class BraveSearchClient:
                    })
            return results
        except Exception as e:
-           logger.error(f"Brave Search error: {e}")
+           print(f"Brave Search error: {e}")
            return []
 
 
 # Initialize search client globally
 brave_api_key = os.getenv("BSEARCH_API")
 search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
-logger.info(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
+print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
 
 # Store web search settings by connection
 web_search_settings = {}
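For context on the except-branch above: BraveSearchClient.search() itself is untouched by this commit and its body is not shown. A minimal sketch of what such a call looks like against the Brave Web Search API with httpx; the endpoint and X-Subscription-Token header follow Brave's public API docs, while the helper name and the exact returned fields are illustrative assumptions:

import httpx

async def brave_web_search(api_key: str, query: str, count: int = 5) -> list:
    # Query Brave's Web Search API; results live under data["web"]["results"]
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": query, "count": count},
            headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
        )
        resp.raise_for_status()
        data = resp.json()
    # Keep only the fields the handler later formats into prompt text
    return [
        {
            "title": r.get("title", ""),
            "url": r.get("url", ""),
            "description": r.get("description", ""),
        }
        for r in data.get("web", {}).get("results", [])
    ]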
@@ -828,12 +699,7 @@ class OpenAIHandler(AsyncStreamHandler):
         self.current_call_id = None
         self.webrtc_id = webrtc_id
         self.web_search_enabled = web_search_enabled
-        self.keep_alive_task = None
-        self.last_activity = datetime.now()
-        self.connection_active = True
-        self.response_in_progress = False  # guard against duplicate responses
-        self.last_response_time = datetime.now()  # time of the last response
-        logger.info(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
+        print(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
 
     def copy(self):
         # Get the most recent settings
@@ -846,10 +712,10 @@ class OpenAIHandler(AsyncStreamHandler):
             recent_id = recent_ids[0]
             settings = web_search_settings[recent_id]
             web_search_enabled = settings.get('enabled', False)
-            logger.info(f"Handler.copy() using recent settings - webrtc_id={recent_id}, web_search_enabled={web_search_enabled}")
+            print(f"Handler.copy() using recent settings - webrtc_id={recent_id}, web_search_enabled={web_search_enabled}")
             return OpenAIHandler(web_search_enabled=web_search_enabled, webrtc_id=recent_id)
 
-        logger.info(f"Handler.copy() called - creating new handler with default settings")
+        print(f"Handler.copy() called - creating new handler with default settings")
         return OpenAIHandler(web_search_enabled=False)
 
     async def search_web(self, query: str) -> str:
@@ -857,7 +723,7 @@ class OpenAIHandler(AsyncStreamHandler):
         if not self.search_client or not self.web_search_enabled:
             return "웹 검색이 비활성화되어 있습니다."
 
-        logger.info(f"Searching web for: {query}")
+        print(f"Searching web for: {query}")
         results = await self.search_client.search(query)
         if not results:
             return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
@@ -873,31 +739,6 @@ class OpenAIHandler(AsyncStreamHandler):
 
         return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
 
-    async def keep_alive(self):
-        """Keep the connection alive with periodic activity checks"""
-        while self.connection_active:
-            try:
-                await asyncio.sleep(30)  # check every 30 seconds
-
-                # has it been more than 5 minutes since the last activity?
-                inactive_time = (datetime.now() - self.last_activity).total_seconds()
-                if inactive_time > 300:  # 5 minutes
-                    logger.warning(f"Connection inactive for {inactive_time} seconds")
-
-                # check the connection state
-                if self.connection:
-                    logger.debug("Connection alive - sending keepalive")
-                    # the OpenAI connection keeps itself alive
-                else:
-                    logger.error("Connection lost in keep_alive")
-                    self.connection_active = False
-                    break
-
-            except Exception as e:
-                logger.error(f"Keep-alive error: {e}")
-                self.connection_active = False
-                break
-
     async def start_up(self):
         """Connect to realtime API with function calling enabled"""
         # First check if we have the most recent settings
@@ -910,23 +751,14 @@ class OpenAIHandler(AsyncStreamHandler):
             settings = web_search_settings[recent_id]
             self.web_search_enabled = settings.get('enabled', False)
             self.webrtc_id = recent_id
-            logger.info(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, web_search_enabled={self.web_search_enabled}")
+            print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, web_search_enabled={self.web_search_enabled}")
 
-        logger.info(f"Starting up handler with web_search_enabled={self.web_search_enabled}")
+        print(f"Starting up handler with web_search_enabled={self.web_search_enabled}")
         self.client = openai.AsyncOpenAI()
-        self.connection_active = True
 
         # Define the web search function
         tools = []
-        instructions = (
-            "You are a helpful assistant. Respond in Korean when the user speaks Korean. "
-            "IMPORTANT RULES:\n"
-            "1. Wait for the user to finish speaking before responding\n"
-            "2. Keep responses concise and to the point\n"
-            "3. Do not continue speaking if the user interrupts\n"
-            "4. Stop immediately when you finish answering the question\n"
-            "5. Do not add unnecessary elaboration or follow-up questions unless asked"
-        )
+        instructions = "You are a helpful assistant. Respond in Korean when the user speaks Korean."
 
         if self.web_search_enabled and self.search_client:
             tools = [{
@@ -946,17 +778,11 @@ class OpenAIHandler(AsyncStreamHandler):
                     }
                 }
             }]
-            logger.info("Web search function added to tools")
+            print("Web search function added to tools")
 
             instructions = (
                 "You are a helpful assistant with web search capabilities. "
-                "IMPORTANT RULES:\n"
-                "1. Wait for the user to finish speaking before responding\n"
-                "2. Keep responses concise and to the point\n"
-                "3. Do not continue speaking if the user interrupts\n"
-                "4. Stop immediately when you finish answering the question\n"
-                "5. Do not add unnecessary elaboration or follow-up questions unless asked\n\n"
-                "WEB SEARCH RULES: You MUST use the web_search function for ANY of these topics:\n"
+                "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
                 "- Weather (날씨, 기온, 비, 눈)\n"
                 "- News (뉴스, 소식)\n"
                 "- Current events (현재, 최근, 오늘, 지금)\n"
@@ -964,167 +790,105 @@ class OpenAIHandler(AsyncStreamHandler):
                 "- Sports scores or results\n"
                 "- Any question about 2024 or 2025\n"
                 "- Any time-sensitive information\n\n"
-                "When in doubt, USE web_search for accuracy. Always respond in Korean when the user speaks Korean."
+                "When in doubt, USE web_search. It's better to search and provide accurate information "
+                "than to guess or use outdated information. Always respond in Korean when the user speaks Korean."
             )
 
-        try:
-            async with self.client.beta.realtime.connect(
-                model="gpt-4o-mini-realtime-preview-2024-12-17"
-            ) as conn:
-                # Update session with tools - hardened VAD settings
-                session_update = {
-                    "turn_detection": {
-                        "type": "server_vad",
-                        "threshold": 0.5,  # sensitivity
-                        "prefix_padding_ms": 300,  # padding before speech starts
-                        "silence_duration_ms": 500  # longer silence-detection window
-                    },
-                    "instructions": instructions,
-                    "tools": tools,
-                    "tool_choice": "auto" if tools else "none",
-                    "voice": "echo",  # voice setting
-                    "temperature": 0.7,  # reduce response variability
-                    "max_response_output_tokens": "inf"  # prevent unbounded responses
-                }
-
-                await conn.session.update(session=session_update)
-                self.connection = conn
-                self.last_activity = datetime.now()
-                logger.info(f"Connected with tools: {len(tools)} functions")
-
-                # Start keep-alive task
-                self.keep_alive_task = asyncio.create_task(self.keep_alive())
-
-                async for event in self.connection:
-                    self.last_activity = datetime.now()
-
-                    # Debug logging
-                    if event.type in ["response.audio_transcript.done", "response.done", "response.created", "input_audio_buffer.speech_started", "input_audio_buffer.speech_stopped"]:
-                        logger.info(f"Event: {event.type}")
-
-                    # user speech detected
-                    if event.type == "input_audio_buffer.speech_started":
-                        logger.info("User started speaking")
-                        self.response_in_progress = False  # cut the response off once the user speaks
-
-                    # track response start/end
-                    if event.type == "response.created":
-                        self.response_in_progress = True
-                        self.last_response_time = datetime.now()
-                        logger.info("Response started")
-
-                    if event.type == "response.done":
-                        self.response_in_progress = False
-                        logger.info("Response completed")
-
-                    if event.type == "response.audio_transcript.done":
-                        await self.output_queue.put(AdditionalOutputs(event))
-
-                    elif event.type == "response.audio.delta":
-                        # guard against duplicate responses - block responses that follow too quickly
-                        time_since_last = (datetime.now() - self.last_response_time).total_seconds()
-                        if time_since_last < 0.1:  # ignore a response within 100 ms
-                            continue
-
-                        await self.output_queue.put(
-                            (
-                                self.output_sample_rate,
-                                np.frombuffer(
-                                    base64.b64decode(event.delta), dtype=np.int16
-                                ).reshape(1, -1),
-                            ),
-                        )
-
-                    # Handle function calls
-                    elif event.type == "response.function_call_arguments.start":
-                        logger.info(f"Function call started")
-                        self.function_call_in_progress = True
-                        self.current_function_args = ""
-                        self.current_call_id = getattr(event, 'call_id', None)
-
-                    elif event.type == "response.function_call_arguments.delta":
-                        if self.function_call_in_progress:
-                            self.current_function_args += event.delta
-
-                    elif event.type == "response.function_call_arguments.done":
-                        if self.function_call_in_progress:
-                            logger.info(f"Function call done, args: {self.current_function_args}")
-                            try:
-                                args = json.loads(self.current_function_args)
-                                query = args.get("query", "")
-
-                                # Emit search event to client
-                                await self.output_queue.put(AdditionalOutputs({
-                                    "type": "search",
-                                    "query": query
-                                }))
-
-                                # Perform the search
-                                search_results = await self.search_web(query)
-                                logger.info(f"Search results length: {len(search_results)}")
-
-                                # Send function result back to the model
-                                if self.connection and self.current_call_id:
-                                    await self.connection.conversation.item.create(
-                                        item={
-                                            "type": "function_call_output",
-                                            "call_id": self.current_call_id,
-                                            "output": search_results
-                                        }
-                                    )
-                                # response.create() removed - a response is generated automatically
-
-                            except Exception as e:
-                                logger.error(f"Function call error: {e}")
-                            finally:
-                                self.function_call_in_progress = False
-                                self.current_function_args = ""
-                                self.current_call_id = None
-
-        except Exception as e:
-            logger.error(f"Connection error in start_up: {e}")
-            self.connection_active = False
-            # notify the client of the connection error
-            await self.output_queue.put(AdditionalOutputs({
-                "type": "connection_lost",
-                "message": "서버 연결이 끊어졌습니다"
-            }))
+        async with self.client.beta.realtime.connect(
+            model="gpt-4o-mini-realtime-preview-2024-12-17"
+        ) as conn:
+            # Update session with tools
+            session_update = {
+                "turn_detection": {"type": "server_vad"},
+                "instructions": instructions,
+                "tools": tools,
+                "tool_choice": "auto" if tools else "none"
+            }
+
+            await conn.session.update(session=session_update)
+            self.connection = conn
+            print(f"Connected with tools: {len(tools)} functions")
+
+            async for event in self.connection:
+                # Debug logging for function calls
+                if event.type.startswith("response.function_call"):
+                    print(f"Function event: {event.type}")
+
+                if event.type == "response.audio_transcript.done":
+                    await self.output_queue.put(AdditionalOutputs(event))
+
+                elif event.type == "response.audio.delta":
+                    await self.output_queue.put(
+                        (
+                            self.output_sample_rate,
+                            np.frombuffer(
+                                base64.b64decode(event.delta), dtype=np.int16
+                            ).reshape(1, -1),
+                        ),
+                    )
+
+                # Handle function calls
+                elif event.type == "response.function_call_arguments.start":
+                    print(f"Function call started")
+                    self.function_call_in_progress = True
+                    self.current_function_args = ""
+                    self.current_call_id = getattr(event, 'call_id', None)
+
+                elif event.type == "response.function_call_arguments.delta":
+                    if self.function_call_in_progress:
+                        self.current_function_args += event.delta
+
+                elif event.type == "response.function_call_arguments.done":
+                    if self.function_call_in_progress:
+                        print(f"Function call done, args: {self.current_function_args}")
+                        try:
+                            args = json.loads(self.current_function_args)
+                            query = args.get("query", "")
+
+                            # Emit search event to client
+                            await self.output_queue.put(AdditionalOutputs({
+                                "type": "search",
+                                "query": query
+                            }))
+
+                            # Perform the search
+                            search_results = await self.search_web(query)
+                            print(f"Search results length: {len(search_results)}")
+
+                            # Send function result back to the model
+                            if self.connection and self.current_call_id:
+                                await self.connection.conversation.item.create(
+                                    item={
+                                        "type": "function_call_output",
+                                        "call_id": self.current_call_id,
+                                        "output": search_results
+                                    }
+                                )
+                                await self.connection.response.create()
+
+                        except Exception as e:
+                            print(f"Function call error: {e}")
+                        finally:
+                            self.function_call_in_progress = False
+                            self.current_function_args = ""
+                            self.current_call_id = None
 
     async def receive(self, frame: tuple[int, np.ndarray]) -> None:
-        if not self.connection or not self.connection_active:
+        if not self.connection:
             return
         try:
-            self.last_activity = datetime.now()
             _, array = frame
             array = array.squeeze()
             audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
             await self.connection.input_audio_buffer.append(audio=audio_message)
         except Exception as e:
-            logger.error(f"Error in receive: {e}")
-            # update state if the connection was dropped
-            if "closed" in str(e).lower() or "connection" in str(e).lower():
-                self.connection = None
-                self.connection_active = False
-                # notify the client that the connection closed
-                await self.output_queue.put(AdditionalOutputs({
-                    "type": "connection_lost",
-                    "message": "연결이 종료되었습니다"
-                }))
+            print(f"Error in receive: {e}")
+            # Connection might be closed, ignore the error
 
     async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
         return await wait_for_item(self.output_queue)
 
     async def shutdown(self) -> None:
-        logger.info("Shutting down handler")
-        self.connection_active = False
-
-        if self.keep_alive_task:
-            self.keep_alive_task.cancel()
-            try:
-                await self.keep_alive_task
-            except asyncio.CancelledError:
-                pass
-
         if self.connection:
             await self.connection.close()
             self.connection = None
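A note on the response.audio.delta branch kept above: each delta arrives as base64-encoded PCM16, and the handler reshapes it into a (1, n) int16 frame for the output queue. A self-contained round trip of that decode path (the sample values here are made up for illustration):

import base64
import numpy as np

# Pretend the realtime API sent three PCM16 samples as one delta
samples = np.array([1000, -2000, 3000], dtype=np.int16)
delta = base64.b64encode(samples.tobytes()).decode("utf-8")

# The handler's decode path: base64 -> bytes -> int16 -> (1, n) frame
frame = np.frombuffer(base64.b64decode(delta), dtype=np.int16).reshape(1, -1)
print(frame.shape)     # (1, 3)
print(frame.tolist())  # [[1000, -2000, 3000]]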
@@ -1136,7 +900,7 @@ handler = OpenAIHandler(web_search_enabled=False)
 # Create components
 chatbot = gr.Chatbot(type="messages")
 
-# Create stream with handler instance - remove the time limit
+# Create stream with handler instance
 stream = Stream(
     handler,  # Pass instance, not factory
     mode="send-receive",
@@ -1146,7 +910,7 @@ stream = Stream(
     additional_outputs_handler=update_chatbot,
     rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
     concurrency_limit=5 if get_space() else None,
-    time_limit=None,  # no time limit
+    time_limit=300 if get_space() else None,
 )
 
 app = FastAPI()
@@ -1163,7 +927,7 @@ async def custom_offer(request: Request):
     webrtc_id = body.get("webrtc_id")
     web_search_enabled = body.get("web_search_enabled", False)
 
-    logger.info(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}")
+    print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}")
 
     # Store settings with timestamp
     if webrtc_id:
@@ -1195,13 +959,9 @@ async def outputs(webrtc_id: str):
     async def output_stream():
         async for output in stream.output_stream(webrtc_id):
             if hasattr(output, 'args') and output.args:
-                # Check if it's a search event or connection lost event
-                if isinstance(output.args[0], dict):
-                    event_type = output.args[0].get('type')
-                    if event_type == 'search':
-                        yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
-                    elif event_type == 'connection_lost':
-                        yield f"event: error\ndata: {json.dumps(output.args[0])}\n\n"
+                # Check if it's a search event
+                if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
+                    yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
                 # Regular transcript event
                 elif hasattr(output.args[0], 'transcript'):
                     s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
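For anyone poking at the simplified /outputs endpoint above: it emits named SSE frames (event: search or event: output, each followed by a data: JSON line). A rough client sketch with httpx; the localhost:7860 base URL and the hand-rolled line parsing are assumptions for illustration, not part of this commit:

import json
import httpx

async def read_outputs(webrtc_id: str) -> None:
    # Stream the named SSE frames emitted by the /outputs endpoint
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "GET", "http://localhost:7860/outputs", params={"webrtc_id": webrtc_id}
        ) as resp:
            event = None
            async for line in resp.aiter_lines():
                if line.startswith("event: "):
                    event = line[len("event: "):]
                elif line.startswith("data: ") and event:
                    payload = json.loads(line[len("data: "):])
                    print(event, payload)  # e.g. search {'type': 'search', 'query': ...}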
 