seawolf2357 committed on
Commit
e9bd082
·
verified ·
1 Parent(s): 0d8a2ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -13
app.py CHANGED
@@ -546,7 +546,12 @@ HTML_CONTENT = """<!DOCTYPE html>
546
 
547
  try {
548
  const stream = await navigator.mediaDevices.getUserMedia({
549
- audio: true
 
 
 
 
 
550
  });
551
  setupAudioVisualization(stream);
552
  stream.getTracks().forEach(track => {
@@ -556,7 +561,11 @@ HTML_CONTENT = """<!DOCTYPE html>
556
  peerConnection.addEventListener('track', (evt) => {
557
  if (audioOutput.srcObject !== evt.streams[0]) {
558
  audioOutput.srcObject = evt.streams[0];
559
- audioOutput.play();
 
 
 
 
560
  }
561
  });
562
 
@@ -677,7 +686,19 @@ HTML_CONTENT = """<!DOCTYPE html>
677
  }
678
  }
679
 
 
 
 
680
  function addMessage(role, content) {
 
 
 
 
 
 
 
 
 
681
  const messageDiv = document.createElement('div');
682
  messageDiv.classList.add('message', role);
683
  messageDiv.textContent = content;
@@ -810,6 +831,8 @@ class OpenAIHandler(AsyncStreamHandler):
810
  self.keep_alive_task = None
811
  self.last_activity = datetime.now()
812
  self.connection_active = True
 
 
813
  logger.info(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
814
 
815
  def copy(self):
@@ -895,7 +918,15 @@ class OpenAIHandler(AsyncStreamHandler):
895
 
896
  # Define the web search function
897
  tools = []
898
- instructions = "You are a helpful assistant. Respond in Korean when the user speaks Korean."
 
 
 
 
 
 
 
 
899
 
900
  if self.web_search_enabled and self.search_client:
901
  tools = [{
@@ -919,7 +950,13 @@ class OpenAIHandler(AsyncStreamHandler):
919
 
920
  instructions = (
921
  "You are a helpful assistant with web search capabilities. "
922
- "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
 
 
 
 
 
 
923
  "- Weather (๋‚ ์”จ, ๊ธฐ์˜จ, ๋น„, ๋ˆˆ)\n"
924
  "- News (๋‰ด์Šค, ์†Œ์‹)\n"
925
  "- Current events (ํ˜„์žฌ, ์ตœ๊ทผ, ์˜ค๋Š˜, ์ง€๊ธˆ)\n"
@@ -927,20 +964,27 @@ class OpenAIHandler(AsyncStreamHandler):
927
  "- Sports scores or results\n"
928
  "- Any question about 2024 or 2025\n"
929
  "- Any time-sensitive information\n\n"
930
- "When in doubt, USE web_search. It's better to search and provide accurate information "
931
- "than to guess or use outdated information. Always respond in Korean when the user speaks Korean."
932
  )
933
 
934
  try:
935
  async with self.client.beta.realtime.connect(
936
  model="gpt-4o-mini-realtime-preview-2024-12-17"
937
  ) as conn:
938
- # Update session with tools
939
  session_update = {
940
- "turn_detection": {"type": "server_vad"},
 
 
 
 
 
941
  "instructions": instructions,
942
  "tools": tools,
943
- "tool_choice": "auto" if tools else "none"
 
 
 
944
  }
945
 
946
  await conn.session.update(session=session_update)
@@ -954,14 +998,34 @@ class OpenAIHandler(AsyncStreamHandler):
954
  async for event in self.connection:
955
  self.last_activity = datetime.now()
956
 
957
- # Debug logging for function calls
958
- if event.type.startswith("response.function_call"):
959
- logger.debug(f"Function event: {event.type}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
960
 
961
  if event.type == "response.audio_transcript.done":
962
  await self.output_queue.put(AdditionalOutputs(event))
963
 
964
  elif event.type == "response.audio.delta":
 
 
 
 
 
965
  await self.output_queue.put(
966
  (
967
  self.output_sample_rate,
@@ -1008,7 +1072,7 @@ class OpenAIHandler(AsyncStreamHandler):
1008
  "output": search_results
1009
  }
1010
  )
1011
- await self.connection.response.create()
1012
 
1013
  except Exception as e:
1014
  logger.error(f"Function call error: {e}")
 
546
 
547
  try {
548
  const stream = await navigator.mediaDevices.getUserMedia({
549
+ audio: {
550
+ echoCancellation: true, // ์—์ฝ” ์ œ๊ฑฐ
551
+ noiseSuppression: true, // ๋…ธ์ด์ฆˆ ์ œ๊ฑฐ
552
+ autoGainControl: true, // ์ž๋™ ๊ฒŒ์ธ ์ œ์–ด
553
+ sampleRate: 24000
554
+ }
555
  });
556
  setupAudioVisualization(stream);
557
  stream.getTracks().forEach(track => {
 
561
  peerConnection.addEventListener('track', (evt) => {
562
  if (audioOutput.srcObject !== evt.streams[0]) {
563
  audioOutput.srcObject = evt.streams[0];
564
+ // ์ž๋™ ์žฌ์ƒ ์‹œ ๋ณผ๋ฅจ ์กฐ์ ˆ๋กœ ํ”ผ๋“œ๋ฐฑ ๋ฐฉ์ง€
565
+ audioOutput.volume = 0.8;
566
+ audioOutput.play().catch(e => {
567
+ console.error('Audio play error:', e);
568
+ });
569
  }
570
  });
571
 
 
686
  }
687
  }
688
 
689
+ let lastMessageContent = '';
690
+ let lastMessageTime = 0;
691
+
692
  function addMessage(role, content) {
693
+ // ์ค‘๋ณต ๋ฉ”์‹œ์ง€ ๋ฐฉ์ง€
694
+ const now = Date.now();
695
+ if (content === lastMessageContent && (now - lastMessageTime) < 1000) {
696
+ return; // 1์ดˆ ์ด๋‚ด ๊ฐ™์€ ๋ฉ”์‹œ์ง€ ๋ฌด์‹œ
697
+ }
698
+
699
+ lastMessageContent = content;
700
+ lastMessageTime = now;
701
+
702
  const messageDiv = document.createElement('div');
703
  messageDiv.classList.add('message', role);
704
  messageDiv.textContent = content;
 
831
  self.keep_alive_task = None
832
  self.last_activity = datetime.now()
833
  self.connection_active = True
834
+ self.response_in_progress = False # ์‘๋‹ต ์ค‘๋ณต ๋ฐฉ์ง€
835
+ self.last_response_time = datetime.now() # ๋งˆ์ง€๋ง‰ ์‘๋‹ต ์‹œ๊ฐ„
836
  logger.info(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
837
 
838
  def copy(self):
 
918
 
919
  # Define the web search function
920
  tools = []
921
+ instructions = (
922
+ "You are a helpful assistant. Respond in Korean when the user speaks Korean. "
923
+ "IMPORTANT RULES:\n"
924
+ "1. Wait for the user to finish speaking before responding\n"
925
+ "2. Keep responses concise and to the point\n"
926
+ "3. Do not continue speaking if the user interrupts\n"
927
+ "4. Stop immediately when you finish answering the question\n"
928
+ "5. Do not add unnecessary elaboration or follow-up questions unless asked"
929
+ )
930
 
931
  if self.web_search_enabled and self.search_client:
932
  tools = [{
 
950
 
951
  instructions = (
952
  "You are a helpful assistant with web search capabilities. "
953
+ "IMPORTANT RULES:\n"
954
+ "1. Wait for the user to finish speaking before responding\n"
955
+ "2. Keep responses concise and to the point\n"
956
+ "3. Do not continue speaking if the user interrupts\n"
957
+ "4. Stop immediately when you finish answering the question\n"
958
+ "5. Do not add unnecessary elaboration or follow-up questions unless asked\n\n"
959
+ "WEB SEARCH RULES: You MUST use the web_search function for ANY of these topics:\n"
960
  "- Weather (๋‚ ์”จ, ๊ธฐ์˜จ, ๋น„, ๋ˆˆ)\n"
961
  "- News (๋‰ด์Šค, ์†Œ์‹)\n"
962
  "- Current events (ํ˜„์žฌ, ์ตœ๊ทผ, ์˜ค๋Š˜, ์ง€๊ธˆ)\n"
 
964
  "- Sports scores or results\n"
965
  "- Any question about 2024 or 2025\n"
966
  "- Any time-sensitive information\n\n"
967
+ "When in doubt, USE web_search for accuracy. Always respond in Korean when the user speaks Korean."
 
968
  )
969
 
970
  try:
971
  async with self.client.beta.realtime.connect(
972
  model="gpt-4o-mini-realtime-preview-2024-12-17"
973
  ) as conn:
974
+ # Update session with tools - VAD ์„ค์ • ๊ฐ•ํ™”
975
  session_update = {
976
+ "turn_detection": {
977
+ "type": "server_vad",
978
+ "threshold": 0.5, # ๋ฏผ๊ฐ๋„ ์กฐ์ •
979
+ "prefix_padding_ms": 300, # ์Œ์„ฑ ์‹œ์ž‘ ์ „ ํŒจ๋”ฉ
980
+ "silence_duration_ms": 500 # ์นจ๋ฌต ๊ฐ์ง€ ์‹œ๊ฐ„ ์ฆ๊ฐ€
981
+ },
982
  "instructions": instructions,
983
  "tools": tools,
984
+ "tool_choice": "auto" if tools else "none",
985
+ "voice": "echo", # ์Œ์„ฑ ์„ค์ •
986
+ "temperature": 0.7, # ์‘๋‹ต ๋‹ค์–‘์„ฑ ๊ฐ์†Œ
987
+ "max_response_output_tokens": "inf" # ๋ฌดํ•œ ์‘๋‹ต ๋ฐฉ์ง€
988
  }
989
 
990
  await conn.session.update(session=session_update)
 
998
  async for event in self.connection:
999
  self.last_activity = datetime.now()
1000
 
1001
+ # Debug logging
1002
+ if event.type in ["response.audio_transcript.done", "response.done", "response.created", "input_audio_buffer.speech_started", "input_audio_buffer.speech_stopped"]:
1003
+ logger.info(f"Event: {event.type}")
1004
+
1005
+ # ์‚ฌ์šฉ์ž ์Œ์„ฑ ๊ฐ์ง€
1006
+ if event.type == "input_audio_buffer.speech_started":
1007
+ logger.info("User started speaking")
1008
+ self.response_in_progress = False # ์‚ฌ์šฉ์ž๊ฐ€ ๋งํ•˜๋ฉด ์‘๋‹ต ์ค‘๋‹จ
1009
+
1010
+ # ์‘๋‹ต ์‹œ์ž‘/์ข…๋ฃŒ ์ถ”์ 
1011
+ if event.type == "response.created":
1012
+ self.response_in_progress = True
1013
+ self.last_response_time = datetime.now()
1014
+ logger.info("Response started")
1015
+
1016
+ if event.type == "response.done":
1017
+ self.response_in_progress = False
1018
+ logger.info("Response completed")
1019
 
1020
  if event.type == "response.audio_transcript.done":
1021
  await self.output_queue.put(AdditionalOutputs(event))
1022
 
1023
  elif event.type == "response.audio.delta":
1024
+ # ์‘๋‹ต ์ค‘๋ณต ๋ฐฉ์ง€ - ๋„ˆ๋ฌด ๋น ๋ฅธ ์—ฐ์† ์‘๋‹ต ์ฐจ๋‹จ
1025
+ time_since_last = (datetime.now() - self.last_response_time).total_seconds()
1026
+ if time_since_last < 0.1: # 100ms ์ด๋‚ด ์‘๋‹ต ๋ฌด์‹œ
1027
+ continue
1028
+
1029
  await self.output_queue.put(
1030
  (
1031
  self.output_sample_rate,
 
1072
  "output": search_results
1073
  }
1074
  )
1075
+ # response.create() ์ œ๊ฑฐ - ์ž๋™์œผ๋กœ ์‘๋‹ต ์ƒ์„ฑ๋จ
1076
 
1077
  except Exception as e:
1078
  logger.error(f"Function call error: {e}")