Private-AI

Running

App Files Files Community

seawolf2357 committed 24 days ago

Commit

7880cc5

verified ·

1 Parent(s): bac099b

Update app.py

Browse files

Files changed (1) hide show

app.py +352 -50

app.py CHANGED Viewed

@@ -25,6 +25,39 @@ load_dotenv()
 SAMPLE_RATE = 24000
 # HTML content embedded as a string
 HTML_CONTENT = """<!DOCTYPE html>
 <html lang="ko">
@@ -87,14 +120,31 @@ HTML_CONTENT = """<!DOCTYPE html>
             font-size: 32px;
             letter-spacing: 1px;
         }
-        /* Web search toggle */
-        .search-toggle {
             display: flex;
             align-items: center;
-            justify-content: center;
             gap: 10px;
-            margin-top: 15px;
         }
         .toggle-switch {
             position: relative;
             width: 50px;
@@ -120,9 +170,43 @@ HTML_CONTENT = """<!DOCTYPE html>
         .toggle-switch.active .toggle-slider {
             transform: translateX(24px);
         }
-        .search-label {
             font-size: 14px;
-            color: #aaa;
         }
         .chat-container {
             border-radius: 12px;
@@ -192,6 +276,7 @@ HTML_CONTENT = """<!DOCTYPE html>
             margin-top: 20px;
             display: flex;
             justify-content: center;
             flex-shrink: 0;
         }
         button {
@@ -220,6 +305,14 @@ HTML_CONTENT = """<!DOCTYPE html>
         button:active {
             transform: translateY(1px);
         }
         #audio-output {
             display: none;
         }
@@ -344,6 +437,11 @@ HTML_CONTENT = """<!DOCTYPE html>
             background-color: var(--secondary-color);
             border-radius: 50%;
         }
     </style>
 </head>
@@ -359,22 +457,71 @@ HTML_CONTENT = """<!DOCTYPE html>
                 </div>
                 <h1>MOUSE 음성 챗</h1>
             </div>
-            <div class="search-toggle">
-                <span class="search-label">웹 검색</span>
-                <div id="search-toggle" class="toggle-switch">
-                    <div class="toggle-slider"></div>
-                </div>
-            </div>
             <div class="status-indicator">
                 <div id="status-dot" class="status-dot disconnected"></div>
                 <span id="status-text">연결 대기 중</span>
             </div>
         </div>
         <div class="chat-container">
             <div class="chat-messages" id="chat-messages"></div>
         </div>
         <div class="controls">
             <button id="start-button">대화 시작</button>
         </div>
     </div>
     <audio id="audio-output"></audio>
@@ -383,15 +530,22 @@ HTML_CONTENT = """<!DOCTYPE html>
         let peerConnection;
         let webrtc_id;
         let webSearchEnabled = false;
         const audioOutput = document.getElementById('audio-output');
         const startButton = document.getElementById('start-button');
         const chatMessages = document.getElementById('chat-messages');
         const statusDot = document.getElementById('status-dot');
         const statusText = document.getElementById('status-text');
         const searchToggle = document.getElementById('search-toggle');
         let audioLevel = 0;
         let animationFrame;
         let audioContext, analyser, audioSource;
         // Web search toggle functionality
         searchToggle.addEventListener('click', () => {
@@ -400,14 +554,50 @@ HTML_CONTENT = """<!DOCTYPE html>
             console.log('Web search enabled:', webSearchEnabled);
         });
         function updateStatus(state) {
             statusDot.className = 'status-dot ' + state;
             if (state === 'connected') {
                 statusText.textContent = '연결됨';
             } else if (state === 'connecting') {
                 statusText.textContent = '연결 중...';
             } else {
                 statusText.textContent = '연결 대기 중';
             }
         }
         function updateButtonState() {
@@ -508,13 +698,19 @@ HTML_CONTENT = """<!DOCTYPE html>
                         audioOutput.play();
                     }
                 });
-                const dataChannel = peerConnection.createDataChannel('text');
                 dataChannel.onmessage = (event) => {
                     const eventJson = JSON.parse(event.data);
                     if (eventJson.type === "error") {
                         showError(eventJson.message);
                     }
                 };
                 const offer = await peerConnection.createOffer();
                 await peerConnection.setLocalDescription(offer);
                 await new Promise((resolve) => {
@@ -547,7 +743,9 @@ HTML_CONTENT = """<!DOCTYPE html>
                         sdp: peerConnection.localDescription.sdp,
                         type: peerConnection.localDescription.type,
                         webrtc_id: webrtc_id,
-                        web_search_enabled: webSearchEnabled
                     })
                 });
                 const serverResponse = await response.json();
@@ -562,7 +760,11 @@ HTML_CONTENT = """<!DOCTYPE html>
                 const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
-                    addMessage("assistant", eventJson.content);
                 });
                 eventSource.addEventListener("search", (event) => {
                     const eventJson = JSON.parse(event.data);
@@ -580,7 +782,11 @@ HTML_CONTENT = """<!DOCTYPE html>
         function addMessage(role, content) {
             const messageDiv = document.createElement('div');
             messageDiv.classList.add('message', role);
-            messageDiv.textContent = content;
             chatMessages.appendChild(messageDiv);
             chatMessages.scrollTop = chatMessages.scrollHeight;
         }
@@ -610,6 +816,7 @@ HTML_CONTENT = """<!DOCTYPE html>
                 console.log('closing');
                 peerConnection.close();
             }
             updateButtonState();
             audioLevel = 0;
         }
@@ -675,8 +882,8 @@ brave_api_key = os.getenv("BSEARCH_API")
 search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
 print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
-# Store web search settings by connection
-web_search_settings = {}
 def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
     chatbot.append({"role": "assistant", "content": response.transcript})
@@ -684,7 +891,8 @@ def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEve
 class OpenAIHandler(AsyncStreamHandler):
-    def __init__(self, web_search_enabled: bool = False, webrtc_id: str = None) -> None:
         super().__init__(
             expected_layout="mono",
             output_sample_rate=SAMPLE_RATE,
@@ -699,21 +907,27 @@ class OpenAIHandler(AsyncStreamHandler):
         self.current_call_id = None
         self.webrtc_id = webrtc_id
         self.web_search_enabled = web_search_enabled
-        print(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
     def copy(self):
         # Get the most recent settings
-        if web_search_settings:
             # Get the most recent webrtc_id
-            recent_ids = sorted(web_search_settings.keys(),
-                              key=lambda k: web_search_settings[k].get('timestamp', 0),
                               reverse=True)
             if recent_ids:
                 recent_id = recent_ids[0]
-                settings = web_search_settings[recent_id]
-                web_search_enabled = settings.get('enabled', False)
-                print(f"Handler.copy() using recent settings - webrtc_id={recent_id}, web_search_enabled={web_search_enabled}")
-                return OpenAIHandler(web_search_enabled=web_search_enabled, webrtc_id=recent_id)
         print(f"Handler.copy() called - creating new handler with default settings")
         return OpenAIHandler(web_search_enabled=False)
@@ -739,26 +953,57 @@ class OpenAIHandler(AsyncStreamHandler):
         return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
     async def start_up(self):
         """Connect to realtime API with function calling enabled"""
         # First check if we have the most recent settings
-        if web_search_settings:
-            recent_ids = sorted(web_search_settings.keys(),
-                              key=lambda k: web_search_settings[k].get('timestamp', 0),
                               reverse=True)
             if recent_ids:
                 recent_id = recent_ids[0]
-                settings = web_search_settings[recent_id]
-                self.web_search_enabled = settings.get('enabled', False)
                 self.webrtc_id = recent_id
-                print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, web_search_enabled={self.web_search_enabled}")
-        print(f"Starting up handler with web_search_enabled={self.web_search_enabled}")
         self.client = openai.AsyncOpenAI()
         # Define the web search function
         tools = []
-        instructions = "You are a helpful assistant. Respond in Korean when the user speaks Korean."
         if self.web_search_enabled and self.search_client:
             tools = [{
@@ -771,7 +1016,7 @@ class OpenAIHandler(AsyncStreamHandler):
                         "properties": {
                             "query": {
                                 "type": "string",
-                                "description": "The search query in Korean or English"
                             }
                         },
                         "required": ["query"]
@@ -780,8 +1025,8 @@ class OpenAIHandler(AsyncStreamHandler):
             }]
             print("Web search function added to tools")
-            instructions = (
-                "You are a helpful assistant with web search capabilities. "
                 "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
                 "- Weather (날씨, 기온, 비, 눈)\n"
                 "- News (뉴스, 소식)\n"
@@ -791,8 +1036,11 @@ class OpenAIHandler(AsyncStreamHandler):
                 "- Any question about 2024 or 2025\n"
                 "- Any time-sensitive information\n\n"
                 "When in doubt, USE web_search. It's better to search and provide accurate information "
-                "than to guess or use outdated information. Always respond in Korean when the user speaks Korean."
             )
         async with self.client.beta.realtime.connect(
             model="gpt-4o-mini-realtime-preview-2024-12-17"
@@ -805,9 +1053,23 @@ class OpenAIHandler(AsyncStreamHandler):
                 "tool_choice": "auto" if tools else "none"
             }
             await conn.session.update(session=session_update)
             self.connection = conn
-            print(f"Connected with tools: {len(tools)} functions")
             async for event in self.connection:
                 # Debug logging for function calls
@@ -815,7 +1077,11 @@ class OpenAIHandler(AsyncStreamHandler):
                     print(f"Function event: {event.type}")
                 if event.type == "response.audio_transcript.done":
-                    await self.output_queue.put(AdditionalOutputs(event))
                 elif event.type == "response.audio.delta":
                     await self.output_queue.put(
@@ -886,7 +1152,14 @@ class OpenAIHandler(AsyncStreamHandler):
             # Connection might be closed, ignore the error
     async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
-        return await wait_for_item(self.output_queue)
     async def shutdown(self) -> None:
         if self.connection:
@@ -921,18 +1194,23 @@ stream.mount(app)
 # Intercept offer to capture settings
 @app.post("/webrtc/offer", include_in_schema=False)
 async def custom_offer(request: Request):
-    """Intercept offer to capture web search settings"""
     body = await request.json()
     webrtc_id = body.get("webrtc_id")
     web_search_enabled = body.get("web_search_enabled", False)
-    print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}")
     # Store settings with timestamp
     if webrtc_id:
-        web_search_settings[webrtc_id] = {
-            'enabled': web_search_enabled,
             'timestamp': asyncio.get_event_loop().time()
         }
@@ -953,6 +1231,24 @@ async def custom_offer(request: Request):
     return response
 @app.get("/outputs")
 async def outputs(webrtc_id: str):
     """Stream outputs including search events"""
@@ -962,10 +1258,16 @@ async def outputs(webrtc_id: str):
                 # Check if it's a search event
                 if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
                     yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
-                # Regular transcript event
-                elif hasattr(output.args[0], 'transcript'):
-                    s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
-                    yield f"event: output\ndata: {s}\n\n"
     return StreamingResponse(output_stream(), media_type="text/event-stream")

 SAMPLE_RATE = 24000
+# Supported languages for OpenAI Realtime API
+# Maps a two-letter language code to a human-readable language name.
+# get_translation_instructions() looks the selected code up here to name the
+# language in the model instructions. The codes and names match the <option>
+# entries of the "language-select" dropdown in HTML_CONTENT, which is written
+# out separately, so the two lists have to be kept in sync by hand.
+SUPPORTED_LANGUAGES = {
+    "ko": "한국어 (Korean)",
+    "en": "English",
+    "es": "Español (Spanish)",
+    "fr": "Français (French)",
+    "de": "Deutsch (German)",
+    "it": "Italiano (Italian)",
+    "pt": "Português (Portuguese)",
+    "ru": "Русский (Russian)",
+    "ja": "日本語 (Japanese)",
+    "zh": "中文 (Chinese)",
+    "ar": "العربية (Arabic)",
+    "hi": "हिन्दी (Hindi)",
+    "nl": "Nederlands (Dutch)",
+    "pl": "Polski (Polish)",
+    "tr": "Türkçe (Turkish)",
+    "vi": "Tiếng Việt (Vietnamese)",
+    "th": "ไทย (Thai)",
+    "id": "Bahasa Indonesia",
+    "sv": "Svenska (Swedish)",
+    "da": "Dansk (Danish)",
+    "no": "Norsk (Norwegian)",
+    "fi": "Suomi (Finnish)",
+    "he": "עברית (Hebrew)",
+    "uk": "Українська (Ukrainian)",
+    "cs": "Čeština (Czech)",
+    "el": "Ελληνικά (Greek)",
+    "ro": "Română (Romanian)",
+    "hu": "Magyar (Hungarian)",
+    "ms": "Bahasa Melayu (Malay)"
+}
 # HTML content embedded as a string
 HTML_CONTENT = """<!DOCTYPE html>
 <html lang="ko">
             font-size: 32px;
             letter-spacing: 1px;
         }
+        /* Settings section */
+        .settings-section {
+            background-color: var(--card-bg);
+            border-radius: 12px;
+            padding: 20px;
+            margin-bottom: 20px;
+            border: 1px solid var(--border-color);
+        }
+        .settings-grid {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 15px;
+            margin-bottom: 15px;
+        }
+        .setting-item {
             display: flex;
             align-items: center;
             gap: 10px;
         }
+        .setting-label {
+            font-size: 14px;
+            color: #aaa;
+            min-width: 80px;
+        }
+        /* Toggle switch */
         .toggle-switch {
             position: relative;
             width: 50px;
         .toggle-switch.active .toggle-slider {
             transform: translateX(24px);
         }
+        /* Select dropdown */
+        select {
+            background-color: var(--card-bg);
+            color: var(--text-color);
+            border: 1px solid var(--border-color);
+            padding: 8px 12px;
+            border-radius: 6px;
             font-size: 14px;
+            cursor: pointer;
+            min-width: 150px;
+        }
+        select:focus {
+            outline: none;
+            border-color: var(--primary-color);
+        }
+        /* Text inputs */
+        .text-input-section {
+            margin-top: 15px;
+        }
+        input[type="text"], textarea {
+            width: 100%;
+            background-color: var(--dark-bg);
+            color: var(--text-color);
+            border: 1px solid var(--border-color);
+            padding: 10px;
+            border-radius: 6px;
+            font-size: 14px;
+            box-sizing: border-box;
+            margin-top: 5px;
+        }
+        input[type="text"]:focus, textarea:focus {
+            outline: none;
+            border-color: var(--primary-color);
+        }
+        textarea {
+            resize: vertical;
+            min-height: 80px;
         }
         .chat-container {
             border-radius: 12px;
             margin-top: 20px;
             display: flex;
             justify-content: center;
+            gap: 10px;
             flex-shrink: 0;
         }
         button {
         button:active {
             transform: translateY(1px);
         }
+        #send-button {
+            background: linear-gradient(135deg, #2ecc71, #27ae60);
+            padding: 10px 20px;
+            font-size: 14px;
+        }
+        #send-button:hover {
+            background: linear-gradient(135deg, #27ae60, #229954);
+        }
         #audio-output {
             display: none;
         }
             background-color: var(--secondary-color);
             border-radius: 50%;
         }
+        .language-info {
+            font-size: 12px;
+            color: #888;
+            margin-left: 5px;
+        }
     </style>
 </head>
                 </div>
                 <h1>MOUSE 음성 챗</h1>
             </div>
             <div class="status-indicator">
                 <div id="status-dot" class="status-dot disconnected"></div>
                 <span id="status-text">연결 대기 중</span>
             </div>
         </div>
+        <div class="settings-section">
+            <div class="settings-grid">
+                <div class="setting-item">
+                    <span class="setting-label">웹 검색</span>
+                    <div id="search-toggle" class="toggle-switch">
+                        <div class="toggle-slider"></div>
+                    </div>
+                </div>
+                <div class="setting-item">
+                    <span class="setting-label">자동 번역</span>
+                    <select id="language-select">
+                        <option value="">비활성화</option>
+                        <option value="ko">한국어 (Korean)</option>
+                        <option value="en">English</option>
+                        <option value="es">Español (Spanish)</option>
+                        <option value="fr">Français (French)</option>
+                        <option value="de">Deutsch (German)</option>
+                        <option value="it">Italiano (Italian)</option>
+                        <option value="pt">Português (Portuguese)</option>
+                        <option value="ru">Русский (Russian)</option>
+                        <option value="ja">日本語 (Japanese)</option>
+                        <option value="zh">中文 (Chinese)</option>
+                        <option value="ar">العربية (Arabic)</option>
+                        <option value="hi">हिन्दी (Hindi)</option>
+                        <option value="nl">Nederlands (Dutch)</option>
+                        <option value="pl">Polski (Polish)</option>
+                        <option value="tr">Türkçe (Turkish)</option>
+                        <option value="vi">Tiếng Việt (Vietnamese)</option>
+                        <option value="th">ไทย (Thai)</option>
+                        <option value="id">Bahasa Indonesia</option>
+                        <option value="sv">Svenska (Swedish)</option>
+                        <option value="da">Dansk (Danish)</option>
+                        <option value="no">Norsk (Norwegian)</option>
+                        <option value="fi">Suomi (Finnish)</option>
+                        <option value="he">עברית (Hebrew)</option>
+                        <option value="uk">Українська (Ukrainian)</option>
+                        <option value="cs">Čeština (Czech)</option>
+                        <option value="el">Ελληνικά (Greek)</option>
+                        <option value="ro">Română (Romanian)</option>
+                        <option value="hu">Magyar (Hungarian)</option>
+                        <option value="ms">Bahasa Melayu (Malay)</option>
+                    </select>
+                </div>
+            </div>
+            <div class="text-input-section">
+                <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
+                <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
+            </div>
+        </div>
         <div class="chat-container">
             <div class="chat-messages" id="chat-messages"></div>
+            <div class="text-input-section" style="margin-top: 10px;">
+                <input type="text" id="text-input" placeholder="텍스트 메시지를 입력하세요..." />
+            </div>
         </div>
         <div class="controls">
             <button id="start-button">대화 시작</button>
+            <button id="send-button" style="display: none;">전송</button>
         </div>
     </div>
     <audio id="audio-output"></audio>
         let peerConnection;
         let webrtc_id;
         let webSearchEnabled = false;
+        let selectedLanguage = "";
+        let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
         const audioOutput = document.getElementById('audio-output');
         const startButton = document.getElementById('start-button');
+        const sendButton = document.getElementById('send-button');
         const chatMessages = document.getElementById('chat-messages');
         const statusDot = document.getElementById('status-dot');
         const statusText = document.getElementById('status-text');
         const searchToggle = document.getElementById('search-toggle');
+        const languageSelect = document.getElementById('language-select');
+        const systemPromptInput = document.getElementById('system-prompt');
+        const textInput = document.getElementById('text-input');
         let audioLevel = 0;
         let animationFrame;
         let audioContext, analyser, audioSource;
+        let dataChannel = null;
         // Web search toggle functionality
         searchToggle.addEventListener('click', () => {
             console.log('Web search enabled:', webSearchEnabled);
         });
+        // Language selection: mirror the dropdown into selectedLanguage, which is
+        // later sent to the server as target_language. An empty value (the first
+        // option) means no target language is requested.
+        languageSelect.addEventListener('change', () => {
+            selectedLanguage = languageSelect.value;
+            console.log('Selected language:', selectedLanguage);
+        });
+        // System prompt update: track the textarea on every edit and fall back to
+        // the built-in default prompt when the textarea is emptied.
+        systemPromptInput.addEventListener('input', () => {
+            systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
+        });
+        // Text input handling: Enter without Shift sends the message;
+        // preventDefault() suppresses the key's default action first.
+        textInput.addEventListener('keypress', (e) => {
+            if (e.key === 'Enter' && !e.shiftKey) {
+                e.preventDefault();
+                sendTextMessage();
+            }
+        });
+        // The send button triggers the same path as pressing Enter.
+        sendButton.addEventListener('click', sendTextMessage);
+        function sendTextMessage() {
+            const message = textInput.value.trim();
+            if (!message || !dataChannel || dataChannel.readyState !== 'open') return;
+            addMessage('user', message);
+            dataChannel.send(JSON.stringify({
+                type: 'text_message',
+                content: message
+            }));
+            textInput.value = '';
+        }
         function updateStatus(state) {
             statusDot.className = 'status-dot ' + state;
             if (state === 'connected') {
                 statusText.textContent = '연결됨';
+                sendButton.style.display = 'block';
             } else if (state === 'connecting') {
                 statusText.textContent = '연결 중...';
+                sendButton.style.display = 'none';
             } else {
                 statusText.textContent = '연결 대기 중';
+                sendButton.style.display = 'none';
             }
         }
         function updateButtonState() {
                         audioOutput.play();
                     }
                 });
+                // Create data channel for text messages
+                dataChannel = peerConnection.createDataChannel('text');
+                dataChannel.onopen = () => {
+                    console.log('Data channel opened');
+                };
                 dataChannel.onmessage = (event) => {
                     const eventJson = JSON.parse(event.data);
                     if (eventJson.type === "error") {
                         showError(eventJson.message);
                     }
                 };
                 const offer = await peerConnection.createOffer();
                 await peerConnection.setLocalDescription(offer);
                 await new Promise((resolve) => {
                         sdp: peerConnection.localDescription.sdp,
                         type: peerConnection.localDescription.type,
                         webrtc_id: webrtc_id,
+                        web_search_enabled: webSearchEnabled,
+                        target_language: selectedLanguage,
+                        system_prompt: systemPrompt
                     })
                 });
                 const serverResponse = await response.json();
                 const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
+                    let content = eventJson.content;
+                    if (selectedLanguage && eventJson.language) {
+                        content += ` <span class="language-info">[${eventJson.language}]</span>`;
+                    }
+                    addMessage("assistant", content);
                 });
                 eventSource.addEventListener("search", (event) => {
                     const eventJson = JSON.parse(event.data);
         function addMessage(role, content) {
             const messageDiv = document.createElement('div');
             messageDiv.classList.add('message', role);
+            if (content.includes('<span')) {
+                messageDiv.innerHTML = content;
+            } else {
+                messageDiv.textContent = content;
+            }
             chatMessages.appendChild(messageDiv);
             chatMessages.scrollTop = chatMessages.scrollHeight;
         }
                 console.log('closing');
                 peerConnection.close();
             }
+            dataChannel = null;
             updateButtonState();
             audioLevel = 0;
         }
 search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
 print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
+# Store connection settings
+# Keyed by webrtc_id. Each value is a dict that the handler reads with the
+# keys 'web_search_enabled', 'target_language', 'system_prompt' and
+# 'timestamp'; the entry with the largest timestamp is treated as current.
+# NOTE(review): no code visible here removes entries — confirm whether the
+# dict is ever pruned.
+connection_settings = {}
 def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
     chatbot.append({"role": "assistant", "content": response.transcript})
 class OpenAIHandler(AsyncStreamHandler):
+    def __init__(self, web_search_enabled: bool = False, target_language: str = "",
+                 system_prompt: str = "", webrtc_id: str = None) -> None:
         super().__init__(
             expected_layout="mono",
             output_sample_rate=SAMPLE_RATE,
         self.current_call_id = None
         self.webrtc_id = webrtc_id
         self.web_search_enabled = web_search_enabled
+        self.target_language = target_language
+        self.system_prompt = system_prompt
+        print(f"Handler created with web_search_enabled={web_search_enabled}, "
+              f"target_language={target_language}, webrtc_id={webrtc_id}")
     def copy(self):
         # Get the most recent settings
+        if connection_settings:
             # Get the most recent webrtc_id
+            recent_ids = sorted(connection_settings.keys(),
+                              key=lambda k: connection_settings[k].get('timestamp', 0),
                               reverse=True)
             if recent_ids:
                 recent_id = recent_ids[0]
+                settings = connection_settings[recent_id]
+                return OpenAIHandler(
+                    web_search_enabled=settings.get('web_search_enabled', False),
+                    target_language=settings.get('target_language', ''),
+                    system_prompt=settings.get('system_prompt', ''),
+                    webrtc_id=recent_id
+                )
         print(f"Handler.copy() called - creating new handler with default settings")
         return OpenAIHandler(web_search_enabled=False)
         return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
+    async def process_text_message(self, message: str):
+        """Process text message from user"""
+        if self.connection:
+            await self.connection.conversation.item.create(
+                item={
+                    "type": "message",
+                    "role": "user",
+                    "content": [{"type": "input_text", "text": message}]
+                }
+            )
+            await self.connection.response.create()
+    def get_translation_instructions(self):
+        """Get instructions for translation based on target language"""
+        if not self.target_language:
+            return ""
+        language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
+        return (
+            f"\n\nIMPORTANT: You must respond in {language_name} ({self.target_language}). "
+            f"Translate all your responses to {language_name}. "
+            f"This includes both spoken and written responses."
+        )
     async def start_up(self):
         """Connect to realtime API with function calling enabled"""
         # Flow of the visible code: (1) adopt the newest stored connection
         # settings, (2) assemble the session instructions, (3) open the realtime
         # connection, send a session update and consume the event stream.
         # NOTE(review): this listing omits several stretches of the method (the
         # head of the tool schema, the start of session_update, the condition
         # guarding the function-event log and the audio-delta payload), so the
         # comments here cover only the lines that are shown.
         # First check if we have the most recent settings
         # NOTE(review): the entry with the largest 'timestamp' is adopted no
         # matter which client stored it; with several simultaneous connections
         # this may pick up another client's settings -- confirm this is intended.
+        if connection_settings:
+            recent_ids = sorted(connection_settings.keys(),
+                              key=lambda k: connection_settings[k].get('timestamp', 0),
                               reverse=True)
             if recent_ids:
                 recent_id = recent_ids[0]
+                settings = connection_settings[recent_id]
+                self.web_search_enabled = settings.get('web_search_enabled', False)
+                self.target_language = settings.get('target_language', '')
+                self.system_prompt = settings.get('system_prompt', '')
                 self.webrtc_id = recent_id
+                print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
+                      f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}")
+        print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
+              f"target_language={self.target_language}")
         self.client = openai.AsyncOpenAI()
         # Define the web search function
         tools = []
         # An empty system prompt falls back to a generic assistant persona.
+        base_instructions = self.system_prompt or "You are a helpful assistant."
+        # Add translation instructions if language is selected
+        translation_instructions = self.get_translation_instructions()
         # Search tooling is only offered when the flag is set and a search
         # client exists; otherwise tools stays empty.
         if self.web_search_enabled and self.search_client:
             tools = [{
                         "properties": {
                             "query": {
                                 "type": "string",
+                                "description": "The search query"
                             }
                         },
                         "required": ["query"]
             }]
             print("Web search function added to tools")
+            search_instructions = (
+                "\n\nYou have web search capabilities. "
                 "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
                 "- Weather (날씨, 기온, 비, 눈)\n"
                 "- News (뉴스, 소식)\n"
                 "- Any question about 2024 or 2025\n"
                 "- Any time-sensitive information\n\n"
                 "When in doubt, USE web_search. It's better to search and provide accurate information "
+                "than to guess or use outdated information."
             )
             # Order: base prompt, then search guidance, then translation text.
+            instructions = base_instructions + search_instructions + translation_instructions
+        else:
+            instructions = base_instructions + translation_instructions
         # NOTE(review): the rest of the connect() call and the opening of the
         # session_update dict are not shown in this listing.
         async with self.client.beta.realtime.connect(
             model="gpt-4o-mini-realtime-preview-2024-12-17"
                 "tool_choice": "auto" if tools else "none"
             }
+            # Add voice setting if target language is selected
+            if self.target_language:
+                # Map languages to appropriate voices
                 # NOTE(review): "nova" does not appear among the voices OpenAI
                 # documents for the Realtime API (alloy, ash, ballad, coral,
                 # echo, sage, shimmer, verse). Confirm session.update accepts
                 # it; otherwise the es/fr/de/ja/zh entries would be rejected.
+                voice_map = {
+                    "en": "alloy",
+                    "es": "nova",
+                    "fr": "nova",
+                    "de": "nova",
+                    "ja": "nova",
+                    "zh": "nova",
+                    # Default to alloy for other languages
+                }
+                session_update["voice"] = voice_map.get(self.target_language, "alloy")
             await conn.session.update(session=session_update)
             self.connection = conn
+            print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
             async for event in self.connection:
                 # Debug logging for function calls
                     print(f"Function event: {event.type}")
                 if event.type == "response.audio_transcript.done":
                     # Bundle the finished transcript event with the entry for
                     # the target language from SUPPORTED_LANGUAGES ("" when no
                     # language is set or the code is not in the mapping).
+                    output_data = {
+                        "event": event,
+                        "language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
+                    }
+                    await self.output_queue.put(AdditionalOutputs(output_data))
                 elif event.type == "response.audio.delta":
                     await self.output_queue.put(
             # Connection might be closed, ignore the error
     async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
         # Takes the next item from output_queue via wait_for_item. A dict tagged
         # 'text_message' (the shape queued by the /text_message endpoint) is
         # handed to process_text_message and nothing is emitted for it; any
         # other item is returned unchanged.
+        item = await wait_for_item(self.output_queue)
+        # Check if it's a dict with text message
+        if isinstance(item, dict) and item.get('type') == 'text_message':
             # Awaited inline: this call does not return until
             # process_text_message has finished.
+            await self.process_text_message(item['content'])
+            return None
+        return item
     async def shutdown(self) -> None:
         if self.connection:
 # Intercept offer to capture settings
 @app.post("/webrtc/offer", include_in_schema=False)
 async def custom_offer(request: Request):
+    """Intercept offer to capture settings"""
     # Reads the connection id and the optional per-connection settings from
     # the JSON body; missing settings default to False / empty string.
     body = await request.json()
     webrtc_id = body.get("webrtc_id")
     web_search_enabled = body.get("web_search_enabled", False)
+    target_language = body.get("target_language", "")
+    system_prompt = body.get("system_prompt", "")
+    print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
+          f"target_language: {target_language}")
     # Store settings with timestamp
     # The timestamp comes from the event loop's clock (loop.time()), so it is
     # only meaningful for ordering entries, not as wall-clock time. start_up
     # sorts on it to find the newest entry.
     # NOTE(review): no removal of old entries is visible in this listing --
     # confirm that connection_settings is cleaned up elsewhere.
     if webrtc_id:
+        connection_settings[webrtc_id] = {
+            'web_search_enabled': web_search_enabled,
+            'target_language': target_language,
+            'system_prompt': system_prompt,
             'timestamp': asyncio.get_event_loop().time()
         }
     # NOTE(review): the code that produces `response` is not shown in this
     # listing.
     return response
+@app.post("/text_message/{webrtc_id}")
+async def receive_text_message(webrtc_id: str, request: Request):
+    """Receive text message from client"""
     # Takes the "content" field of the JSON body (default "") and, when a
     # handler is registered for webrtc_id in stream.handlers, places it on
     # that handler's output_queue as a dict tagged 'text_message'. The
     # handler's emit() recognises that tag and processes the text.
+    body = await request.json()
+    message = body.get("content", "")
+    # Find the handler for this connection
+    if webrtc_id in stream.handlers:
+        handler = stream.handlers[webrtc_id]
+        # Queue the text message for processing
+        await handler.output_queue.put({
+            'type': 'text_message',
+            'content': message
+        })
     # NOTE(review): "ok" is returned even when no handler matched, so the
     # client cannot tell that the message was dropped.
+    return {"status": "ok"}
 @app.get("/outputs")
 async def outputs(webrtc_id: str):
     """Stream outputs including search events"""
     # Returns a text/event-stream response driven by output_stream().
     # NOTE(review): the definition of output_stream and the loop that yields
     # `output` are not shown in this listing; only the per-item branches are.
                 # Check if it's a search event
                 if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
                     yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
+                # Regular transcript event with language info
+                elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
+                    event = output.args[0]['event']
                     # Events without a transcript attribute produce no output.
+                    if hasattr(event, 'transcript'):
+                        data = {
+                            "role": "assistant",
+                            "content": event.transcript,
+                            "language": output.args[0].get('language', '')
+                        }
+                        yield f"event: output\ndata: {json.dumps(data)}\n\n"
     return StreamingResponse(output_stream(), media_type="text/event-stream")