Private-AI

Running

App Files Community

seawolf2357 committed 23 days ago

Commit

f324e82

verified ·

1 Parent(s): c88aa2b

Update app.py

Browse files

Files changed (1) hide show

app.py +195 -50

app.py CHANGED Viewed

@@ -134,6 +134,20 @@ HTML_CONTENT = """<!DOCTYPE html>
             gap: 15px;
             margin-bottom: 15px;
         }
         .setting-item {
             display: flex;
             align-items: center;
@@ -507,6 +521,52 @@ HTML_CONTENT = """<!DOCTYPE html>
                     </select>
                 </div>
             </div>
             <div class="text-input-section">
                 <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
                 <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
@@ -531,6 +591,8 @@ HTML_CONTENT = """<!DOCTYPE html>
         let webrtc_id;
         let webSearchEnabled = false;
         let selectedLanguage = "";
         let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
         const audioOutput = document.getElementById('audio-output');
         const startButton = document.getElementById('start-button');
@@ -540,6 +602,10 @@ HTML_CONTENT = """<!DOCTYPE html>
         const statusText = document.getElementById('status-text');
         const searchToggle = document.getElementById('search-toggle');
         const languageSelect = document.getElementById('language-select');
         const systemPromptInput = document.getElementById('system-prompt');
         const textInput = document.getElementById('text-input');
         let audioLevel = 0;
@@ -561,6 +627,37 @@ HTML_CONTENT = """<!DOCTYPE html>
             console.log('Selected language:', selectedLanguage);
         });
         // System prompt update
         systemPromptInput.addEventListener('input', () => {
             systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -580,6 +677,12 @@ HTML_CONTENT = """<!DOCTYPE html>
             const message = textInput.value.trim();
             if (!message) return;
             // Add user message to chat
             addMessage('user', message);
             textInput.value = '';
@@ -789,7 +892,9 @@ HTML_CONTENT = """<!DOCTYPE html>
                         webrtc_id: webrtc_id,
                         web_search_enabled: webSearchEnabled,
                         target_language: selectedLanguage,
-                        system_prompt: systemPrompt
                     })
                 });
                 const serverResponse = await response.json();
@@ -1005,7 +1110,8 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
 class OpenAIHandler(AsyncStreamHandler):
     def __init__(self, web_search_enabled: bool = False, target_language: str = "",
-                 system_prompt: str = "", webrtc_id: str = None) -> None:
         super().__init__(
             expected_layout="mono",
             output_sample_rate=SAMPLE_RATE,
@@ -1022,8 +1128,11 @@ class OpenAIHandler(AsyncStreamHandler):
         self.web_search_enabled = web_search_enabled
         self.target_language = target_language
         self.system_prompt = system_prompt
         print(f"Handler created with web_search_enabled={web_search_enabled}, "
-              f"target_language={target_language}, webrtc_id={webrtc_id}")
     def copy(self):
         # Get the most recent settings
@@ -1039,7 +1148,9 @@ class OpenAIHandler(AsyncStreamHandler):
                     web_search_enabled=settings.get('web_search_enabled', False),
                     target_language=settings.get('target_language', ''),
                     system_prompt=settings.get('system_prompt', ''),
-                    webrtc_id=recent_id
                 )
         print(f"Handler.copy() called - creating new handler with default settings")
@@ -1078,9 +1189,23 @@ class OpenAIHandler(AsyncStreamHandler):
             )
             await self.connection.response.create()
     def get_translation_instructions(self):
         """Get instructions for translation based on target language"""
-        if not self.target_language:
             return ""
         language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
@@ -1103,57 +1228,68 @@ class OpenAIHandler(AsyncStreamHandler):
                 self.web_search_enabled = settings.get('web_search_enabled', False)
                 self.target_language = settings.get('target_language', '')
                 self.system_prompt = settings.get('system_prompt', '')
                 self.webrtc_id = recent_id
                 print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
-                      f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}")
         print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
-              f"target_language={self.target_language}")
         self.client = openai.AsyncOpenAI()
         # Define the web search function
         tools = []
         base_instructions = self.system_prompt or "You are a helpful assistant."
-        # Add translation instructions if language is selected
-        translation_instructions = self.get_translation_instructions()
-        if self.web_search_enabled and self.search_client:
-            tools = [{
-                "type": "function",
-                "function": {
-                    "name": "web_search",
-                    "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "query": {
-                                "type": "string",
-                                "description": "The search query"
-                            }
-                        },
-                        "required": ["query"]
-                    }
-                }
-            }]
-            print("Web search function added to tools")
-            search_instructions = (
-                "\n\nYou have web search capabilities. "
-                "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
-                "- Weather (날씨, 기온, 비, 눈)\n"
-                "- News (뉴스, 소식)\n"
-                "- Current events (현재, 최근, 오늘, 지금)\n"
-                "- Prices (가격, 환율, 주가)\n"
-                "- Sports scores or results\n"
-                "- Any question about 2024 or 2025\n"
-                "- Any time-sensitive information\n\n"
-                "When in doubt, USE web_search. It's better to search and provide accurate information "
-                "than to guess or use outdated information."
-            )
-            instructions = base_instructions + search_instructions + translation_instructions
         else:
-            instructions = base_instructions + translation_instructions
         async with self.client.beta.realtime.connect(
             model="gpt-4o-mini-realtime-preview-2024-12-17"
@@ -1166,8 +1302,9 @@ class OpenAIHandler(AsyncStreamHandler):
                 "tool_choice": "auto" if tools else "none"
             }
-            # Add voice setting if target language is selected
-            if self.target_language:
                 # Map languages to appropriate voices
                 voice_map = {
                     "en": "alloy",
@@ -1178,7 +1315,7 @@ class OpenAIHandler(AsyncStreamHandler):
                     "zh": "nova",
                     # Default to alloy for other languages
                 }
-                session_update["voice"] = voice_map.get(self.target_language, "alloy")
             await conn.session.update(session=session_update)
             self.connection = conn
@@ -1192,7 +1329,10 @@ class OpenAIHandler(AsyncStreamHandler):
                 if event.type == "response.audio_transcript.done":
                     output_data = {
                         "event": event,
-                        "language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
                     }
                     await self.output_queue.put(AdditionalOutputs(output_data))
@@ -1314,9 +1454,12 @@ async def custom_offer(request: Request):
     web_search_enabled = body.get("web_search_enabled", False)
     target_language = body.get("target_language", "")
     system_prompt = body.get("system_prompt", "")
     print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
-          f"target_language: {target_language}")
     # Store settings with timestamp
     if webrtc_id:
@@ -1324,6 +1467,8 @@ async def custom_offer(request: Request):
             'web_search_enabled': web_search_enabled,
             'target_language': target_language,
             'system_prompt': system_prompt,
             'timestamp': asyncio.get_event_loop().time()
         }

             gap: 15px;
             margin-bottom: 15px;
         }
+        .interpretation-section {
+            display: flex;
+            align-items: center;
+            gap: 15px;
+            padding: 15px;
+            background-color: var(--dark-bg);
+            border-radius: 8px;
+            margin-top: 15px;
+        }
+        .interpretation-info {
+            font-size: 13px;
+            color: #999;
+            margin-top: 5px;
+        }
         .setting-item {
             display: flex;
             align-items: center;
                     </select>
                 </div>
             </div>
+            <div class="interpretation-section">
+                <div class="setting-item">
+                    <span class="setting-label">자동 통역</span>
+                    <div id="interpretation-toggle" class="toggle-switch">
+                        <div class="toggle-slider"></div>
+                    </div>
+                </div>
+                <div class="setting-item" id="interpretation-language-container" style="display: none;">
+                    <span class="setting-label">통역 언어</span>
+                    <select id="interpretation-language-select">
+                        <option value="">언어 선택</option>
+                        <option value="ko">한국어 (Korean)</option>
+                        <option value="en">English</option>
+                        <option value="es">Español (Spanish)</option>
+                        <option value="fr">Français (French)</option>
+                        <option value="de">Deutsch (German)</option>
+                        <option value="it">Italiano (Italian)</option>
+                        <option value="pt">Português (Portuguese)</option>
+                        <option value="ru">Русский (Russian)</option>
+                        <option value="ja">日本語 (Japanese)</option>
+                        <option value="zh">中文 (Chinese)</option>
+                        <option value="ar">العربية (Arabic)</option>
+                        <option value="hi">हिन्दी (Hindi)</option>
+                        <option value="nl">Nederlands (Dutch)</option>
+                        <option value="pl">Polski (Polish)</option>
+                        <option value="tr">Türkçe (Turkish)</option>
+                        <option value="vi">Tiếng Việt (Vietnamese)</option>
+                        <option value="th">ไทย (Thai)</option>
+                        <option value="id">Bahasa Indonesia</option>
+                        <option value="sv">Svenska (Swedish)</option>
+                        <option value="da">Dansk (Danish)</option>
+                        <option value="no">Norsk (Norwegian)</option>
+                        <option value="fi">Suomi (Finnish)</option>
+                        <option value="he">עברית (Hebrew)</option>
+                        <option value="uk">Українська (Ukrainian)</option>
+                        <option value="cs">Čeština (Czech)</option>
+                        <option value="el">Ελληνικά (Greek)</option>
+                        <option value="ro">Română (Romanian)</option>
+                        <option value="hu">Magyar (Hungarian)</option>
+                        <option value="ms">Bahasa Melayu (Malay)</option>
+                    </select>
+                </div>
+            </div>
+            <div class="interpretation-info" id="interpretation-info" style="display: none;">
+                통역 모드: 입력한 음성이 선택한 언어로 자동 통역됩니다.
+            </div>
             <div class="text-input-section">
                 <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
                 <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
         let webrtc_id;
         let webSearchEnabled = false;
         let selectedLanguage = "";
+        let interpretationMode = false;
+        let interpretationLanguage = "";
         let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
         const audioOutput = document.getElementById('audio-output');
         const startButton = document.getElementById('start-button');
         const statusText = document.getElementById('status-text');
         const searchToggle = document.getElementById('search-toggle');
         const languageSelect = document.getElementById('language-select');
+        const interpretationToggle = document.getElementById('interpretation-toggle');
+        const interpretationLanguageSelect = document.getElementById('interpretation-language-select');
+        const interpretationLanguageContainer = document.getElementById('interpretation-language-container');
+        const interpretationInfo = document.getElementById('interpretation-info');
         const systemPromptInput = document.getElementById('system-prompt');
         const textInput = document.getElementById('text-input');
         let audioLevel = 0;
             console.log('Selected language:', selectedLanguage);
         });
+        // Interpretation mode toggle
+        interpretationToggle.addEventListener('click', () => {
+            interpretationMode = !interpretationMode;
+            interpretationToggle.classList.toggle('active', interpretationMode);
+            interpretationLanguageContainer.style.display = interpretationMode ? 'flex' : 'none';
+            interpretationInfo.style.display = interpretationMode ? 'block' : 'none';
+            // Disable translation mode when interpretation is enabled
+            if (interpretationMode) {
+                languageSelect.value = '';
+                selectedLanguage = '';
+                languageSelect.disabled = true;
+                searchToggle.classList.remove('active');
+                webSearchEnabled = false;
+                searchToggle.style.opacity = '0.5';
+                searchToggle.style.pointerEvents = 'none';
+            } else {
+                languageSelect.disabled = false;
+                searchToggle.style.opacity = '1';
+                searchToggle.style.pointerEvents = 'auto';
+            }
+            console.log('Interpretation mode:', interpretationMode);
+        });
+        // Interpretation language selection
+        interpretationLanguageSelect.addEventListener('change', () => {
+            interpretationLanguage = interpretationLanguageSelect.value;
+            console.log('Interpretation language:', interpretationLanguage);
+        });
         // System prompt update
         systemPromptInput.addEventListener('input', () => {
             systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
             const message = textInput.value.trim();
             if (!message) return;
+            // Don't allow text messages in interpretation mode
+            if (interpretationMode) {
+                showError('통역 모드에서는 텍스트 입력이 지원되지 않습니다.');
+                return;
+            }
             // Add user message to chat
             addMessage('user', message);
             textInput.value = '';
                         webrtc_id: webrtc_id,
                         web_search_enabled: webSearchEnabled,
                         target_language: selectedLanguage,
+                        system_prompt: systemPrompt,
+                        interpretation_mode: interpretationMode,
+                        interpretation_language: interpretationLanguage
                     })
                 });
                 const serverResponse = await response.json();
 class OpenAIHandler(AsyncStreamHandler):
     def __init__(self, web_search_enabled: bool = False, target_language: str = "",
+                 system_prompt: str = "", webrtc_id: str = None,
+                 interpretation_mode: bool = False, interpretation_language: str = "") -> None:
         super().__init__(
             expected_layout="mono",
             output_sample_rate=SAMPLE_RATE,
         self.web_search_enabled = web_search_enabled
         self.target_language = target_language
         self.system_prompt = system_prompt
+        self.interpretation_mode = interpretation_mode
+        self.interpretation_language = interpretation_language
         print(f"Handler created with web_search_enabled={web_search_enabled}, "
+              f"target_language={target_language}, webrtc_id={webrtc_id}, "
+              f"interpretation_mode={interpretation_mode}, interpretation_language={interpretation_language}")
     def copy(self):
         # Get the most recent settings
                     web_search_enabled=settings.get('web_search_enabled', False),
                     target_language=settings.get('target_language', ''),
                     system_prompt=settings.get('system_prompt', ''),
+                    webrtc_id=recent_id,
+                    interpretation_mode=settings.get('interpretation_mode', False),
+                    interpretation_language=settings.get('interpretation_language', '')
                 )
         print(f"Handler.copy() called - creating new handler with default settings")
             )
             await self.connection.response.create()
+    def get_interpretation_instructions(self):
+        """Get instructions for interpretation mode"""
+        if not self.interpretation_mode or not self.interpretation_language:
+            return ""
+        target_language_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
+        return (
+            f"\n\nIMPORTANT: You are now in INTERPRETATION MODE. "
+            f"You must ONLY translate what the user says into {target_language_name} ({self.interpretation_language}). "
+            f"DO NOT generate any responses, opinions, or additional content. "
+            f"Your ONLY task is to translate the user's speech accurately into {target_language_name}. "
+            f"You are a professional interpreter - simply translate what is said, nothing more."
+        )
     def get_translation_instructions(self):
         """Get instructions for translation based on target language"""
+        if not self.target_language or self.interpretation_mode:
             return ""
         language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
                 self.web_search_enabled = settings.get('web_search_enabled', False)
                 self.target_language = settings.get('target_language', '')
                 self.system_prompt = settings.get('system_prompt', '')
+                self.interpretation_mode = settings.get('interpretation_mode', False)
+                self.interpretation_language = settings.get('interpretation_language', '')
                 self.webrtc_id = recent_id
                 print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
+                      f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}, "
+                      f"interpretation_mode={self.interpretation_mode}")
         print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
+              f"target_language={self.target_language}, interpretation_mode={self.interpretation_mode}")
         self.client = openai.AsyncOpenAI()
         # Define the web search function
         tools = []
         base_instructions = self.system_prompt or "You are a helpful assistant."
+        # Check if in interpretation mode
+        if self.interpretation_mode:
+            # In interpretation mode, override all instructions
+            interpretation_instructions = self.get_interpretation_instructions()
+            instructions = interpretation_instructions
+            # No tools in interpretation mode
+            tools = []
         else:
+            # Normal mode - add translation instructions if language is selected
+            translation_instructions = self.get_translation_instructions()
+            if self.web_search_enabled and self.search_client:
+                tools = [{
+                    "type": "function",
+                    "function": {
+                        "name": "web_search",
+                        "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "The search query"
+                                }
+                            },
+                            "required": ["query"]
+                        }
+                    }
+                }]
+                print("Web search function added to tools")
+                search_instructions = (
+                    "\n\nYou have web search capabilities. "
+                    "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
+                    "- Weather (날씨, 기온, 비, 눈)\n"
+                    "- News (뉴스, 소식)\n"
+                    "- Current events (현재, 최근, 오늘, 지금)\n"
+                    "- Prices (가격, 환율, 주가)\n"
+                    "- Sports scores or results\n"
+                    "- Any question about 2024 or 2025\n"
+                    "- Any time-sensitive information\n\n"
+                    "When in doubt, USE web_search. It's better to search and provide accurate information "
+                    "than to guess or use outdated information."
+                )
+                instructions = base_instructions + search_instructions + translation_instructions
+            else:
+                instructions = base_instructions + translation_instructions
         async with self.client.beta.realtime.connect(
             model="gpt-4o-mini-realtime-preview-2024-12-17"
                 "tool_choice": "auto" if tools else "none"
             }
+            # Add voice setting based on interpretation or translation language
+            voice_language = self.interpretation_language if self.interpretation_mode else self.target_language
+            if voice_language:
                 # Map languages to appropriate voices
                 voice_map = {
                     "en": "alloy",
                     "zh": "nova",
                     # Default to alloy for other languages
                 }
+                session_update["voice"] = voice_map.get(voice_language, "alloy")
             await conn.session.update(session=session_update)
             self.connection = conn
                 if event.type == "response.audio_transcript.done":
                     output_data = {
                         "event": event,
+                        "language": SUPPORTED_LANGUAGES.get(
+                            self.interpretation_language if self.interpretation_mode else self.target_language,
+                            ""
+                        ) if (self.interpretation_language or self.target_language) else ""
                     }
                     await self.output_queue.put(AdditionalOutputs(output_data))
     web_search_enabled = body.get("web_search_enabled", False)
     target_language = body.get("target_language", "")
     system_prompt = body.get("system_prompt", "")
+    interpretation_mode = body.get("interpretation_mode", False)
+    interpretation_language = body.get("interpretation_language", "")
     print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
+          f"target_language: {target_language}, interpretation_mode: {interpretation_mode}, "
+          f"interpretation_language: {interpretation_language}")
     # Store settings with timestamp
     if webrtc_id:
             'web_search_enabled': web_search_enabled,
             'target_language': target_language,
             'system_prompt': system_prompt,
+            'interpretation_mode': interpretation_mode,
+            'interpretation_language': interpretation_language,
             'timestamp': asyncio.get_event_loop().time()
         }