Private-AI

Running

App Files Community

seawolf2357 committed 23 days ago

Commit

17b6035

verified ·

1 Parent(s): cedd312

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -49

app.py CHANGED Viewed

@@ -719,6 +719,15 @@ HTML_CONTENT = """<!DOCTYPE html>
                 sendButton.style.display = 'block';
                 console.log('Interpretation mode disabled');
             }
             console.log('Interpretation mode:', interpretationMode);
@@ -747,6 +756,15 @@ HTML_CONTENT = """<!DOCTYPE html>
                 sendButton.style.display = 'none';
                 console.log('Interpretation mode enabled with language:', interpretationLanguage);
             }
         });
@@ -979,6 +997,17 @@ HTML_CONTENT = """<!DOCTYPE html>
                     updateButtonState();
                 });
                 webrtc_id = Math.random().toString(36).substring(7);
                 const response = await fetch('/webrtc/offer', {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
@@ -1006,8 +1035,21 @@ HTML_CONTENT = """<!DOCTYPE html>
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
                     let content = eventJson.content;
                     if (selectedLanguage && eventJson.language) {
                         content += ` <span class="language-info">[${eventJson.language}]</span>`;
                     }
                     addMessage("assistant", content);
                 });
@@ -1298,30 +1340,22 @@ class OpenAIHandler(AsyncStreamHandler):
         target_language_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
         target_code = self.interpretation_language
-        # Language-specific examples
-        examples = {
-            "en": "Hello, the weather is nice today",
-            "es": "Hola, el clima está agradable hoy",
-            "fr": "Bonjour, il fait beau aujourd'hui",
-            "de": "Hallo, das Wetter ist heute schön",
-            "ja": "こんにちは、今日はいい天気ですね",
-            "zh": "你好，今天天气很好"
-        }
-        example_translation = examples.get(target_code, "Hello, the weather is nice today")
         return (
-            f"INTERPRETATION MODE - CRITICAL RULES:\n\n"
-            f"1. You are ONLY a translator to {target_language_name} (language code: {target_code}).\n"
-            f"2. NEVER respond in any other language.\n"
-            f"3. NEVER generate conversation or additional content.\n"
-            f"4. ONLY translate what the user says.\n"
-            f"5. STOP immediately after translating.\n\n"
-            f"Example:\n"
-            f"If user says: '안녕하세요, 오늘 날씨가 좋네요'\n"
-            f"You MUST respond ONLY: '{example_translation}'\n\n"
-            f"DO NOT say anything else. DO NOT continue talking.\n"
-            f"Your output language MUST be {target_language_name} ONLY."
         )
     def get_translation_instructions(self):
@@ -1368,18 +1402,18 @@ class OpenAIHandler(AsyncStreamHandler):
         # Check if in interpretation mode
         if self.interpretation_mode:
-            # In interpretation mode, override all instructions
-            base_instructions = (
-                f"You are a professional interpreter. Your ONLY task is to translate what the user says "
-                f"into {SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)}. "
-                f"Do not add any commentary, do not continue the conversation, do not generate new content. "
-                f"Simply translate what was said and stop."
             )
-            interpretation_instructions = self.get_interpretation_instructions()
-            instructions = base_instructions + "\n\n" + interpretation_instructions
             # No tools in interpretation mode
             tools = []
             print(f"Interpretation mode active - target language: {self.interpretation_language}")
         else:
             # Normal mode - add translation instructions if language is selected
             translation_instructions = self.get_translation_instructions()
@@ -1430,39 +1464,77 @@ class OpenAIHandler(AsyncStreamHandler):
                     "type": "server_vad",
                     "threshold": 0.5,
                     "prefix_padding_ms": 300,
-                    "silence_duration_ms": 500 if self.interpretation_mode else 700
                 },
                 "instructions": instructions,
                 "tools": tools,
-                "tool_choice": "auto" if tools else "none"
             }
             # Add voice setting based on interpretation or translation language
             voice_language = self.interpretation_language if self.interpretation_mode else self.target_language
-            if voice_language:
-                # Use only alloy voice to avoid language confusion
-                # The model will handle the language based on instructions
                 session_update["voice"] = "alloy"
-                # For interpretation mode, explicitly set the output language
-                if self.interpretation_mode:
-                    session_update["output_audio_format"] = "pcm16"
                 print(f"Voice set to: alloy for language: {voice_language}")
-            # For interpretation mode, ensure proper language settings
-            if self.interpretation_mode and self.interpretation_language:
-                session_update["modalities"] = ["text", "audio"]
-                session_update["temperature"] = 0.3  # Lower temperature for more accurate translation
-                session_update["max_response_output_tokens"] = 500  # Limit output to prevent long generations
-                print(f"Interpretation session config: voice={session_update.get('voice')}, lang={self.interpretation_language}")
             await conn.session.update(session=session_update)
             self.connection = conn
             print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}, "
                   f"interpretation_mode: {self.interpretation_mode}, language: {self.interpretation_language if self.interpretation_mode else self.target_language}")
             async for event in self.connection:
                 # Debug logging for function calls
                 if event.type.startswith("response.function_call"):
                     print(f"Function event: {event.type}")
@@ -1473,7 +1545,8 @@ class OpenAIHandler(AsyncStreamHandler):
                         "language": SUPPORTED_LANGUAGES.get(
                             self.interpretation_language if self.interpretation_mode else self.target_language,
                             ""
-                        ) if (self.interpretation_language or self.target_language) else ""
                     }
                     await self.output_queue.put(AdditionalOutputs(output_data))
@@ -1687,7 +1760,8 @@ async def outputs(webrtc_id: str):
                         data = {
                             "role": "assistant",
                             "content": event.transcript,
-                            "language": output.args[0].get('language', '')
                         }
                         yield f"event: output\ndata: {json.dumps(data)}\n\n"

                 sendButton.style.display = 'block';
                 console.log('Interpretation mode disabled');
+                // If connected, restart to apply normal mode
+                if (peerConnection && peerConnection.connectionState === 'connected') {
+                    showError('일반 모드로 전환하기 위해 연결을 다시 시작합니다.');
+                    stop();
+                    setTimeout(() => {
+                        setupWebRTC();
+                    }, 500);
+                }
             }
             console.log('Interpretation mode:', interpretationMode);
                 sendButton.style.display = 'none';
                 console.log('Interpretation mode enabled with language:', interpretationLanguage);
+                // If already connected, restart the connection with new settings
+                if (peerConnection && peerConnection.connectionState === 'connected') {
+                    showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
+                    stop();
+                    setTimeout(() => {
+                        setupWebRTC();
+                    }, 500);
+                }
             }
         });
                     updateButtonState();
                 });
                 webrtc_id = Math.random().toString(36).substring(7);
+                // Log current settings before sending
+                console.log('Sending offer with settings:', {
+                    webrtc_id: webrtc_id,
+                    web_search_enabled: webSearchEnabled,
+                    target_language: selectedLanguage,
+                    system_prompt: systemPrompt,
+                    interpretation_mode: interpretationMode,
+                    interpretation_language: interpretationLanguage
+                });
                 const response = await fetch('/webrtc/offer', {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
                     let content = eventJson.content;
+                    // Debug logging for interpretation mode
+                    if (interpretationMode) {
+                        console.log('[INTERPRETATION OUTPUT]', {
+                            content: content,
+                            language: eventJson.language,
+                            mode: eventJson.mode,
+                            expectedLanguage: interpretationLanguage
+                        });
+                    }
                     if (selectedLanguage && eventJson.language) {
                         content += ` <span class="language-info">[${eventJson.language}]</span>`;
+                    } else if (interpretationMode && eventJson.language) {
+                        content += ` <span class="language-info">[통역: ${eventJson.language}]</span>`;
                     }
                     addMessage("assistant", content);
                 });
         target_language_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
         target_code = self.interpretation_language
         return (
+            f"YOU ARE IN TRANSLATION MODE. YOUR ONLY FUNCTION IS TO TRANSLATE.\n\n"
+            f"RULES:\n"
+            f"1. TRANSLATE the input to {target_language_name} ({target_code})\n"
+            f"2. OUTPUT ONLY THE TRANSLATION\n"
+            f"3. DO NOT ANSWER QUESTIONS\n"
+            f"4. DO NOT PROVIDE INFORMATION\n"
+            f"5. DO NOT CONTINUE SPEAKING\n"
+            f"6. NEVER OUTPUT IN ANY OTHER LANGUAGE THAN {target_language_name}\n\n"
+            f"EXAMPLES:\n"
+            f"Input: '안녕하세요' → Output: 'Hello' (if English)\n"
+            f"Input: '날씨가 어때요?' → Output: 'How is the weather?' (if English)\n"
+            f"Input: '커피 한 잔 주세요' → Output: 'One coffee please' (if English)\n\n"
+            f"YOU ARE NOT AN AI ASSISTANT. YOU ARE A TRANSLATOR.\n"
+            f"TRANSLATE AND STOP. NO ADDITIONAL WORDS.\n"
+            f"OUTPUT LANGUAGE: {target_language_name} ONLY."
         )
     def get_translation_instructions(self):
         # Check if in interpretation mode
         if self.interpretation_mode:
+            # In interpretation mode, completely override everything
+            instructions = (
+                f"CRITICAL: YOU ARE A TRANSLATION SERVICE.\n"
+                f"TRANSLATE USER INPUT TO {SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)}.\n"
+                f"OUTPUT ONLY THE TRANSLATION. NOTHING ELSE.\n"
+                f"DO NOT ACT AS AN AI. DO NOT ANSWER QUESTIONS.\n"
+                f"JUST TRANSLATE AND STOP."
             )
             # No tools in interpretation mode
             tools = []
             print(f"Interpretation mode active - target language: {self.interpretation_language}")
+            print(f"Instructions: {instructions}")
         else:
             # Normal mode - add translation instructions if language is selected
             translation_instructions = self.get_translation_instructions()
                     "type": "server_vad",
                     "threshold": 0.5,
                     "prefix_padding_ms": 300,
+                    "silence_duration_ms": 100 if self.interpretation_mode else 700  # Even shorter silence
                 },
                 "instructions": instructions,
                 "tools": tools,
+                "tool_choice": "none" if self.interpretation_mode else ("auto" if tools else "none"),
+                "temperature": 0.0 if self.interpretation_mode else 0.7,
+                "max_response_output_tokens": 50 if self.interpretation_mode else 4096  # Very short responses
             }
             # Add voice setting based on interpretation or translation language
             voice_language = self.interpretation_language if self.interpretation_mode else self.target_language
+            # For interpretation mode, ensure proper settings
+            if self.interpretation_mode and self.interpretation_language:
+                # Force simple translation behavior
                 session_update["voice"] = "alloy"
+                session_update["modalities"] = ["audio", "text"]  # Changed order
+                # Create a very specific prompt for the target language
+                target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
+                # Use very explicit language instructions
+                if self.interpretation_language == "en":
+                    lang_specific = "Respond in English only. 영어로만 답하세요."
+                elif self.interpretation_language == "es":
+                    lang_specific = "Respond in Spanish only. Solo responde en español."
+                elif self.interpretation_language == "fr":
+                    lang_specific = "Respond in French only. Répondez uniquement en français."
+                elif self.interpretation_language == "de":
+                    lang_specific = "Respond in German only. Antworten Sie nur auf Deutsch."
+                elif self.interpretation_language == "ja":
+                    lang_specific = "Respond in Japanese only. 日本語でのみ答えてください。"
+                elif self.interpretation_language == "zh":
+                    lang_specific = "Respond in Chinese only. 只用中文回答。"
+                else:
+                    lang_specific = f"Respond in {target_lang_name} only."
+                # Override instructions with ultra-specific directive
+                session_update["instructions"] = (
+                    f"TRANSLATE TO {target_lang_name.upper()}. "
+                    f"{lang_specific} "
+                    f"You are a translation machine. "
+                    f"Input → Translation. Nothing else. "
+                    f"Do not chat. Do not explain. Just translate. "
+                    f"Maximum 20 words per response."
+                )
+                # Additional session parameters for interpretation mode
+                session_update["input_audio_transcription"] = {
+                    "model": "whisper-1"
+                }
+                print(f"[INTERPRETATION MODE] Target: {self.interpretation_language} ({target_lang_name})")
+                print(f"[INTERPRETATION MODE] Instructions: {session_update['instructions']}")
+                print(f"[INTERPRETATION MODE] Session config: {session_update}")
+            elif voice_language:
+                # Normal translation mode
+                session_update["voice"] = "alloy"
                 print(f"Voice set to: alloy for language: {voice_language}")
             await conn.session.update(session=session_update)
             self.connection = conn
             print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}, "
                   f"interpretation_mode: {self.interpretation_mode}, language: {self.interpretation_language if self.interpretation_mode else self.target_language}")
             async for event in self.connection:
+                # Debug logging for interpretation mode
+                if self.interpretation_mode and event.type == "response.audio_transcript.done":
+                    print(f"[INTERPRETATION] Transcript: {event.transcript[:100]}...")
+                    print(f"[INTERPRETATION] Expected language: {self.interpretation_language}")
                 # Debug logging for function calls
                 if event.type.startswith("response.function_call"):
                     print(f"Function event: {event.type}")
                         "language": SUPPORTED_LANGUAGES.get(
                             self.interpretation_language if self.interpretation_mode else self.target_language,
                             ""
+                        ) if (self.interpretation_language or self.target_language) else "",
+                        "mode": "interpretation" if self.interpretation_mode else "normal"
                     }
                     await self.output_queue.put(AdditionalOutputs(output_data))
                         data = {
                             "role": "assistant",
                             "content": event.transcript,
+                            "language": output.args[0].get('language', ''),
+                            "mode": output.args[0].get('mode', 'normal')
                         }
                         yield f"event: output\ndata: {json.dumps(data)}\n\n"