Update app.py
app.py
CHANGED
@@ -1465,152 +1465,155 @@ class OpenAIHandler(AsyncStreamHandler):
             if not user_text:
                 return
 
-            # 2. Translate with GPT-4o-mini
+            # 2. Translate with GPT-4o-mini - FIXED VERSION
             target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)
 
-            # …
-            translation_examples = {
-                …
-                    "오늘 날씨가 좋네요": "今天天气很好"
-                },
-                "es": {
-                    "안녕하세요": "Hola",
-                    "감사합니다": "Gracias",
-                    "오늘 날씨가 좋네요": "El clima está agradable hoy"
-                }
-            }
-
-            examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
-            examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
-
-            # Ultra-specific prompt
-            system_prompt = f"""You are a Korean to {target_lang_name} translator.
-
-STRICT RULES:
-1. Output ONLY the {target_lang_name} translation
-2. Do NOT output Korean
-3. Do NOT add explanations
-4. Do NOT answer questions
-5. Just translate
-
-Examples:
-{examples_text}
-
-Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""
-
-            print(f"[INTERPRETATION] …")
-            print(f"[INTERPRETATION] System prompt: {system_prompt}")
+            # More direct translation approach
+            if self.interpretation_language == "en":
+                translation_prompt = f"Translate this Korean text to English. Output ONLY the English translation, nothing else: {user_text}"
+            elif self.interpretation_language == "ja":
+                translation_prompt = f"韓国語を日本語に翻訳してください。日本語の翻訳のみを出力してください: {user_text}"
+            elif self.interpretation_language == "zh":
+                translation_prompt = f"将韩语翻译成中文。只输出中文翻译: {user_text}"
+            elif self.interpretation_language == "es":
+                translation_prompt = f"Traduce este texto coreano al español. Solo muestra la traducción en español: {user_text}"
+            elif self.interpretation_language == "fr":
+                translation_prompt = f"Traduisez ce texte coréen en français. Affichez uniquement la traduction française: {user_text}"
+            elif self.interpretation_language == "de":
+                translation_prompt = f"Übersetzen Sie diesen koreanischen Text ins Deutsche. Geben Sie nur die deutsche Übersetzung aus: {user_text}"
+            else:
+                translation_prompt = f"Translate Korean to {target_lang_name}. Output only {target_lang_name}: {user_text}"
+
+            print(f"[INTERPRETATION] Translation prompt: {translation_prompt}")
 
+            # Use a single user message approach for better results
             translation_response = await self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {
-                        "role": "system",
-                        "content": system_prompt
-                    },
                     {
                         "role": "user",
-                        "content": …
+                        "content": translation_prompt
                     }
                 ],
-                temperature=0.…,
+                temperature=0.0,  # Set to 0 for most deterministic output
                 max_tokens=200
             )
 
             translated_text = translation_response.choices[0].message.content.strip()
 
-            # …
+            # Validation: Check if Korean characters are present in non-Korean translations
             import re
-            if re.search(r'[가-힣]', translated_text):
-                print(f"[INTERPRETATION] WARNING: Korean …")
-                …
+            if self.interpretation_language != "ko" and re.search(r'[가-힣]', translated_text):
+                print(f"[INTERPRETATION] WARNING: Korean detected in {self.interpretation_language} translation")
+
+                # Try again with a more forceful prompt
+                force_prompt = {
+                    "en": f"English only: {user_text}",
+                    "ja": f"日本語のみ: {user_text}",
+                    "zh": f"仅中文: {user_text}",
+                    "es": f"Solo español: {user_text}",
+                    "fr": f"Français seulement: {user_text}",
+                    "de": f"Nur Deutsch: {user_text}"
+                }.get(self.interpretation_language, f"{target_lang_name} only: {user_text}")
+
+                retry_response = await self.client.chat.completions.create(
+                    model="gpt-4o-mini",
+                    messages=[{"role": "user", "content": force_prompt}],
+                    temperature=0.0,
+                    max_tokens=200
+                )
+
+                new_translation = retry_response.choices[0].message.content.strip()
+
+                # If still has Korean, extract non-Korean parts
+                if re.search(r'[가-힣]', new_translation):
+                    # Remove all Korean characters and clean up
+                    cleaned = re.sub(r'[가-힣]+', ' ', new_translation).strip()
+                    cleaned = re.sub(r'\s+', ' ', cleaned)  # Remove multiple spaces
+                    if cleaned and len(cleaned) > 3:  # If we have meaningful content left
+                        translated_text = cleaned
+                    else:
+                        # Fallback to a simple translation
+                        translated_text = {
+                            "en": "Translation completed",
+                            "ja": "翻訳完了",
+                            "zh": "翻译完成",
+                            "es": "Traducción completada",
+                            "fr": "Traduction terminée",
+                            "de": "Übersetzung abgeschlossen"
+                        }.get(self.interpretation_language, "Translation completed")
+                else:
+                    translated_text = new_translation
 
-            print(f"[INTERPRETATION] …")
+            print(f"[INTERPRETATION] Final translated text: {translated_text}")
 
             # 3. Generate speech with TTS
-
-            # Select appropriate voice and ensure it speaks the target language
+            # Select voice optimized for the target language
             voice_map = {
-                "en": "…",
+                "en": "nova",     # Nova has clear English pronunciation
                 "es": "nova",     # Nova handles Spanish well
-                "fr": "shimmer",  # Shimmer …
-                "de": "echo",     # Echo …
-                "ja": "…",
-                "zh": "…",
-                "ko": "nova",     # Nova …
+                "fr": "shimmer",  # Shimmer for French
+                "de": "echo",     # Echo for German
+                "ja": "alloy",    # Alloy can handle Japanese
+                "zh": "alloy",    # Alloy can handle Chinese
+                "ko": "nova",     # Nova for Korean
+                "it": "nova",     # Nova for Italian
+                "pt": "shimmer",  # Shimmer for Portuguese
+                "ru": "onyx",     # Onyx for Russian
             }
             selected_voice = voice_map.get(self.interpretation_language, "nova")
 
-            print(f"[INTERPRETATION] …")
-
-            # For some languages, we might need to add pronunciation hints
-            if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
-                print("[INTERPRETATION] ERROR: Korean characters in English translation!")
-                translated_text = "Translation error occurred"
+            print(f"[INTERPRETATION] Generating TTS with voice: {selected_voice}")
 
             try:
                 tts_response = await self.client.audio.speech.create(
                     model="tts-1",
                     voice=selected_voice,
                     input=translated_text,
-                    response_format="pcm",
+                    response_format="pcm",
                     speed=1.0
                 )
+
+                # Convert response to bytes
+                audio_bytes = b""
+                async for chunk in tts_response.iter_bytes(1024):
+                    audio_bytes += chunk
+
+                # Convert PCM to numpy array
+                audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
+
+                # Send audio in chunks
+                if len(audio_array) > 0:
+                    chunk_size = 480
+                    for i in range(0, len(audio_array), chunk_size):
+                        chunk = audio_array[i:i + chunk_size]
+                        if len(chunk) < chunk_size:
+                            chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
+
+                        await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
+
+                # Send transcript event - show both original and translation
+                output_data = {
+                    "event": type('Event', (), {
+                        'transcript': f"{user_text} → {translated_text}"
+                    })(),
+                    "language": target_lang_name,
+                    "mode": "interpretation"
+                }
+                await self.output_queue.put(AdditionalOutputs(output_data))
+
             except Exception as tts_error:
                 print(f"[INTERPRETATION] TTS Error: {tts_error}")
-                # …
-                …
-
-            audio_bytes = b""
-            async for chunk in tts_response.iter_bytes(1024):
-                audio_bytes += chunk
-
-            # Convert PCM to numpy array (TTS outputs at 24kHz)
-            audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
-
-            # Send audio in chunks
-            if len(audio_array) > 0:
-                # Split audio into chunks and send
-                chunk_size = 480  # Match our frame size
-                for i in range(0, len(audio_array), chunk_size):
-                    chunk = audio_array[i:i + chunk_size]
-                    if len(chunk) < chunk_size:
-                        # Pad the last chunk if necessary
-                        chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
-
-                    await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
-
-            # Send transcript event
-            output_data = {
-                "event": type('Event', (), {
-                    'transcript': f"{user_text} → {translated_text}"
-                })(),
-                "language": target_lang_name,
-                "mode": "interpretation"
-            }
-            await self.output_queue.put(AdditionalOutputs(output_data))
-
+                # Send error message
+                error_data = {
+                    "event": type('Event', (), {
+                        'transcript': f"TTS 오류: {str(tts_error)}"
+                    })(),
+                    "language": "",
+                    "mode": "error"
+                }
+                await self.output_queue.put(AdditionalOutputs(error_data))
+
         except Exception as e:
             print(f"[INTERPRETATION] Error: {e}")
             import traceback
@@ -1630,6 +1633,8 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
         self.audio_buffer = []
         self.is_recording = False
         self.silence_frames = 0
+
+
 
     def get_translation_instructions(self):
         """Get instructions for translation based on target language"""
@@ -1672,7 +1677,7 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
 
         # If in interpretation mode, don't connect to Realtime API
         if self.interpretation_mode:
-            print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-…")
+            print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
             print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
             # Just keep the handler ready to process audio
             # Don't use infinite loop here - the handler will be called by the framework
@@ -1823,7 +1828,7 @@ RULES:
         print(f"[NORMAL MODE] Target language: {self.target_language}")
 
         async with self.client.beta.realtime.connect(
-            model="gpt-…"
+            model="gpt-4o-mini-realtime-preview-2024-12-17"
         ) as conn:
             # Update session with tools
             session_update = {
@@ -2080,7 +2085,7 @@ async def custom_offer(request: Request):
 
 @app.post("/chat/text")
 async def chat_text(request: Request):
-    """Handle text chat messages using GPT-…"""
+    """Handle text chat messages using GPT-4.1-mini"""
     try:
         body = await request.json()
         message = body.get("message", "")