seawolf2357 committed
Commit 0232d6f · verified · 1 Parent(s): 0d0e889

Update app-backup3.py

Files changed (1)
  1. app-backup3.py +114 -119
app-backup3.py CHANGED
@@ -1465,155 +1465,152 @@ class OpenAIHandler(AsyncStreamHandler):
  if not user_text:
  return

- # 2. Translate with GPT-4o-mini - FIXED VERSION
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)

- # More direct translation approach
- if self.interpretation_language == "en":
- translation_prompt = f"Translate this Korean text to English. Output ONLY the English translation, nothing else: {user_text}"
- elif self.interpretation_language == "ja":
- translation_prompt = f"韓国語を日本語に翻訳してください。日本語の翻訳のみを出力してください: {user_text}"
- elif self.interpretation_language == "zh":
- translation_prompt = f"将韩语翻译成中文。只输出中文翻译: {user_text}"
- elif self.interpretation_language == "es":
- translation_prompt = f"Traduce este texto coreano al español. Solo muestra la traducción en español: {user_text}"
- elif self.interpretation_language == "fr":
- translation_prompt = f"Traduisez ce texte coréen en français. Affichez uniquement la traduction française: {user_text}"
- elif self.interpretation_language == "de":
- translation_prompt = f"Übersetzen Sie diesen koreanischen Text ins Deutsche. Geben Sie nur die deutsche Übersetzung aus: {user_text}"
- else:
- translation_prompt = f"Translate Korean to {target_lang_name}. Output only {target_lang_name}: {user_text}"

- print(f"[INTERPRETATION] Translation prompt: {translation_prompt}")

- # Use a single user message approach for better results
  translation_response = await self.client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
  {
  "role": "user",
- "content": translation_prompt
  }
  ],
- temperature=0.0, # Set to 0 for most deterministic output
  max_tokens=200
  )

  translated_text = translation_response.choices[0].message.content.strip()

- # Validation: Check if Korean characters are present in non-Korean translations
  import re
- if self.interpretation_language != "ko" and re.search(r'[가-힣]', translated_text):
- print(f"[INTERPRETATION] WARNING: Korean detected in {self.interpretation_language} translation")
-
- # Try again with a more forceful prompt
- force_prompt = {
- "en": f"English only: {user_text}",
- "ja": f"日本語のみ: {user_text}",
- "zh": f"仅中文: {user_text}",
- "es": f"Solo español: {user_text}",
- "fr": f"Français seulement: {user_text}",
- "de": f"Nur Deutsch: {user_text}"
- }.get(self.interpretation_language, f"{target_lang_name} only: {user_text}")
-
- retry_response = await self.client.chat.completions.create(
- model="gpt-4o-mini",
- messages=[{"role": "user", "content": force_prompt}],
- temperature=0.0,
- max_tokens=200
- )
-
- new_translation = retry_response.choices[0].message.content.strip()
-
- # If still has Korean, extract non-Korean parts
- if re.search(r'[가-힣]', new_translation):
- # Remove all Korean characters and clean up
- cleaned = re.sub(r'[가-힣]+', ' ', new_translation).strip()
- cleaned = re.sub(r'\s+', ' ', cleaned) # Remove multiple spaces
- if cleaned and len(cleaned) > 3: # If we have meaningful content left
- translated_text = cleaned
- else:
- # Fallback to a simple translation
- translated_text = {
- "en": "Translation completed",
- "ja": "翻訳完了",
- "zh": "翻译完成",
- "es": "Traducción completada",
- "fr": "Traduction terminée",
- "de": "Übersetzung abgeschlossen"
- }.get(self.interpretation_language, "Translation completed")
- else:
- translated_text = new_translation

- print(f"[INTERPRETATION] Final translated text: {translated_text}")

  # 3. Generate speech with TTS
- # Select voice optimized for the target language
  voice_map = {
- "en": "nova", # Nova has clear English pronunciation
  "es": "nova", # Nova handles Spanish well
- "fr": "shimmer", # Shimmer for French
- "de": "echo", # Echo for German
- "ja": "alloy", # Alloy can handle Japanese
- "zh": "alloy", # Alloy can handle Chinese
- "ko": "nova", # Nova for Korean
- "it": "nova", # Nova for Italian
- "pt": "shimmer", # Shimmer for Portuguese
- "ru": "onyx", # Onyx for Russian
  }
  selected_voice = voice_map.get(self.interpretation_language, "nova")

- print(f"[INTERPRETATION] Generating TTS with voice: {selected_voice}")

  try:
  tts_response = await self.client.audio.speech.create(
  model="tts-1",
  voice=selected_voice,
  input=translated_text,
- response_format="pcm",
  speed=1.0
  )
-
- # Convert response to bytes
- audio_bytes = b""
- async for chunk in tts_response.iter_bytes(1024):
- audio_bytes += chunk
-
- # Convert PCM to numpy array
- audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
-
- # Send audio in chunks
- if len(audio_array) > 0:
- chunk_size = 480
- for i in range(0, len(audio_array), chunk_size):
- chunk = audio_array[i:i + chunk_size]
- if len(chunk) < chunk_size:
- chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
-
- await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
-
- # Send transcript event - show both original and translation
- output_data = {
- "event": type('Event', (), {
- 'transcript': f"{user_text} → {translated_text}"
- })(),
- "language": target_lang_name,
- "mode": "interpretation"
- }
- await self.output_queue.put(AdditionalOutputs(output_data))
-
  except Exception as tts_error:
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
- # Send error message
- error_data = {
- "event": type('Event', (), {
- 'transcript': f"TTS 오류: {str(tts_error)}"
- })(),
- "language": "",
- "mode": "error"
- }
- await self.output_queue.put(AdditionalOutputs(error_data))
-
  except Exception as e:
  print(f"[INTERPRETATION] Error: {e}")
  import traceback
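
The playback path removed above (and re-added further down in this diff) frames the raw TTS PCM into fixed 480-sample int16 chunks, zero-padding the final chunk before it is queued. A minimal numpy-only sketch of that framing step with a synthetic buffer; the frame_pcm helper and the demo values are illustrative, and the 24 kHz rate comes from the diff's own comment:

    import numpy as np

    SAMPLE_RATE = 24000   # the diff's comment says the TTS model returns 24 kHz PCM
    CHUNK_SIZE = 480      # 480 samples at 24 kHz is a 20 ms frame

    def frame_pcm(audio_bytes: bytes, chunk_size: int = CHUNK_SIZE) -> list:
        """Split 16-bit mono PCM into fixed-size frames, zero-padding the last one."""
        audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
        frames = []
        for i in range(0, len(audio_array), chunk_size):
            chunk = audio_array[i:i + chunk_size]
            if len(chunk) < chunk_size:
                chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
            frames.append(chunk.reshape(1, -1))  # shape (1, chunk_size), as the handler queues it
        return frames

    # Synthetic 0.05 s buffer: 1200 samples -> two full frames plus one padded frame
    demo = np.zeros(1200, dtype=np.int16).tobytes()
    print(len(frame_pcm(demo)))  # -> 3
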
@@ -1633,8 +1630,6 @@ class OpenAIHandler(AsyncStreamHandler):
  self.audio_buffer = []
  self.is_recording = False
  self.silence_frames = 0
-
-

  def get_translation_instructions(self):
  """Get instructions for translation based on target language"""
@@ -1677,7 +1672,7 @@ class OpenAIHandler(AsyncStreamHandler):

  # If in interpretation mode, don't connect to Realtime API
  if self.interpretation_mode:
- print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4.1-mini + TTS")
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
  # Just keep the handler ready to process audio
  # Don't use infinite loop here - the handler will be called by the framework
@@ -1828,7 +1823,7 @@ RULES:
  print(f"[NORMAL MODE] Target language: {self.target_language}")

  async with self.client.beta.realtime.connect(
- model="gpt-4.0-mini-realtime-preview-2024-12-17"
  ) as conn:
  # Update session with tools
  session_update = {
@@ -2085,7 +2080,7 @@ async def custom_offer(request: Request):

  @app.post("/chat/text")
  async def chat_text(request: Request):
- """Handle text chat messages using GPT-4.1-mini"""
  try:
  body = await request.json()
  message = body.get("message", "")
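
Both the removed retry logic in the first hunk and the replacement code further down fall back on the same Hangul check: a character-class match on the precomposed syllable range 가-힣 (U+AC00 to U+D7A3), followed by a re.sub cleanup. A standalone sketch of that check using only the standard re module; the strip_hangul name and the sample strings are illustrative:

    import re

    HANGUL = re.compile(r'[가-힣]')  # precomposed Hangul syllables

    def strip_hangul(text: str) -> str:
        """Return text with Hangul runs removed and leftover whitespace collapsed."""
        if not HANGUL.search(text):
            return text
        cleaned = re.sub(r'[가-힣]+', ' ', text)     # drop Hangul runs, as the retry path does
        return re.sub(r'\s+', ' ', cleaned).strip()  # collapse the remaining whitespace

    print(strip_hangul("Hello 안녕하세요 world"))       # -> "Hello world"
    print(strip_hangul("The weather is nice today"))  # unchanged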
 
  if not user_text:
  return

+ # 2. Translate with GPT-4o-mini
  target_lang_name = SUPPORTED_LANGUAGES.get(self.interpretation_language, self.interpretation_language)

+ # Create very explicit translation examples
+ translation_examples = {
+ "en": {
+ "안녕하세요": "Hello",
+ "감사합니다": "Thank you",
+ "오늘 날씨가 좋네요": "The weather is nice today"
+ },
+ "ja": {
+ "안녕하세요": "こんにちは",
+ "감사합니다": "ありがとうございます",
+ "오늘 날씨가 좋네요": "今日はいい天気ですね"
+ },
+ "zh": {
+ "안녕하세요": "你好",
+ "감사합니다": "谢谢",
+ "오늘 날씨가 좋네요": "今天天气很好"
+ },
+ "es": {
+ "안녕하세요": "Hola",
+ "감사합니다": "Gracias",
+ "오늘 날씨가 좋네요": "El clima está agradable hoy"
+ }
+ }
+
+ examples = translation_examples.get(self.interpretation_language, translation_examples["en"])
+ examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
+
+ # Ultra-specific prompt
+ system_prompt = f"""You are a Korean to {target_lang_name} translator.
+
+ STRICT RULES:
+ 1. Output ONLY the {target_lang_name} translation
+ 2. Do NOT output Korean
+ 3. Do NOT add explanations
+ 4. Do NOT answer questions
+ 5. Just translate
+
+ Examples:
+ {examples_text}
+
+ Now translate the Korean text to {target_lang_name}. Output ONLY the translation in {target_lang_name}:"""

+ print(f"[INTERPRETATION] Translating to {target_lang_name}...")
+ print(f"[INTERPRETATION] System prompt: {system_prompt}")

  translation_response = await self.client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
+ {
+ "role": "system",
+ "content": system_prompt
+ },
  {
  "role": "user",
+ "content": f"Translate this Korean to {target_lang_name}: {user_text}"
  }
  ],
+ temperature=0.1, # Very low temperature
  max_tokens=200
  )

  translated_text = translation_response.choices[0].message.content.strip()

+ # Remove any Korean characters if they accidentally appear
  import re
+ if re.search(r'[가-힣]', translated_text):
+ print(f"[INTERPRETATION] WARNING: Korean characters detected in translation: {translated_text}")
+ # Try to extract only non-Korean parts
+ translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()

+ print(f"[INTERPRETATION] Translated: {translated_text}")

  # 3. Generate speech with TTS
+ print(f"[INTERPRETATION] Generating speech for text: {translated_text}")
+
+ # Select appropriate voice and ensure it speaks the target language
  voice_map = {
+ "en": "alloy", # Alloy is native English speaker
  "es": "nova", # Nova handles Spanish well
+ "fr": "shimmer", # Shimmer handles French well
+ "de": "echo", # Echo handles German well
+ "ja": "nova", # Nova can handle Japanese
+ "zh": "nova", # Nova can handle Chinese
+ "ko": "nova", # Nova can handle Korean
  }
  selected_voice = voice_map.get(self.interpretation_language, "nova")

+ print(f"[INTERPRETATION] Using voice: {selected_voice} for language: {self.interpretation_language}")
+
+ # For some languages, we might need to add pronunciation hints
+ if self.interpretation_language == "en" and re.search(r'[가-힣]', translated_text):
+ print("[INTERPRETATION] ERROR: Korean characters in English translation!")
+ translated_text = "Translation error occurred"

  try:
  tts_response = await self.client.audio.speech.create(
  model="tts-1",
  voice=selected_voice,
  input=translated_text,
+ response_format="pcm", # PCM format for direct playback
  speed=1.0
  )
  except Exception as tts_error:
  print(f"[INTERPRETATION] TTS Error: {tts_error}")
+ # If TTS fails, try with a different voice
+ tts_response = await self.client.audio.speech.create(
+ model="tts-1",
+ voice="alloy", # Fallback to alloy
+ input=translated_text,
+ response_format="pcm",
+ speed=1.0
+ )
+
+ # Convert response to bytes
+ audio_bytes = b""
+ async for chunk in tts_response.iter_bytes(1024):
+ audio_bytes += chunk
+
+ # Convert PCM to numpy array (TTS outputs at 24kHz)
+ audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
+
+ # Send audio in chunks
+ if len(audio_array) > 0:
+ # Split audio into chunks and send
+ chunk_size = 480 # Match our frame size
+ for i in range(0, len(audio_array), chunk_size):
+ chunk = audio_array[i:i + chunk_size]
+ if len(chunk) < chunk_size:
+ # Pad the last chunk if necessary
+ chunk = np.pad(chunk, (0, chunk_size - len(chunk)), 'constant')
+
+ await self.output_queue.put((SAMPLE_RATE, chunk.reshape(1, -1)))
+
+ # Send transcript event
+ output_data = {
+ "event": type('Event', (), {
+ 'transcript': f"{user_text} → {translated_text}"
+ })(),
+ "language": target_lang_name,
+ "mode": "interpretation"
+ }
+ await self.output_queue.put(AdditionalOutputs(output_data))
+
  except Exception as e:
  print(f"[INTERPRETATION] Error: {e}")
  import traceback
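
The heart of the rewritten translation step above is a single chat completion against gpt-4o-mini with a few-shot system prompt and near-zero temperature. A trimmed standalone sketch of that call shape with the openai Python SDK; the client setup, the translate_ko helper, and the abbreviated two-entry example table are assumptions for illustration, not the handler's exact code:

    import asyncio
    from openai import AsyncOpenAI  # assumes OPENAI_API_KEY is set in the environment

    client = AsyncOpenAI()
    EXAMPLES = {"안녕하세요": "Hello", "감사합니다": "Thank you"}  # abbreviated few-shot table

    async def translate_ko(user_text: str, target_lang_name: str = "English") -> str:
        examples_text = "\n".join(f'"{k}" → "{v}"' for k, v in EXAMPLES.items())
        system_prompt = (
            f"You are a Korean to {target_lang_name} translator.\n"
            f"Output ONLY the {target_lang_name} translation. Do NOT output Korean.\n"
            f"Examples:\n{examples_text}"
        )
        resp = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Translate this Korean to {target_lang_name}: {user_text}"},
            ],
            temperature=0.1,
            max_tokens=200,
        )
        return resp.choices[0].message.content.strip()

    # asyncio.run(translate_ko("오늘 날씨가 좋네요"))  # -> e.g. "The weather is nice today"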
 
  self.audio_buffer = []
  self.is_recording = False
  self.silence_frames = 0

  def get_translation_instructions(self):
  """Get instructions for translation based on target language"""
 

  # If in interpretation mode, don't connect to Realtime API
  if self.interpretation_mode:
+ print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
  # Just keep the handler ready to process audio
  # Don't use infinite loop here - the handler will be called by the framework
 
  print(f"[NORMAL MODE] Target language: {self.target_language}")

  async with self.client.beta.realtime.connect(
+ model="gpt-4o-mini-realtime-preview-2024-12-17"
  ) as conn:
  # Update session with tools
  session_update = {
 

  @app.post("/chat/text")
  async def chat_text(request: Request):
+ """Handle text chat messages using GPT-4o-mini"""
  try:
  body = await request.json()
  message = body.get("message", "")
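
The last hunk only retitles the /chat/text docstring to match the gpt-4o-mini backend, and the diff shows just the top of that handler. For orientation, a minimal sketch of an endpoint with the same shape; the FastAPI app object, the shared client, the response field, and the error handling are assumptions rather than the file's actual implementation:

    from fastapi import FastAPI, Request
    from openai import AsyncOpenAI

    app = FastAPI()
    client = AsyncOpenAI()

    @app.post("/chat/text")
    async def chat_text(request: Request):
        """Handle text chat messages using GPT-4o-mini"""
        try:
            body = await request.json()
            message = body.get("message", "")
            resp = await client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": message}],
                max_tokens=200,
            )
            return {"response": resp.choices[0].message.content}
        except Exception as e:
            return {"error": str(e)}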