Update app.py
app.py CHANGED
@@ -1,5 +1,5 @@
+import asyncio
 import base64
-import re
 import json
 from pathlib import Path
 import os
@@ -1341,75 +1341,38 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
 
 
 class OpenAIHandler(AsyncStreamHandler):
-    ...
-    # ⚡ New helper function
-    async def _translate_text(self, source_ko: str) -> str:
-        """
-        Dedicated Korean → self.interpretation_language translation.
-        - Uses a function call so that only the "pure translation" is returned.
-        """
-        tgt_code = self.interpretation_language
-        tgt_name = SUPPORTED_LANGUAGES.get(tgt_code, tgt_code)
-
-        translate_tool = [{
-            "type": "function",
-            "function": {
-                "name": "return_translation",
-                "description": f"Return ONLY the {tgt_name} translation of the given Korean sentence.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "translation": {"type": "string"}
-                    },
-                    "required": ["translation"]
-                }
-            }
-        }]
-
-        resp = await self.client.chat.completions.create(
-            model="gpt-4o-mini",
-            tools=translate_tool,
-            tool_choice="auto",
-            messages=[
-                {"role": "system",
-                 "content": f"You are a Korean→{tgt_name} translator. Respond via function call ONLY."},
-                {"role": "user", "content": source_ko}
-            ],
-            temperature=0.0,
-        )
-        ...
+    def __init__(self, web_search_enabled: bool = False, target_language: str = "",
+                 system_prompt: str = "", webrtc_id: str = None,
+                 interpretation_mode: bool = False, interpretation_language: str = "") -> None:
+        super().__init__(
+            expected_layout="mono",
+            output_sample_rate=SAMPLE_RATE,
+            output_frame_size=480,
+            input_sample_rate=SAMPLE_RATE,
+        )
+        self.connection = None
+        self.output_queue = asyncio.Queue()
+        self.search_client = search_client
+        self.function_call_in_progress = False
+        self.current_function_args = ""
+        self.current_call_id = None
+        self.webrtc_id = webrtc_id
+        self.web_search_enabled = web_search_enabled
+        self.target_language = target_language
+        self.system_prompt = system_prompt
+        self.interpretation_mode = interpretation_mode
+        self.interpretation_language = interpretation_language
+
+        # For interpretation mode
+        self.audio_buffer = []
+        self.is_recording = False
+        self.silence_frames = 0
+        self.silence_threshold = 20  # Reduced for faster response (20 frames = ~0.4 seconds)
+        self.min_audio_length = 10  # Minimum frames to consider as speech
+
+        print(f"Handler created with web_search_enabled={web_search_enabled}, "
+              f"target_language={target_language}, webrtc_id={webrtc_id}, "
+              f"interpretation_mode={interpretation_mode}, interpretation_language={interpretation_language}")
 
     def copy(self):
        # Get the most recent settings
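
A note on the removed helper: the `tools`/`tool_choice` setup plus the "Respond via function call ONLY" system prompt steer gpt-4o-mini into answering through `return_translation`, so the translation arrives as structured JSON rather than free text. The code that unpacked the response sits in the lines collapsed out of this hunk; as a minimal sketch, assuming the standard OpenAI Python SDK v1 response shape (`extract_translation` is a hypothetical name, not from app.py):

import json

def extract_translation(resp) -> str:
    # The forced function call puts the translated sentence in the
    # tool call's JSON arguments instead of the message content.
    call = resp.choices[0].message.tool_calls[0]
    return json.loads(call.function.arguments)["translation"]
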
@@ -1711,20 +1674,9 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
         if self.interpretation_mode:
             print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
             print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
-
-            # The receive() and emit() methods will handle audio processing
-            while True:
-                try:
-                    await asyncio.sleep(1.0)
-                    # Just keep alive
-                    if not self.interpretation_mode:
-                        print("[INTERPRETATION MODE] Mode disabled, exiting loop")
-                        break
-                except asyncio.CancelledError:
-                    print("[INTERPRETATION MODE] Cancelled")
-                    break
+            # Just keep the handler ready to process audio
+            # Don't use an infinite loop here - the handler will be called by the framework
+            self.client = openai.AsyncOpenAI()
             return
 
         # Normal mode - connect to Realtime API
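
The '+' side replaces the keep-alive polling loop with a single `openai.AsyncOpenAI()` client; `receive()` and `emit()` then drive the "Whisper + GPT-4o-mini + TTS" round trip the log lines mention. A hedged sketch of that pipeline using stock OpenAI SDK endpoints (`translate_chunk`, the WAV buffering, and the `alloy` voice are illustrative assumptions, not code from app.py):

import io

async def translate_chunk(client, wav_bytes: bytes, target: str) -> bytes:
    buf = io.BytesIO(wav_bytes)
    buf.name = "speech.wav"  # the SDK infers the audio format from the name
    # 1) Speech -> Korean text
    text = (await client.audio.transcriptions.create(
        model="whisper-1", file=buf)).text
    # 2) Korean text -> target-language text
    resp = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system",
                   "content": f"Translate the user's Korean into {target}."},
                  {"role": "user", "content": text}],
        temperature=0.0)
    translated = resp.choices[0].message.content
    # 3) Target-language text -> speech
    speech = await client.audio.speech.create(
        model="tts-1", voice="alloy", input=translated)
    return speech.content
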
@@ -1993,12 +1945,6 @@ RULES:
             _, array = frame
             array = array.squeeze()
 
-            # Make sure we're not connected to Realtime API
-            if self.connection:
-                print("[INTERPRETATION] WARNING: Connection exists in interpretation mode! Closing it.")
-                await self.connection.close()
-                self.connection = None
-
             # Simple voice activity detection
             audio_level = np.abs(array).mean()
 
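
On the voice activity detection kept in `receive()`: with `output_frame_size=480` at SAMPLE_RATE the handler sees roughly 20 ms frames, so `silence_threshold = 20` closes an utterance after about 0.4 s of quiet and `min_audio_length = 10` drops blips shorter than ~0.2 s. A sketch of the frame-counting segmentation those fields imply (the amplitude threshold and the return protocol are assumptions, not code from app.py):

import numpy as np

AMPLITUDE_THRESHOLD = 200  # mean |int16| level counted as speech; tune per mic

class VadState:
    def __init__(self):
        self.audio_buffer, self.is_recording, self.silence_frames = [], False, 0
        self.silence_threshold = 20   # ~0.4 s of 20 ms frames
        self.min_audio_length = 10    # ~0.2 s minimum utterance

    def step(self, frame: np.ndarray) -> np.ndarray | None:
        """Feed one frame; return the full utterance when it ends, else None."""
        if np.abs(frame).mean() > AMPLITUDE_THRESHOLD:
            self.is_recording, self.silence_frames = True, 0
            self.audio_buffer.append(frame)
        elif self.is_recording:
            self.silence_frames += 1
            self.audio_buffer.append(frame)
            if (self.silence_frames >= self.silence_threshold
                    and len(self.audio_buffer) >= self.min_audio_length):
                utterance = np.concatenate(self.audio_buffer)
                self.audio_buffer, self.is_recording, self.silence_frames = [], False, 0
                return utterance
        return None
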
@@ -2020,7 +1966,6 @@ RULES:
         else:
             # Normal mode - use Realtime API
             if not self.connection:
-                print("[NORMAL MODE] No connection in receive()")
                 return
             try:
                 _, array = frame
@@ -2034,23 +1979,9 @@ RULES:
     async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
         # In interpretation mode, we need to keep checking for audio
         if self.interpretation_mode:
-            # Make absolutely sure no Realtime API connection
-            if self.connection:
-                print("[INTERPRETATION] WARNING: Connection found in emit()! Closing.")
-                try:
-                    await self.connection.close()
-                except:
-                    pass
-                self.connection = None
-
             # Use a timeout to prevent blocking forever
             try:
                 item = await asyncio.wait_for(wait_for_item(self.output_queue), timeout=0.1)
-                # Log what we're emitting
-                if isinstance(item, tuple):
-                    print(f"[INTERPRETATION] Emitting audio chunk")
-                elif isinstance(item, AdditionalOutputs):
-                    print(f"[INTERPRETATION] Emitting transcript/event")
                 return item
             except asyncio.TimeoutError:
                 return None
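
`emit()` polls `output_queue` with a 100 ms `asyncio.wait_for` so the framework's audio loop never blocks on an idle queue; returning `None` means "nothing yet, ask again". The same contract shown with a plain `asyncio.Queue` in place of fastrtc's `wait_for_item` helper:

import asyncio

async def poll(queue: asyncio.Queue, timeout: float = 0.1):
    # A slow producer never stalls the caller: after `timeout` seconds
    # with an empty queue we hand back None instead of waiting forever.
    try:
        return await asyncio.wait_for(queue.get(), timeout)
    except asyncio.TimeoutError:
        return None
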