seawolf2357 committed on
Commit
0aafe09
·
verified ·
1 Parent(s): 7126ca9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -104
app.py CHANGED
@@ -1,5 +1,5 @@
 
1
  import base64
2
- import re
3
  import json
4
  from pathlib import Path
5
  import os
@@ -1341,75 +1341,38 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
1341
 
1342
 
1343
  class OpenAIHandler(AsyncStreamHandler):
1344
- # ① process_interpretation 내부 ― 번역 단계만 교체
1345
- async def process_interpretation(self):
1346
- ...
1347
- # --- 번역 단계 전체 삭제 후 다음 한 줄로 교체 ---
1348
- translated_text = await self._translate_text(user_text)
1349
- # --------------------------------------------------
1350
- ...
1351
- print(f"[INTERPRETATION] Translated: {translated_text}")
1352
- ...
1353
-
1354
- # ② 새 헬퍼 함수 추가
1355
- async def _translate_text(self, source_ko: str) -> str:
1356
- """
1357
- Korean ➜ self.interpretation_language 번역 전용.
1358
- - 함수-콜 활용으로 '순수 번역문'만 보장.
1359
- """
1360
- tgt_code = self.interpretation_language
1361
- tgt_name = SUPPORTED_LANGUAGES.get(tgt_code, tgt_code)
1362
-
1363
- translate_tool = [{
1364
- "type": "function",
1365
- "function": {
1366
- "name": "return_translation",
1367
- "description": f"Return ONLY the {tgt_name} translation of the given Korean sentence.",
1368
- "parameters": {
1369
- "type": "object",
1370
- "properties": {
1371
- "translation": {"type": "string"}
1372
- },
1373
- "required": ["translation"]
1374
- }
1375
- }
1376
- }]
1377
-
1378
- resp = await self.client.chat.completions.create(
1379
- model = "gpt-4o-mini",
1380
- tools = translate_tool,
1381
- tool_choice = "auto",
1382
- messages = [
1383
- {"role": "system",
1384
- "content": f"You are a Koreanโ†’{tgt_name} translator. Respond via function call ONLY."},
1385
- {"role": "user", "content": source_ko}
1386
- ],
1387
- temperature = 0.0,
1388
  )
1389
-
1390
- choice = resp.choices[0]
1391
- if choice.finish_reason == "tool_calls":
1392
- tr = json.loads(choice.message.tool_calls[0].function.arguments)["translation"].strip()
1393
- else:  # 예외적으로 본문에 직접 들어온 경우
1394
- tr = choice.message.content.strip()
1395
-
1396
- # 한글 잔존 시 재시도
1397
- if re.search(r'[가-힣]', tr):
1398
- retry = await self.client.chat.completions.create(
1399
- model="gpt-4o-mini",
1400
- messages=[
1401
- {"role": "system",
1402
- "content": f"Translate the Korean text to {tgt_name}. Output ONLY the translation."},
1403
- {"role": "user", "content": source_ko}
1404
- ],
1405
- temperature=0.0,
1406
- max_tokens=160
1407
- )
1408
- tr = retry.choices[0].message.content.strip()
1409
-
1410
- # 최종 필터: 한글 제거
1411
- return re.sub(r'[가-힣]+', '', tr).strip()
1412
-
1413
 
1414
  def copy(self):
1415
  # Get the most recent settings
@@ -1711,20 +1674,9 @@ Now translate the Korean text to {target_lang_name}. Output ONLY the translation
1711
  if self.interpretation_mode:
1712
  print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1713
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1714
- print(f"[INTERPRETATION MODE] NOT connecting to Realtime API")
1715
-
1716
- # Keep the handler alive without connecting to Realtime API
1717
- # The receive() and emit() methods will handle audio processing
1718
- while True:
1719
- try:
1720
- await asyncio.sleep(1.0)
1721
- # Just keep alive
1722
- if not self.interpretation_mode:
1723
- print("[INTERPRETATION MODE] Mode disabled, exiting loop")
1724
- break
1725
- except asyncio.CancelledError:
1726
- print("[INTERPRETATION MODE] Cancelled")
1727
- break
1728
  return
1729
 
1730
  # Normal mode - connect to Realtime API
@@ -1993,12 +1945,6 @@ RULES:
1993
  _, array = frame
1994
  array = array.squeeze()
1995
 
1996
- # Make sure we're not connected to Realtime API
1997
- if self.connection:
1998
- print("[INTERPRETATION] WARNING: Connection exists in interpretation mode! Closing it.")
1999
- await self.connection.close()
2000
- self.connection = None
2001
-
2002
  # Simple voice activity detection
2003
  audio_level = np.abs(array).mean()
2004
 
@@ -2020,7 +1966,6 @@ RULES:
2020
  else:
2021
  # Normal mode - use Realtime API
2022
  if not self.connection:
2023
- print("[NORMAL MODE] No connection in receive()")
2024
  return
2025
  try:
2026
  _, array = frame
@@ -2034,23 +1979,9 @@ RULES:
2034
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
2035
  # In interpretation mode, we need to keep checking for audio
2036
  if self.interpretation_mode:
2037
- # Make absolutely sure no Realtime API connection
2038
- if self.connection:
2039
- print("[INTERPRETATION] WARNING: Connection found in emit()! Closing.")
2040
- try:
2041
- await self.connection.close()
2042
- except:
2043
- pass
2044
- self.connection = None
2045
-
2046
  # Use a timeout to prevent blocking forever
2047
  try:
2048
  item = await asyncio.wait_for(wait_for_item(self.output_queue), timeout=0.1)
2049
- # Log what we're emitting
2050
- if isinstance(item, tuple):
2051
- print(f"[INTERPRETATION] Emitting audio chunk")
2052
- elif isinstance(item, AdditionalOutputs):
2053
- print(f"[INTERPRETATION] Emitting transcript/event")
2054
  return item
2055
  except asyncio.TimeoutError:
2056
  return None
 
1
+ import asyncio
2
  import base64
 
3
  import json
4
  from pathlib import Path
5
  import os
 
1341
 
1342
 
1343
  class OpenAIHandler(AsyncStreamHandler):
1344
+ def __init__(self, web_search_enabled: bool = False, target_language: str = "",
1345
+ system_prompt: str = "", webrtc_id: str = None,
1346
+ interpretation_mode: bool = False, interpretation_language: str = "") -> None:
1347
+ super().__init__(
1348
+ expected_layout="mono",
1349
+ output_sample_rate=SAMPLE_RATE,
1350
+ output_frame_size=480,
1351
+ input_sample_rate=SAMPLE_RATE,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1352
  )
1353
+ self.connection = None
1354
+ self.output_queue = asyncio.Queue()
1355
+ self.search_client = search_client
1356
+ self.function_call_in_progress = False
1357
+ self.current_function_args = ""
1358
+ self.current_call_id = None
1359
+ self.webrtc_id = webrtc_id
1360
+ self.web_search_enabled = web_search_enabled
1361
+ self.target_language = target_language
1362
+ self.system_prompt = system_prompt
1363
+ self.interpretation_mode = interpretation_mode
1364
+ self.interpretation_language = interpretation_language
1365
+
1366
+ # For interpretation mode
1367
+ self.audio_buffer = []
1368
+ self.is_recording = False
1369
+ self.silence_frames = 0
1370
+ self.silence_threshold = 20 # Reduced for faster response (20 frames = ~0.4 seconds)
1371
+ self.min_audio_length = 10 # Minimum frames to consider as speech
1372
+
1373
+ print(f"Handler created with web_search_enabled={web_search_enabled}, "
1374
+ f"target_language={target_language}, webrtc_id={webrtc_id}, "
1375
+ f"interpretation_mode={interpretation_mode}, interpretation_language={interpretation_language}")
 
1376
 
1377
  def copy(self):
1378
  # Get the most recent settings
 
1674
  if self.interpretation_mode:
1675
  print(f"[INTERPRETATION MODE] Active - using Whisper + GPT-4o-mini + TTS")
1676
  print(f"[INTERPRETATION MODE] Target language: {self.interpretation_language}")
1677
+ # Just keep the handler ready to process audio
1678
+ # Don't use infinite loop here - the handler will be called by the framework
1679
+ self.client = openai.AsyncOpenAI()
 
 
 
 
 
 
 
 
 
 
 
1680
  return
1681
 
1682
  # Normal mode - connect to Realtime API
 
1945
  _, array = frame
1946
  array = array.squeeze()
1947
 
 
 
 
 
 
 
1948
  # Simple voice activity detection
1949
  audio_level = np.abs(array).mean()
1950
 
 
1966
  else:
1967
  # Normal mode - use Realtime API
1968
  if not self.connection:
 
1969
  return
1970
  try:
1971
  _, array = frame
 
1979
  async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
1980
  # In interpretation mode, we need to keep checking for audio
1981
  if self.interpretation_mode:
 
 
 
 
 
 
 
 
 
1982
  # Use a timeout to prevent blocking forever
1983
  try:
1984
  item = await asyncio.wait_for(wait_for_item(self.output_queue), timeout=0.1)
 
 
 
 
 
1985
  return item
1986
  except asyncio.TimeoutError:
1987
  return None