diff --git "a/app.py" "b/app.py"
new file mode 100644
--- /dev/null
+++ "b/app.py"
@@ -0,0 +1,2689 @@
+import asyncio
+import base64
+import json
+from pathlib import Path
+import os
+import numpy as np
+import openai
+from dotenv import load_dotenv
+from fastapi import FastAPI, Request, UploadFile, File, Form
+from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
+from fastrtc import (
+ AdditionalOutputs,
+ AsyncStreamHandler,
+ Stream,
+ get_twilio_turn_credentials,
+ wait_for_item,
+)
+from gradio.utils import get_space
+from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
+import httpx
+from typing import Optional, List, Dict
+import gradio as gr
+import io
+from scipy import signal
+import wave
+import torch
+from transformers import pipeline
+import tempfile
+import subprocess
+import pdfplumber
+import scipy.signal as sps
+from datetime import datetime
+from zoneinfo import ZoneInfo
+import concurrent.futures
+
+load_dotenv()
+
+SAMPLE_RATE = 24000
+WHISPER_SAMPLE_RATE = 16000
+SEOUL_TZ = ZoneInfo("Asia/Seoul")
+
+# Whisper model settings
+WHISPER_MODEL_NAME = "openai/whisper-large-v3-turbo"
+WHISPER_BATCH_SIZE = 8
+
+# Real-time segmentation parameters
+MIN_SEG_SEC = 10
+MAX_SEG_SEC = 15
+SILENCE_SEC = 0.6
+SILENCE_THRESH = 1e-4
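+# A segment is flushed once it is at least MIN_SEG_SEC long and either the trailing
+# SILENCE_SEC window falls below SILENCE_THRESH (RMS) or the segment reaches MAX_SEG_SEC
+# (see _should_flush_whisper below).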
+
+# Thread pool for running blocking Whisper jobs off the event loop
+whisper_executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
+whisper_futures_queue: list[concurrent.futures.Future] = []
+
+# Supported languages for OpenAI Realtime API
+SUPPORTED_LANGUAGES = {
+ "ko": "한국어 (Korean)",
+ "en": "English",
+ "es": "Español (Spanish)",
+ "fr": "Français (French)",
+ "de": "Deutsch (German)",
+ "it": "Italiano (Italian)",
+ "pt": "Português (Portuguese)",
+ "ru": "Русский (Russian)",
+ "ja": "日本語 (Japanese)",
+ "zh": "中文 (Chinese)",
+ "ar": "العربية (Arabic)",
+ "hi": "हिन्दी (Hindi)",
+ "nl": "Nederlands (Dutch)",
+ "pl": "Polski (Polish)",
+ "tr": "Türkçe (Turkish)",
+ "vi": "Tiếng Việt (Vietnamese)",
+ "th": "ไทย (Thai)",
+ "id": "Bahasa Indonesia",
+ "sv": "Svenska (Swedish)",
+ "da": "Dansk (Danish)",
+ "no": "Norsk (Norwegian)",
+ "fi": "Suomi (Finnish)",
+ "he": "עברית (Hebrew)",
+ "uk": "Українська (Ukrainian)",
+ "cs": "Čeština (Czech)",
+ "el": "Ελληνικά (Greek)",
+ "ro": "Română (Romanian)",
+ "hu": "Magyar (Hungarian)",
+ "ms": "Bahasa Melayu (Malay)"
+}
+
+# HTML content embedded as a string (extended with new tabs)
+HTML_CONTENT = """
+<!-- page markup omitted -->
+Mouth of 'MOUSE' - Extended
+마이크 녹음 → 전사 및 4개 언어 번역
+오디오 파일 → 전사 및 4개 언어 번역
+비디오 파일 → 오디오 추출 → 전사 및 4개 언어 번역
+PDF 파일 → 텍스트 추출 → 4개 언어 번역
+실시간 통역 (Korean → EN/ZH/TH/RU)
+10-15초 문장 단위로 자동 전환 — 최신 내용이 위에 표시됩니다.
+"""
+
+# Whisper model loader
+def _get_whisper_pipe():
+ """Lazy load Whisper pipeline"""
+ if not hasattr(_get_whisper_pipe, "pipe"):
+ device = 0 if torch.cuda.is_available() else "cpu"
+ _get_whisper_pipe.pipe = pipeline(
+ task="automatic-speech-recognition",
+ model=WHISPER_MODEL_NAME,
+ chunk_length_s=30,
+ device=device,
+ )
+ return _get_whisper_pipe.pipe
+
+# Audio helpers for Whisper
+def _ensure_16k_whisper(y: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
+ """Resample audio to 16kHz for Whisper"""
+ if sr == WHISPER_SAMPLE_RATE:
+ return y.astype(np.float32), WHISPER_SAMPLE_RATE
+ g = np.gcd(sr, WHISPER_SAMPLE_RATE)
+ y = sps.resample_poly(y, WHISPER_SAMPLE_RATE // g, sr // g).astype(np.float32)
+ return y, WHISPER_SAMPLE_RATE
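+# e.g. for SAMPLE_RATE=24000: gcd(24000, 16000)=8000, so resample_poly(y, 2, 3)
+# downsamples 24 kHz capture audio to the 16 kHz that Whisper expects.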
+
+def _should_flush_whisper(buffer: np.ndarray, sr: int) -> bool:
+ """Check if audio buffer should be flushed for processing"""
+ dur = len(buffer) / sr
+ if dur < MIN_SEG_SEC:
+ return False
+ tail_len = int(SILENCE_SEC * sr)
+ tail = buffer[-tail_len:]
+ rms = np.sqrt(np.mean(tail ** 2)) if len(tail) else 1.0
+ end_of_sentence = rms < SILENCE_THRESH
+ return end_of_sentence or dur >= MAX_SEG_SEC
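+# Minimal sketch of the intended streaming loop (illustrative only; the browser client
+# actually posts chunks to /whisper/realtime/process, and `incoming_audio` is hypothetical):
+#
+#     buf = np.zeros(0, dtype=np.float32)
+#     for chunk, sr in incoming_audio():
+#         y, sr16 = _ensure_16k_whisper(chunk, sr)
+#         buf = np.concatenate([buf, y])
+#         if _should_flush_whisper(buf, sr16):
+#             whisper_futures_queue.append(
+#                 whisper_executor.submit(gpu_asr_translate_whisper, buf, sr16))
+#             buf = np.zeros(0, dtype=np.float32)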
+
+# Translation helper
+def _translate_text_4langs(text: str) -> str:
+ """Translate text to 4 languages using OpenAI"""
+ try:
+ client = openai.OpenAI()
+ prompt = (
+ "Translate the following text into English (EN), Chinese (ZH), Thai (TH) and Russian (RU).\n"
+ "Return ONLY the translations in this format (one per line):\n"
+ "EN: \nZH: \nTH: \nRU: \n\n"
+ f"Text: {text}"
+ )
+
+ response = client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=[
+ {"role": "system", "content": "You are a professional translator."},
+ {"role": "user", "content": prompt}
+ ],
+ temperature=0.7,
+ max_tokens=512
+ )
+
+ return response.choices[0].message.content.strip()
+ except Exception as e:
+ print(f"Translation error: {e}")
+ return f"Translation error: {str(e)}"
+
+# ffmpeg check
+def _check_ffmpeg() -> bool:
+ try:
+ subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
+ return True
+ except Exception:
+ return False
+
+_HAS_FFMPEG = _check_ffmpeg()
+
+def extract_audio_from_video(video_path: str) -> str:
+ """Extract audio from video file"""
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+ tmp.close()
+
+ if _HAS_FFMPEG:
+ cmd = [
+ "ffmpeg", "-i", video_path, "-vn",
+ "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "-y", tmp.name
+ ]
+ result = subprocess.run(cmd, capture_output=True)
+ if result.returncode != 0:
+ os.unlink(tmp.name)
+ raise RuntimeError("ffmpeg error extracting audio")
+ return tmp.name
+ else:
+ raise RuntimeError("ffmpeg is required for video processing")
+
+# GPU workers for Whisper
+def gpu_transcribe_whisper(audio_path: str) -> str:
+ """Transcribe audio using Whisper on GPU"""
+ pipe = _get_whisper_pipe()
+ result = pipe(audio_path, batch_size=WHISPER_BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)
+ return result["text"].strip()
+
+def gpu_asr_translate_whisper(audio: np.ndarray, sr: int) -> str:
+ """Transcribe and translate audio for realtime"""
+ pipe = _get_whisper_pipe()
+ ko = pipe({"array": audio, "sampling_rate": sr}, batch_size=WHISPER_BATCH_SIZE)["text"].strip()
+    trans = _translate_text_4langs(ko).replace("\n", "<br>")
+    ts = datetime.now(SEOUL_TZ).strftime("%Y-%m-%d %H:%M:%S")
+    return f"[{ts}]<br>[KO] {ko}<br>{trans}<br>{'-'*40}<br>"
+
+class BraveSearchClient:
+ """Brave Search API client"""
+ def __init__(self, api_key: str):
+ self.api_key = api_key
+ self.base_url = "https://api.search.brave.com/res/v1/web/search"
+
+ async def search(self, query: str, count: int = 10) -> List[Dict]:
+ """Perform a web search using Brave Search API"""
+ if not self.api_key:
+ return []
+
+ headers = {
+ "Accept": "application/json",
+ "X-Subscription-Token": self.api_key
+ }
+ params = {
+ "q": query,
+ "count": count,
+ "lang": "ko"
+ }
+
+ async with httpx.AsyncClient() as client:
+ try:
+ response = await client.get(self.base_url, headers=headers, params=params)
+ response.raise_for_status()
+ data = response.json()
+
+ results = []
+ if "web" in data and "results" in data["web"]:
+ for result in data["web"]["results"][:count]:
+ results.append({
+ "title": result.get("title", ""),
+ "url": result.get("url", ""),
+ "description": result.get("description", "")
+ })
+ return results
+ except Exception as e:
+ print(f"Brave Search error: {e}")
+ return []
+
+
+# Initialize search client globally
+brave_api_key = os.getenv("BSEARCH_API")
+search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
+print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
+
+# Store connection settings
+connection_settings = {}
+
+# Store realtime sessions
+realtime_sessions = {}
+
+# Initialize OpenAI client for text chat
+client = openai.AsyncOpenAI()
+
+def get_translation_instructions(target_language: str) -> str:
+ """Get instructions for translation based on target language"""
+ if not target_language:
+ return ""
+
+ language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
+ return (
+ f"\n\nIMPORTANT: You must respond in {language_name} ({target_language}). "
+ f"Translate all your responses to {language_name}."
+ )
+
+def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent | dict):
+    # The handler emits either a bare transcript event or a dict wrapping it with language info
+    event = response.get("event", response) if isinstance(response, dict) else response
+    if hasattr(event, "transcript"):
+        chatbot.append({"role": "assistant", "content": event.transcript})
+    return chatbot
+
+
+async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
+ system_prompt: str) -> Dict[str, str]:
+ """Process text chat using GPT-4o-mini model"""
+ try:
+ # If target language is set, override system prompt completely
+ if target_language:
+ language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
+
+ # Create system prompt in target language
+ if target_language == "en":
+ base_instructions = f"You are a helpful assistant. You speak ONLY English. Never use Korean or any other language. {system_prompt}"
+ user_prefix = "Please respond in English: "
+ elif target_language == "ja":
+ base_instructions = f"あなたは親切なアシスタントです。日本語のみを話します。韓国語や他の言語は絶対に使用しません。{system_prompt}"
+ user_prefix = "日本語で答えてください: "
+ elif target_language == "zh":
+ base_instructions = f"你是一个乐于助人的助手。你只说中文。绝不使用韩语或其他语言。{system_prompt}"
+ user_prefix = "请用中文回答: "
+ elif target_language == "es":
+ base_instructions = f"Eres un asistente útil. Solo hablas español. Nunca uses coreano u otros idiomas. {system_prompt}"
+ user_prefix = "Por favor responde en español: "
+ else:
+ base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}. {system_prompt}"
+ user_prefix = f"Please respond in {language_name}: "
+ else:
+ base_instructions = system_prompt or "You are a helpful assistant."
+ user_prefix = ""
+
+ messages = [
+ {"role": "system", "content": base_instructions}
+ ]
+
+ # Handle web search if enabled
+ if web_search_enabled and search_client:
+ # Check if the message requires web search
+ search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
+ "오늘", "지금", "가격", "환율", "주가", "weather", "news",
+ "current", "today", "price", "2024", "2025"]
+
+ should_search = any(keyword in message.lower() for keyword in search_keywords)
+
+ if should_search:
+ # Perform web search
+ search_results = await search_client.search(message)
+ if search_results:
+ search_context = "웹 검색 결과:\n\n"
+ for i, result in enumerate(search_results[:5], 1):
+ search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
+
+ # Add search context in target language if set
+ if target_language:
+ search_instruction = f"Use this search information but respond in {SUPPORTED_LANGUAGES.get(target_language, target_language)} only: "
+ else:
+ search_instruction = "다음 웹 검색 결과를 참고하여 답변하세요: "
+
+ messages.append({
+ "role": "system",
+ "content": search_instruction + "\n\n" + search_context
+ })
+
+ # Add user message with language prefix
+ messages.append({"role": "user", "content": user_prefix + message})
+
+ # Call GPT-4o-mini
+ response = await client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=messages,
+ temperature=0.7,
+ max_tokens=2000
+ )
+
+ response_text = response.choices[0].message.content
+
+ # Final check - remove any Korean if target language is not Korean
+ if target_language and target_language != "ko":
+ import re
+ if re.search(r'[가-힣]', response_text):
+ print(f"[TEXT CHAT] WARNING: Korean detected in response for {target_language}")
+ # Try again with stronger prompt
+ messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
+ retry_response = await client.chat.completions.create(
+ model="gpt-4o-mini",
+ messages=messages,
+ temperature=0.3,
+ max_tokens=2000
+ )
+ response_text = retry_response.choices[0].message.content
+
+ print(f"[TEXT CHAT] Target language: {target_language}")
+ print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
+
+ return {
+ "response": response_text,
+ "language": SUPPORTED_LANGUAGES.get(target_language, "") if target_language else ""
+ }
+
+ except Exception as e:
+ print(f"Error in text chat: {e}")
+ return {"error": str(e)}
+
+
+class OpenAIHandler(AsyncStreamHandler):
+ def __init__(self, web_search_enabled: bool = False, target_language: str = "",
+ system_prompt: str = "", webrtc_id: str = None) -> None:
+ super().__init__(
+ expected_layout="mono",
+ output_sample_rate=SAMPLE_RATE,
+ output_frame_size=480,
+ input_sample_rate=SAMPLE_RATE,
+ )
+ self.connection = None
+ self.output_queue = asyncio.Queue()
+ self.search_client = search_client
+ self.function_call_in_progress = False
+ self.current_function_args = ""
+ self.current_call_id = None
+ self.webrtc_id = webrtc_id
+ self.web_search_enabled = web_search_enabled
+ self.target_language = target_language
+ self.system_prompt = system_prompt
+
+ print(f"[INIT] Handler created with web_search={web_search_enabled}, "
+ f"target_language={target_language}")
+
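+    # fastrtc clones the template handler once per WebRTC connection by calling copy().
+    # The new connection's webrtc_id is not passed in, so the clone adopts the settings
+    # most recently stored by the /webrtc/offer interceptor (see custom_offer below).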
+ def copy(self):
+ # Get the most recent settings
+ if connection_settings:
+ # Get the most recent webrtc_id
+ recent_ids = sorted(connection_settings.keys(),
+ key=lambda k: connection_settings[k].get('timestamp', 0),
+ reverse=True)
+ if recent_ids:
+ recent_id = recent_ids[0]
+ settings = connection_settings[recent_id]
+
+ # Log the settings being copied
+ print(f"[COPY] Copying settings from {recent_id}:")
+
+ return OpenAIHandler(
+ web_search_enabled=settings.get('web_search_enabled', False),
+ target_language=settings.get('target_language', ''),
+ system_prompt=settings.get('system_prompt', ''),
+ webrtc_id=recent_id
+ )
+
+ print(f"[COPY] No settings found, creating default handler")
+ return OpenAIHandler(web_search_enabled=False)
+
+ async def search_web(self, query: str) -> str:
+ """Perform web search and return formatted results"""
+ if not self.search_client or not self.web_search_enabled:
+ return "웹 검색이 비활성화되어 있습니다."
+
+ print(f"Searching web for: {query}")
+ results = await self.search_client.search(query)
+ if not results:
+ return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
+
+ # Format search results
+ formatted_results = []
+ for i, result in enumerate(results, 1):
+ formatted_results.append(
+ f"{i}. {result['title']}\n"
+ f" URL: {result['url']}\n"
+ f" {result['description']}\n"
+ )
+
+ return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
+
+ async def process_text_message(self, message: str):
+ """Process text message from user"""
+ if self.connection:
+ await self.connection.conversation.item.create(
+ item={
+ "type": "message",
+ "role": "user",
+ "content": [{"type": "input_text", "text": message}]
+ }
+ )
+ await self.connection.response.create()
+
+ def get_translation_instructions(self):
+ """Get instructions for translation based on target language"""
+ if not self.target_language:
+ return ""
+
+ language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
+ return (
+ f"\n\nIMPORTANT: You must respond in {language_name} ({self.target_language}). "
+ f"Translate all your responses to {language_name}. "
+ f"This includes both spoken and written responses."
+ )
+
+ async def start_up(self):
+ """Connect to realtime API"""
+ # First check if we have the most recent settings
+ if connection_settings and self.webrtc_id:
+ if self.webrtc_id in connection_settings:
+ settings = connection_settings[self.webrtc_id]
+ self.web_search_enabled = settings.get('web_search_enabled', False)
+ self.target_language = settings.get('target_language', '')
+ self.system_prompt = settings.get('system_prompt', '')
+
+ print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
+
+ print(f"[START_UP] Starting normal mode")
+
+ self.client = openai.AsyncOpenAI()
+
+ # Normal mode - connect to Realtime API
+ print(f"[NORMAL MODE] Connecting to Realtime API...")
+
+ # Define the web search function
+ tools = []
+ base_instructions = self.system_prompt or "You are a helpful assistant."
+
+ # Add translation instructions if language is selected
+ if self.target_language:
+ language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
+
+ # Use the target language for the system prompt itself
+ if self.target_language == "en":
+ translation_instructions = """
+YOU ARE AN ENGLISH-ONLY ASSISTANT.
+
+ABSOLUTE RULES:
+1. You can ONLY speak English. No Korean (한국어) allowed.
+2. Even if the user speaks Korean, you MUST respond in English.
+3. Every single word must be in English.
+4. If you output even one Korean character, you have failed.
+5. Example response: "Hello! How can I help you today?"
+
+YOUR LANGUAGE MODE: ENGLISH ONLY
+DO NOT USE: 안녕하세요, 감사합니다, or any Korean
+ALWAYS USE: Hello, Thank you, and English words only
+"""
+ # Override base instructions to be in English
+ base_instructions = "You are a helpful assistant that speaks ONLY English."
+
+ elif self.target_language == "ja":
+ translation_instructions = """
+あなたは日本語のみを話すアシスタントです。
+
+絶対的なルール:
+1. 日本語のみを使用してください。韓国語(한국어)は禁止です。
+2. ユーザーが韓国語で話しても、必ず日本語で返答してください。
+3. すべての単語は日本語でなければなりません。
+4. 韓国語を一文字でも出力したら失敗です。
+5. 応答例:「こんにちは!今日はどのようにお手伝いできますか?」
+
+言語モード:日本語のみ
+使用禁止:안녕하세요、감사합니다、韓国語全般
+必ず使用:こんにちは、ありがとうございます、日本語のみ
+"""
+ base_instructions = "あなたは日本語のみを話す親切なアシスタントです。"
+
+ elif self.target_language == "zh":
+ translation_instructions = """
+你是一个只说中文的助手。
+
+绝对规则:
+1. 只能使用中文。禁止使用韩语(한국어)。
+2. 即使用户说韩语,也必须用中文回复。
+3. 每个字都必须是中文。
+4. 如果输出任何韩语字符,就是失败。
+5. 回复示例:"你好!我今天能为您做什么?"
+
+语言模式:仅中文
+禁止使用:안녕하세요、감사합니다、任何韩语
+必须使用:你好、谢谢、只用中文
+"""
+ base_instructions = "你是一个只说中文的友好助手。"
+
+ elif self.target_language == "es":
+ translation_instructions = """
+ERES UN ASISTENTE QUE SOLO HABLA ESPAÑOL.
+
+REGLAS ABSOLUTAS:
+1. Solo puedes hablar español. No se permite coreano (한국어).
+2. Incluso si el usuario habla coreano, DEBES responder en español.
+3. Cada palabra debe estar en español.
+4. Si produces aunque sea un carácter coreano, has fallado.
+5. Respuesta ejemplo: "¡Hola! ¿Cómo puedo ayudarte hoy?"
+
+MODO DE IDIOMA: SOLO ESPAÑOL
+NO USAR: 안녕하세요, 감사합니다, o cualquier coreano
+SIEMPRE USAR: Hola, Gracias, y solo palabras en español
+"""
+ base_instructions = "Eres un asistente útil que habla SOLO español."
+ else:
+ translation_instructions = f"""
+YOU MUST ONLY SPEAK {language_name.upper()}.
+
+RULES:
+1. Output only in {language_name}
+2. Never use Korean
+3. Always respond in {language_name}
+"""
+ base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}."
+ else:
+ translation_instructions = ""
+
+ if self.web_search_enabled and self.search_client:
+            # The Realtime API expects flat tool definitions (name/description/parameters at the
+            # top level), unlike the nested {"function": {...}} shape used by Chat Completions.
+            tools = [{
+                "type": "function",
+                "name": "web_search",
+                "description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "The search query"
+                        }
+                    },
+                    "required": ["query"]
+                }
+            }]
+ print("Web search function added to tools")
+
+ search_instructions = (
+ "\n\nYou have web search capabilities. "
+ "IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
+ "- Weather (날씨, 기온, 비, 눈)\n"
+ "- News (뉴스, 소식)\n"
+ "- Current events (현재, 최근, 오늘, 지금)\n"
+ "- Prices (가격, 환율, 주가)\n"
+ "- Sports scores or results\n"
+ "- Any question about 2024 or 2025\n"
+ "- Any time-sensitive information\n\n"
+ "When in doubt, USE web_search. It's better to search and provide accurate information "
+ "than to guess or use outdated information."
+ )
+
+ # Combine all instructions
+ if translation_instructions:
+ # Translation instructions already include base_instructions
+ instructions = translation_instructions + search_instructions
+ else:
+ instructions = base_instructions + search_instructions
+ else:
+ # No web search
+ if translation_instructions:
+ instructions = translation_instructions
+ else:
+ instructions = base_instructions
+
+ print(f"[NORMAL MODE] Base instructions: {base_instructions[:100]}...")
+ print(f"[NORMAL MODE] Translation instructions: {translation_instructions[:200] if translation_instructions else 'None'}...")
+ print(f"[NORMAL MODE] Combined instructions length: {len(instructions)}")
+ print(f"[NORMAL MODE] Target language: {self.target_language}")
+
+ async with self.client.beta.realtime.connect(
+ model="gpt-4o-mini-realtime-preview-2024-12-17"
+ ) as conn:
+ # Update session with tools
+ session_update = {
+ "turn_detection": {"type": "server_vad"},
+ "instructions": instructions,
+ "tools": tools,
+ "tool_choice": "auto" if tools else "none",
+ "temperature": 0.7,
+ "max_response_output_tokens": 4096,
+ "modalities": ["text", "audio"],
+ "voice": "alloy" # Default voice
+ }
+
+ # Use appropriate voice for the language
+ if self.target_language:
+ # Force language through multiple mechanisms
+ # 1. Use voice that's known to work well with the language
+ voice_map = {
+ "en": "nova", # Nova has clearer English
+ "es": "nova", # Nova works for Spanish
+ "fr": "shimmer", # Shimmer for French
+ "de": "echo", # Echo for German
+ "ja": "alloy", # Alloy can do Japanese
+ "zh": "alloy", # Alloy can do Chinese
+ "ko": "nova", # Nova for Korean
+ }
+ session_update["voice"] = voice_map.get(self.target_language, "nova")
+
+ # 2. Add language to modalities (experimental)
+ session_update["modalities"] = ["text", "audio"]
+
+ # 3. Set output format
+ session_update["output_audio_format"] = "pcm16"
+
+ # 4. Add language hint to the system (if supported by API)
+ if self.target_language in ["en", "es", "fr", "de", "ja", "zh"]:
+ session_update["language"] = self.target_language # Try setting language directly
+
+ print(f"[TRANSLATION MODE] Session update: {json.dumps(session_update, indent=2)}")
+
+ await conn.session.update(session=session_update)
+ self.connection = conn
+ print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
+
+ async for event in self.connection:
+ # Debug logging for function calls
+ if event.type.startswith("response.function_call"):
+ print(f"Function event: {event.type}")
+
+ if event.type == "response.audio_transcript.done":
+ print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
+ print(f"[RESPONSE] Expected language: {self.target_language}")
+
+ output_data = {
+ "event": event,
+ "language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
+ }
+ await self.output_queue.put(AdditionalOutputs(output_data))
+
+ elif event.type == "response.audio.delta":
+ await self.output_queue.put(
+ (
+ self.output_sample_rate,
+ np.frombuffer(
+ base64.b64decode(event.delta), dtype=np.int16
+ ).reshape(1, -1),
+ ),
+ )
+
+                # Handle function calls. The Realtime API only emits
+                # response.function_call_arguments.delta/.done (there is no ".start" event),
+                # so accumulation begins on the first delta.
+                elif event.type == "response.function_call_arguments.delta":
+                    if not self.function_call_in_progress:
+                        self.function_call_in_progress = True
+                        self.current_function_args = ""
+                        self.current_call_id = getattr(event, 'call_id', None)
+                    self.current_function_args += event.delta
+
+ elif event.type == "response.function_call_arguments.done":
+ if self.function_call_in_progress:
+ print(f"Function call done, args: {self.current_function_args}")
+ try:
+ args = json.loads(self.current_function_args)
+ query = args.get("query", "")
+
+ # Emit search event to client
+ await self.output_queue.put(AdditionalOutputs({
+ "type": "search",
+ "query": query
+ }))
+
+ # Perform the search
+ search_results = await self.search_web(query)
+ print(f"Search results length: {len(search_results)}")
+
+ # Send function result back to the model
+ if self.connection and self.current_call_id:
+ await self.connection.conversation.item.create(
+ item={
+ "type": "function_call_output",
+ "call_id": self.current_call_id,
+ "output": search_results
+ }
+ )
+ await self.connection.response.create()
+
+ except Exception as e:
+ print(f"Function call error: {e}")
+ finally:
+ self.function_call_in_progress = False
+ self.current_function_args = ""
+ self.current_call_id = None
+
+ async def receive(self, frame: tuple[int, np.ndarray]) -> None:
+ # Normal mode - use Realtime API
+ if not self.connection:
+ print(f"[RECEIVE] No connection in normal mode, skipping")
+ return
+ try:
+ _, array = frame
+ array = array.squeeze()
+ audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
+ await self.connection.input_audio_buffer.append(audio=audio_message)
+ except Exception as e:
+ print(f"Error in receive: {e}")
+
+ async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
+ # Normal mode
+ item = await wait_for_item(self.output_queue)
+
+ # Check if it's a dict with text message
+ if isinstance(item, dict) and item.get('type') == 'text_message':
+ await self.process_text_message(item['content'])
+ return None
+
+ return item
+
+ async def shutdown(self) -> None:
+ print(f"[SHUTDOWN] Called")
+
+ # Normal mode - close Realtime API connection
+ if self.connection:
+ await self.connection.close()
+ self.connection = None
+ print("[NORMAL MODE] Connection closed")
+
+
+# Create initial handler instance
+handler = OpenAIHandler(web_search_enabled=False)
+
+# Create components
+chatbot = gr.Chatbot(type="messages")
+
+# Create stream with handler instance
+stream = Stream(
+ handler, # Pass instance, not factory
+ mode="send-receive",
+ modality="audio",
+ additional_inputs=[chatbot],
+ additional_outputs=[chatbot],
+ additional_outputs_handler=update_chatbot,
+ rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
+ concurrency_limit=5 if get_space() else None,
+ time_limit=300 if get_space() else None,
+)
+
+app = FastAPI()
+
+# Mount stream
+stream.mount(app)
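+# stream.mount(app) registers the WebRTC signalling routes; the custom /webrtc/offer
+# handler below captures per-connection settings and then delegates to stream.offer().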
+
+# Intercept offer to capture settings
+@app.post("/webrtc/offer", include_in_schema=False)
+async def custom_offer(request: Request):
+ """Intercept offer to capture settings"""
+ body = await request.json()
+
+ webrtc_id = body.get("webrtc_id")
+ web_search_enabled = body.get("web_search_enabled", False)
+ target_language = body.get("target_language", "")
+ system_prompt = body.get("system_prompt", "")
+
+ print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
+ print(f"[OFFER] web_search_enabled: {web_search_enabled}")
+ print(f"[OFFER] target_language: {target_language}")
+
+ # Store settings with timestamp
+ if webrtc_id:
+ connection_settings[webrtc_id] = {
+ 'web_search_enabled': web_search_enabled,
+ 'target_language': target_language,
+ 'system_prompt': system_prompt,
+ 'timestamp': asyncio.get_event_loop().time()
+ }
+
+ print(f"[OFFER] Stored settings for {webrtc_id}:")
+ print(f"[OFFER] {connection_settings[webrtc_id]}")
+
+ # Remove our custom route temporarily
+ custom_route = None
+ for i, route in enumerate(app.routes):
+ if hasattr(route, 'path') and route.path == "/webrtc/offer" and route.endpoint == custom_offer:
+ custom_route = app.routes.pop(i)
+ break
+
+ # Forward to stream's offer handler
+ print(f"[OFFER] Forwarding to stream.offer()")
+ response = await stream.offer(body)
+
+ # Re-add our custom route
+ if custom_route:
+ app.routes.insert(0, custom_route)
+
+ print(f"[OFFER] Response status: {response.get('status', 'unknown') if isinstance(response, dict) else 'OK'}")
+
+ return response
+
+
+@app.post("/chat/text")
+async def chat_text(request: Request):
+ """Handle text chat messages using GPT-4o-mini"""
+ try:
+ body = await request.json()
+ message = body.get("message", "")
+ web_search_enabled = body.get("web_search_enabled", False)
+ target_language = body.get("target_language", "")
+ system_prompt = body.get("system_prompt", "")
+
+ if not message:
+ return {"error": "메시지가 비어있습니다."}
+
+ # Process text chat
+ result = await process_text_chat(message, web_search_enabled, target_language, system_prompt)
+
+ return result
+
+ except Exception as e:
+ print(f"Error in chat_text endpoint: {e}")
+ return {"error": "채팅 처리 중 오류가 발생했습니다."}
+
+
+@app.post("/text_message/{webrtc_id}")
+async def receive_text_message(webrtc_id: str, request: Request):
+ """Receive text message from client"""
+ body = await request.json()
+ message = body.get("content", "")
+
+ # Find the handler for this connection
+ if webrtc_id in stream.handlers:
+ handler = stream.handlers[webrtc_id]
+ # Queue the text message for processing
+ await handler.output_queue.put({
+ 'type': 'text_message',
+ 'content': message
+ })
+
+ return {"status": "ok"}
+
+
+@app.get("/outputs")
+async def outputs(webrtc_id: str):
+ """Stream outputs including search events"""
+ async def output_stream():
+ async for output in stream.output_stream(webrtc_id):
+ if hasattr(output, 'args') and output.args:
+ # Check if it's a search event
+ if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
+ yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
+ # Regular transcript event with language info
+ elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
+ event_data = output.args[0]
+ if 'event' in event_data and hasattr(event_data['event'], 'transcript'):
+ data = {
+ "role": "assistant",
+ "content": event_data['event'].transcript,
+ "language": event_data.get('language', '')
+ }
+ yield f"event: output\ndata: {json.dumps(data)}\n\n"
+
+ return StreamingResponse(output_stream(), media_type="text/event-stream")
+
+
+# Whisper endpoints
+@app.post("/whisper/transcribe")
+async def whisper_transcribe(audio: UploadFile = File(...)):
+ """Transcribe audio using Whisper"""
+ try:
+ # Save uploaded file temporarily
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
+ content = await audio.read()
+ tmp.write(content)
+ tmp_path = tmp.name
+
+ # Transcribe
+ text = await asyncio.get_event_loop().run_in_executor(
+ whisper_executor, gpu_transcribe_whisper, tmp_path
+ )
+
+ # Translate
+ translation = _translate_text_4langs(text)
+
+ # Clean up
+ os.unlink(tmp_path)
+
+ return {"text": text, "translation": translation}
+
+ except Exception as e:
+ print(f"Whisper transcribe error: {e}")
+ return {"error": str(e)}
+
+
+@app.post("/whisper/audio")
+async def whisper_audio(audio: UploadFile = File(...)):
+ """Process audio file"""
+ try:
+ # Save uploaded file temporarily
+ with tempfile.NamedTemporaryFile(delete=False, suffix=Path(audio.filename).suffix) as tmp:
+ content = await audio.read()
+ tmp.write(content)
+ tmp_path = tmp.name
+
+ # Transcribe
+ text = await asyncio.get_event_loop().run_in_executor(
+ whisper_executor, gpu_transcribe_whisper, tmp_path
+ )
+
+ # Translate
+ translation = _translate_text_4langs(text)
+
+ # Clean up
+ os.unlink(tmp_path)
+
+ return {"text": text, "translation": translation}
+
+ except Exception as e:
+ print(f"Whisper audio error: {e}")
+ return {"error": str(e)}
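+# Example client call (illustrative; assumes the server is running on localhost:7860):
+#   import httpx
+#   r = httpx.post("http://localhost:7860/whisper/audio",
+#                  files={"audio": open("sample.wav", "rb")}, timeout=120)
+#   print(r.json()["text"], r.json()["translation"])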
+
+
+@app.post("/whisper/video")
+async def whisper_video(video: UploadFile = File(...)):
+ """Process video file"""
+ try:
+ # Save uploaded file temporarily
+ with tempfile.NamedTemporaryFile(delete=False, suffix=Path(video.filename).suffix) as tmp:
+ content = await video.read()
+ tmp.write(content)
+ tmp_path = tmp.name
+
+ # Extract audio
+ audio_path = await asyncio.get_event_loop().run_in_executor(
+ None, extract_audio_from_video, tmp_path
+ )
+
+ # Transcribe
+ text = await asyncio.get_event_loop().run_in_executor(
+ whisper_executor, gpu_transcribe_whisper, audio_path
+ )
+
+ # Translate
+ translation = _translate_text_4langs(text)
+
+ # Clean up
+ os.unlink(tmp_path)
+ os.unlink(audio_path)
+
+ return {"text": text, "translation": translation}
+
+ except Exception as e:
+ print(f"Whisper video error: {e}")
+ return {"error": str(e)}
+
+
+@app.post("/whisper/pdf")
+async def whisper_pdf(pdf: UploadFile = File(...), max_pages: int = Form(10)):
+ """Process PDF file"""
+ try:
+ # Save uploaded file temporarily
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
+ content = await pdf.read()
+ tmp.write(content)
+ tmp_path = tmp.name
+
+ # Extract text
+ extracted = []
+ with pdfplumber.open(tmp_path) as pdf_doc:
+ pages = pdf_doc.pages[:max_pages]
+ for idx, pg in enumerate(pages, start=1):
+ txt = pg.extract_text() or ""
+ if txt.strip():
+ extracted.append(f"[Page {idx}]\n{txt}")
+
+ full_text = "\n\n".join(extracted)
+
+ # Translate each page
+ translated = []
+ for page_text in extracted:
+ trans = _translate_text_4langs(page_text.split('\n', 1)[1]) # Skip page header
+ translated.append(page_text.split('\n')[0] + "\n" + trans)
+
+ # Clean up
+ os.unlink(tmp_path)
+
+ return {"text": full_text, "translation": "\n\n".join(translated)}
+
+ except Exception as e:
+ print(f"Whisper PDF error: {e}")
+ return {"error": str(e)}
+
+
+@app.post("/whisper/realtime/start")
+async def whisper_realtime_start():
+ """Start realtime transcription session"""
+ session_id = os.urandom(16).hex()
+ realtime_sessions[session_id] = {
+ "buffer": [],
+ "queue": asyncio.Queue(),
+ "active": True
+ }
+ return {"session_id": session_id}
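+# Realtime flow: the client calls /whisper/realtime/start for a session_id, POSTs raw
+# 16 kHz PCM16 chunks to /whisper/realtime/process, and reads results over SSE from
+# /whisper/realtime/stream; the session entry is removed when the SSE stream closes.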
+
+
+@app.post("/whisper/realtime/process")
+async def whisper_realtime_process(
+ audio: UploadFile = File(...),
+ session_id: str = Form(...)
+):
+ """Process realtime audio chunk"""
+ if session_id not in realtime_sessions:
+ return {"error": "Invalid session"}
+
+ try:
+ # Read audio data
+ content = await audio.read()
+ audio_array = np.frombuffer(content, dtype=np.int16).astype(np.float32) / 32768.0
+
+ # Process in executor
+ result = await asyncio.get_event_loop().run_in_executor(
+ whisper_executor, gpu_asr_translate_whisper, audio_array, WHISPER_SAMPLE_RATE
+ )
+
+ # Parse result
+        lines = result.split('<br>')
+        timestamp = lines[0].strip('[]') if lines else ""
+        text = lines[1].replace('[KO]', '').strip() if len(lines) > 1 else ""
+        translation = '<br>'.join(lines[2:-2]) if len(lines) > 3 else ""
+
+ # Queue result
+ await realtime_sessions[session_id]["queue"].put({
+ "timestamp": timestamp,
+ "text": text,
+ "translation": translation
+ })
+
+ return {"status": "ok"}
+
+ except Exception as e:
+ print(f"Realtime process error: {e}")
+ return {"error": str(e)}
+
+
+@app.get("/whisper/realtime/stream")
+async def whisper_realtime_stream(session_id: str):
+ """Stream realtime results"""
+ if session_id not in realtime_sessions:
+ return JSONResponse({"error": "Invalid session"}, status_code=404)
+
+ async def stream_results():
+ session = realtime_sessions[session_id]
+ try:
+ while session["active"]:
+ try:
+ result = await asyncio.wait_for(session["queue"].get(), timeout=1.0)
+ yield f"data: {json.dumps(result)}\n\n"
+ except asyncio.TimeoutError:
+ yield f"data: {json.dumps({'keepalive': True})}\n\n"
+ except Exception as e:
+ print(f"Stream error: {e}")
+ finally:
+ # Cleanup session
+ if session_id in realtime_sessions:
+ del realtime_sessions[session_id]
+
+ return StreamingResponse(stream_results(), media_type="text/event-stream")
+
+
+@app.get("/")
+async def index():
+ """Serve the HTML page"""
+ rtc_config = get_twilio_turn_credentials() if get_space() else None
+ html_content = HTML_CONTENT.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
+ return HTMLResponse(content=html_content)
+
+
+if __name__ == "__main__":
+ import uvicorn
+
+ mode = os.getenv("MODE")
+ if mode == "UI":
+ stream.ui.launch(server_port=7860)
+ elif mode == "PHONE":
+ stream.fastphone(host="0.0.0.0", port=7860)
+ else:
+ uvicorn.run(app, host="0.0.0.0", port=7860)
\ No newline at end of file