import asyncio
import base64
import json
from pathlib import Path
import os
import numpy as np
import openai
from dotenv import load_dotenv
from fastapi import FastAPI, Request, UploadFile, File, Form
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastrtc import (
AdditionalOutputs,
AsyncStreamHandler,
Stream,
get_twilio_turn_credentials,
wait_for_item,
)
from gradio.utils import get_space
from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
import httpx
from typing import Optional, List, Dict
import gradio as gr
import io
from scipy import signal
import wave
import torch
from transformers import pipeline
import tempfile
import subprocess
import pdfplumber
import scipy.signal as sps
from datetime import datetime
from zoneinfo import ZoneInfo
import concurrent.futures
load_dotenv()
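# Audio sample rates: the Realtime API streams 24 kHz PCM, while Whisper expects 16 kHz input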
SAMPLE_RATE = 24000
WHISPER_SAMPLE_RATE = 16000
SEOUL_TZ = ZoneInfo("Asia/Seoul")
# Whisper model settings
WHISPER_MODEL_NAME = "openai/whisper-large-v3-turbo"
WHISPER_BATCH_SIZE = 8
# Real-time segmentation parameters
MIN_SEG_SEC = 10       # do not flush a segment shorter than this (seconds)
MAX_SEG_SEC = 15       # force a flush once the buffer reaches this length (seconds)
SILENCE_SEC = 0.6      # trailing window checked for silence (seconds)
SILENCE_THRESH = 1e-4  # RMS level below which the tail counts as silence
# CPU-side pool for Whisper tasks
whisper_executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
whisper_futures_queue: list[concurrent.futures.Future] = []  # futures for Whisper jobs submitted to the executor
# Supported languages for OpenAI Realtime API
SUPPORTED_LANGUAGES = {
"ko": "한국어 (Korean)",
"en": "English",
"es": "Español (Spanish)",
"fr": "Français (French)",
"de": "Deutsch (German)",
"it": "Italiano (Italian)",
"pt": "Português (Portuguese)",
"ru": "Русский (Russian)",
"ja": "日本語 (Japanese)",
"zh": "中文 (Chinese)",
"ar": "العربية (Arabic)",
"hi": "हिन्दी (Hindi)",
"nl": "Nederlands (Dutch)",
"pl": "Polski (Polish)",
"tr": "Türkçe (Turkish)",
"vi": "Tiếng Việt (Vietnamese)",
"th": "ไทย (Thai)",
"id": "Bahasa Indonesia",
"sv": "Svenska (Swedish)",
"da": "Dansk (Danish)",
"no": "Norsk (Norwegian)",
"fi": "Suomi (Finnish)",
"he": "עברית (Hebrew)",
"uk": "Українська (Ukrainian)",
"cs": "Čeština (Czech)",
"el": "Ελληνικά (Greek)",
"ro": "Română (Romanian)",
"hu": "Magyar (Hungarian)",
"ms": "Bahasa Melayu (Malay)"
}
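# Maps language codes (the <option> values in the UI's language <select>) to display names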
# HTML content embedded as a string (extended with new tabs)
HTML_CONTENT = """<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Mouth of 'MOUSE' - Extended</title>
<style>
:root {
--primary-color: #6f42c1;
--secondary-color: #563d7c;
--dark-bg: #121212;
--card-bg: #1e1e1e;
--text-color: #f8f9fa;
--border-color: #333;
--hover-color: #8a5cf6;
}
body {
font-family: "SF Pro Display", -apple-system, BlinkMacSystemFont, sans-serif;
background-color: var(--dark-bg);
color: var(--text-color);
margin: 0;
padding: 0;
height: 100vh;
display: flex;
flex-direction: column;
overflow: hidden;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 20px;
flex-grow: 1;
display: flex;
flex-direction: column;
width: 100%;
height: 100vh;
box-sizing: border-box;
overflow: hidden;
}
.header {
text-align: center;
padding: 15px 0;
border-bottom: 1px solid var(--border-color);
margin-bottom: 20px;
flex-shrink: 0;
background-color: var(--card-bg);
}
.tabs-container {
display: flex;
gap: 10px;
margin-bottom: 20px;
border-bottom: 2px solid var(--border-color);
padding-bottom: 10px;
overflow-x: auto;
scrollbar-width: thin;
scrollbar-color: var(--primary-color) var(--card-bg);
}
.tab-button {
padding: 10px 20px;
background-color: var(--card-bg);
color: var(--text-color);
border: 1px solid var(--border-color);
border-radius: 8px 8px 0 0;
cursor: pointer;
transition: all 0.3s;
white-space: nowrap;
font-size: 14px;
}
.tab-button:hover {
background-color: var(--secondary-color);
}
.tab-button.active {
background-color: var(--primary-color);
border-bottom: 2px solid var(--primary-color);
}
.tab-content {
display: none;
flex-grow: 1;
overflow: hidden;
flex-direction: column;
}
.tab-content.active {
display: flex;
}
.main-content {
display: flex;
gap: 20px;
flex-grow: 1;
min-height: 0;
overflow: hidden;
}
.sidebar {
width: 350px;
flex-shrink: 0;
display: flex;
flex-direction: column;
gap: 20px;
overflow-y: auto;
max-height: calc(100vh - 120px);
}
.chat-section {
flex-grow: 1;
display: flex;
flex-direction: column;
min-width: 0;
}
.logo {
display: flex;
align-items: center;
justify-content: center;
gap: 10px;
}
.logo h1 {
margin: 0;
background: linear-gradient(135deg, var(--primary-color), #a78bfa);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
font-size: 32px;
letter-spacing: 1px;
}
/* Settings section */
.settings-section {
background-color: var(--card-bg);
border-radius: 12px;
padding: 20px;
border: 1px solid var(--border-color);
overflow-y: auto;
flex-grow: 1;
}
.settings-grid {
display: flex;
flex-direction: column;
gap: 15px;
margin-bottom: 15px;
}
.setting-item {
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
}
.setting-label {
font-size: 14px;
color: #aaa;
min-width: 60px;
}
/* Toggle switch */
.toggle-switch {
position: relative;
width: 50px;
height: 26px;
background-color: #ccc;
border-radius: 13px;
cursor: pointer;
transition: background-color 0.3s;
}
.toggle-switch.active {
background-color: var(--primary-color);
}
.toggle-slider {
position: absolute;
top: 3px;
left: 3px;
width: 20px;
height: 20px;
background-color: white;
border-radius: 50%;
transition: transform 0.3s;
}
.toggle-switch.active .toggle-slider {
transform: translateX(24px);
}
/* Select dropdown */
select {
background-color: var(--card-bg);
color: var(--text-color);
border: 1px solid var(--border-color);
padding: 8px 12px;
border-radius: 6px;
font-size: 14px;
cursor: pointer;
min-width: 120px;
max-width: 200px;
}
select:focus {
outline: none;
border-color: var(--primary-color);
}
/* Text inputs */
.text-input-section {
margin-top: 15px;
}
input[type="text"], input[type="file"], textarea {
width: 100%;
background-color: var(--dark-bg);
color: var(--text-color);
border: 1px solid var(--border-color);
padding: 10px;
border-radius: 6px;
font-size: 14px;
box-sizing: border-box;
margin-top: 5px;
}
input[type="text"]:focus, textarea:focus {
outline: none;
border-color: var(--primary-color);
}
textarea {
resize: vertical;
min-height: 80px;
}
.chat-container {
border-radius: 12px;
background-color: var(--card-bg);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
padding: 20px;
flex-grow: 1;
display: flex;
flex-direction: column;
border: 1px solid var(--border-color);
overflow: hidden;
min-height: 0;
height: 100%;
}
.chat-messages {
flex-grow: 1;
overflow-y: auto;
padding: 15px;
scrollbar-width: thin;
scrollbar-color: var(--primary-color) var(--card-bg);
min-height: 0;
max-height: calc(100vh - 250px);
}
.chat-messages::-webkit-scrollbar {
width: 6px;
}
.chat-messages::-webkit-scrollbar-thumb {
background-color: var(--primary-color);
border-radius: 6px;
}
.message {
margin-bottom: 15px;
padding: 12px 16px;
border-radius: 8px;
font-size: 15px;
line-height: 1.5;
position: relative;
max-width: 85%;
animation: fade-in 0.3s ease-out;
word-wrap: break-word;
}
@keyframes fade-in {
from {
opacity: 0;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.message.user {
background: linear-gradient(135deg, #2c3e50, #34495e);
margin-left: auto;
border-bottom-right-radius: 2px;
}
.message.assistant {
background: linear-gradient(135deg, var(--secondary-color), var(--primary-color));
margin-right: auto;
border-bottom-left-radius: 2px;
}
.message.search-result {
background: linear-gradient(135deg, #1a5a3e, #2e7d32);
font-size: 14px;
padding: 10px;
margin-bottom: 10px;
}
.language-info {
font-size: 12px;
color: #888;
margin-left: 5px;
}
.controls {
text-align: center;
margin-top: auto;
display: flex;
justify-content: center;
gap: 10px;
flex-shrink: 0;
padding-top: 20px;
}
/* Whisper Tab Styles */
.whisper-container {
padding: 20px;
background-color: var(--card-bg);
border-radius: 12px;
border: 1px solid var(--border-color);
margin: 20px auto;
max-width: 800px;
width: 100%;
}
.whisper-input-section {
margin-bottom: 20px;
}
.whisper-output-section {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-top: 20px;
}
.whisper-output {
background-color: var(--dark-bg);
padding: 15px;
border-radius: 8px;
border: 1px solid var(--border-color);
max-height: 300px;
overflow-y: auto;
}
.whisper-output h4 {
margin-top: 0;
color: var(--primary-color);
}
.file-upload-area {
border: 2px dashed var(--border-color);
border-radius: 8px;
padding: 30px;
text-align: center;
cursor: pointer;
transition: all 0.3s;
}
.file-upload-area:hover {
border-color: var(--primary-color);
background-color: rgba(111, 66, 193, 0.1);
}
.file-upload-area.drag-over {
border-color: var(--primary-color);
background-color: rgba(111, 66, 193, 0.2);
}
.realtime-output {
background-color: var(--dark-bg);
padding: 20px;
border-radius: 8px;
margin-top: 20px;
min-height: 200px;
max-height: 400px;
overflow-y: auto;
}
.recording-indicator {
display: inline-flex;
align-items: center;
gap: 10px;
padding: 10px 20px;
background-color: #f44336;
color: white;
border-radius: 20px;
animation: pulse 1.5s infinite;
}
.recording-dot {
width: 10px;
height: 10px;
background-color: white;
border-radius: 50%;
animation: blink 1s infinite;
}
@keyframes blink {
0%, 50% { opacity: 1; }
51%, 100% { opacity: 0; }
}
/* Responsive design */
@media (max-width: 1024px) {
.sidebar {
width: 300px;
}
.whisper-output-section {
grid-template-columns: 1fr;
}
}
@media (max-width: 768px) {
.main-content {
flex-direction: column;
}
.sidebar {
width: 100%;
margin-bottom: 20px;
}
.chat-section {
height: 400px;
}
.tabs-container {
flex-wrap: wrap;
}
}
button {
background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
color: white;
border: none;
padding: 14px 28px;
font-family: inherit;
font-size: 16px;
cursor: pointer;
transition: all 0.3s;
text-transform: uppercase;
letter-spacing: 1px;
border-radius: 50px;
display: flex;
align-items: center;
justify-content: center;
gap: 10px;
box-shadow: 0 4px 10px rgba(111, 66, 193, 0.3);
}
button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 15px rgba(111, 66, 193, 0.5);
background: linear-gradient(135deg, var(--hover-color), var(--primary-color));
}
button:active {
transform: translateY(1px);
}
#send-button {
background: linear-gradient(135deg, #2ecc71, #27ae60);
padding: 10px 20px;
font-size: 14px;
flex-shrink: 0;
}
#send-button:hover {
background: linear-gradient(135deg, #27ae60, #229954);
}
#audio-output {
display: none;
}
.icon-with-spinner {
display: flex;
align-items: center;
justify-content: center;
gap: 12px;
min-width: 180px;
}
.spinner {
width: 20px;
height: 20px;
border: 2px solid #ffffff;
border-top-color: transparent;
border-radius: 50%;
animation: spin 1s linear infinite;
flex-shrink: 0;
}
@keyframes spin {
to {
transform: rotate(360deg);
}
}
.audio-visualizer {
display: flex;
align-items: center;
justify-content: center;
gap: 5px;
min-width: 80px;
height: 25px;
}
.visualizer-bar {
width: 4px;
height: 100%;
background-color: rgba(255, 255, 255, 0.7);
border-radius: 2px;
transform-origin: bottom;
transform: scaleY(0.1);
transition: transform 0.1s ease;
}
.toast {
position: fixed;
top: 20px;
left: 50%;
transform: translateX(-50%);
padding: 16px 24px;
border-radius: 8px;
font-size: 14px;
z-index: 1000;
display: none;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.toast.error {
background-color: #f44336;
color: white;
}
.toast.warning {
background-color: #ff9800;
color: white;
}
.toast.success {
background-color: #4caf50;
color: white;
}
.status-indicator {
display: inline-flex;
align-items: center;
margin-top: 10px;
font-size: 14px;
color: #aaa;
}
.status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
margin-right: 8px;
}
.status-dot.connected {
background-color: #4caf50;
}
.status-dot.disconnected {
background-color: #f44336;
}
.status-dot.connecting {
background-color: #ff9800;
animation: pulse 1.5s infinite;
}
@keyframes pulse {
0% {
opacity: 0.6;
}
50% {
opacity: 1;
}
100% {
opacity: 0.6;
}
}
.mouse-logo {
position: relative;
width: 40px;
height: 40px;
}
.mouse-ears {
position: absolute;
width: 15px;
height: 15px;
background-color: var(--primary-color);
border-radius: 50%;
}
.mouse-ear-left {
top: 0;
left: 5px;
}
.mouse-ear-right {
top: 0;
right: 5px;
}
.mouse-face {
position: absolute;
top: 10px;
left: 5px;
width: 30px;
height: 30px;
background-color: var(--secondary-color);
border-radius: 50%;
}
</style>
</head>
<body>
<div id="error-toast" class="toast"></div>
<div class="container">
<div class="header">
<div class="logo">
<div class="mouse-logo">
<div class="mouse-ears mouse-ear-left"></div>
<div class="mouse-ears mouse-ear-right"></div>
<div class="mouse-face"></div>
</div>
<h1>MOUSE Extended</h1>
</div>
<div class="status-indicator">
<div id="status-dot" class="status-dot disconnected"></div>
<span id="status-text">연결 대기 중</span>
</div>
</div>
<div class="tabs-container">
<button class="tab-button active" onclick="switchTab('voice-chat')">음성 채팅</button>
<button class="tab-button" onclick="switchTab('mic-whisper')">마이크 전사</button>
<button class="tab-button" onclick="switchTab('audio-whisper')">오디오 파일</button>
<button class="tab-button" onclick="switchTab('video-whisper')">비디오 파일</button>
<button class="tab-button" onclick="switchTab('pdf-whisper')">PDF 번역</button>
<button class="tab-button" onclick="switchTab('realtime-whisper')">실시간 통역</button>
</div>
<!-- Voice Chat Tab (Original) -->
<div id="voice-chat" class="tab-content active">
<div class="main-content">
<div class="sidebar">
<div class="settings-section">
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정(텍스트 채팅에만 적용)</h3>
<div class="settings-grid">
<div class="setting-item">
<span class="setting-label">웹 검색</span>
<div id="search-toggle" class="toggle-switch">
<div class="toggle-slider"></div>
</div>
</div>
<div class="setting-item">
<span class="setting-label">다국어 번역 채팅</span>
<select id="language-select">
<option value="">비활성화</option>
<option value="ko">한국어 (Korean)</option>
<option value="en">English</option>
<option value="es">Español (Spanish)</option>
<option value="fr">Français (French)</option>
<option value="de">Deutsch (German)</option>
<option value="it">Italiano (Italian)</option>
<option value="pt">Português (Portuguese)</option>
<option value="ru">Русский (Russian)</option>
<option value="ja">日本語 (Japanese)</option>
<option value="zh">中文 (Chinese)</option>
<option value="ar">العربية (Arabic)</option>
<option value="hi">हिन्दी (Hindi)</option>
<option value="nl">Nederlands (Dutch)</option>
<option value="pl">Polski (Polish)</option>
<option value="tr">Türkçe (Turkish)</option>
<option value="vi">Tiếng Việt (Vietnamese)</option>
<option value="th">ไทย (Thai)</option>
<option value="id">Bahasa Indonesia</option>
<option value="sv">Svenska (Swedish)</option>
<option value="da">Dansk (Danish)</option>
<option value="no">Norsk (Norwegian)</option>
<option value="fi">Suomi (Finnish)</option>
<option value="he">עברית (Hebrew)</option>
<option value="uk">Українська (Ukrainian)</option>
<option value="cs">Čeština (Czech)</option>
<option value="el">Ελληνικά (Greek)</option>
<option value="ro">Română (Romanian)</option>
<option value="hu">Magyar (Hungarian)</option>
<option value="ms">Bahasa Melayu (Malay)</option>
</select>
</div>
</div>
<div class="text-input-section">
<label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
<textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
</div>
</div>
<div class="controls">
<button id="start-button">대화 시작</button>
</div>
</div>
<div class="chat-section">
<div class="chat-container">
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화</h3>
<div class="chat-messages" id="chat-messages"></div>
<div class="text-input-section" style="margin-top: 10px;">
<div style="display: flex; gap: 10px;">
<input type="text" id="text-input" placeholder="텍스트 메시지를 입력하세요..." style="flex-grow: 1;" />
<button id="send-button" style="display: none;">전송</button>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Microphone Whisper Tab -->
<div id="mic-whisper" class="tab-content">
<div class="whisper-container">
<h3>마이크 녹음 → 전사 및 4개 언어 번역</h3>
<div class="whisper-input-section">
<button id="mic-record-btn" onclick="toggleMicRecording()">녹음 시작</button>
<div id="mic-status" style="margin-top: 10px;"></div>
</div>
<div class="whisper-output-section">
<div class="whisper-output">
<h4>원문</h4>
<div id="mic-original"></div>
</div>
<div class="whisper-output">
<h4>번역 (EN/ZH/TH/RU)</h4>
<div id="mic-translation"></div>
</div>
</div>
</div>
</div>
<!-- Audio File Whisper Tab -->
<div id="audio-whisper" class="tab-content">
<div class="whisper-container">
<h3>오디오 파일 → 전사 및 4개 언어 번역</h3>
<div class="whisper-input-section">
<div class="file-upload-area" id="audio-upload-area">
<p>오디오 파일을 드래그하거나 클릭하여 업로드</p>
<input type="file" id="audio-file-input" accept="audio/*" style="display: none;">
</div>
</div>
<div class="whisper-output-section">
<div class="whisper-output">
<h4>원문</h4>
<div id="audio-original"></div>
</div>
<div class="whisper-output">
<h4>번역 (EN/ZH/TH/RU)</h4>
<div id="audio-translation"></div>
</div>
</div>
</div>
</div>
<!-- Video File Whisper Tab -->
<div id="video-whisper" class="tab-content">
<div class="whisper-container">
<h3>비디오 파일 → 오디오 추출 → 전사 및 4개 언어 번역</h3>
<div class="whisper-input-section">
<div class="file-upload-area" id="video-upload-area">
<p>비디오 파일을 드래그하거나 클릭하여 업로드</p>
<input type="file" id="video-file-input" accept="video/*" style="display: none;">
</div>
</div>
<div class="whisper-output-section">
<div class="whisper-output">
<h4>원문</h4>
<div id="video-original"></div>
</div>
<div class="whisper-output">
<h4>번역 (EN/ZH/TH/RU)</h4>
<div id="video-translation"></div>
</div>
</div>
</div>
</div>
<!-- PDF Whisper Tab -->
<div id="pdf-whisper" class="tab-content">
<div class="whisper-container">
<h3>PDF 파일 → 텍스트 추출 → 4개 언어 번역</h3>
<div class="whisper-input-section">
<div class="file-upload-area" id="pdf-upload-area">
<p>PDF 파일을 드래그하거나 클릭하여 업로드</p>
<input type="file" id="pdf-file-input" accept=".pdf" style="display: none;">
</div>
<div class="setting-item" style="margin-top: 15px;">
<span class="setting-label">최대 페이지:</span>
<input type="number" id="pdf-max-pages" value="10" min="1" max="50" style="width: 80px;">
</div>
</div>
<div class="whisper-output-section">
<div class="whisper-output">
<h4>추출된 텍스트</h4>
<div id="pdf-original"></div>
</div>
<div class="whisper-output">
<h4>번역 (EN/ZH/TH/RU)</h4>
<div id="pdf-translation"></div>
</div>
</div>
</div>
</div>
<!-- Realtime Whisper Tab -->
<div id="realtime-whisper" class="tab-content">
<div class="whisper-container">
<h3>실시간 통역 (Korean → EN/ZH/TH/RU)</h3>
<p>10-15초 문장 단위로 자동 전환 — 최신 내용이 위에 표시됩니다.</p>
<div class="whisper-input-section">
<button id="realtime-start-btn" onclick="toggleRealtimeRecording()">실시간 통역 시작</button>
<div id="realtime-status" style="margin-top: 10px;"></div>
</div>
<div class="realtime-output" id="realtime-output"></div>
</div>
</div>
</div>
<audio id="audio-output"></audio>
<script>
// Tab switching functionality - placed at the very top
function switchTab(tabName) {
console.log('Switching to tab:', tabName);
// Hide all tabs
document.querySelectorAll('.tab-content').forEach(tab => {
tab.style.display = 'none';
tab.classList.remove('active');
});
// Remove active from all buttons
document.querySelectorAll('.tab-button').forEach(btn => {
btn.classList.remove('active');
});
// Show selected tab
const selectedTab = document.getElementById(tabName);
if (selectedTab) {
selectedTab.style.display = 'flex';
selectedTab.classList.add('active');
}
// Mark button as active
event.target.classList.add('active');
}
// Global variables
let peerConnection = null;
let webrtc_id = null;
let webSearchEnabled = false;
let selectedLanguage = "";
let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
let audioLevel = 0;
let animationFrame = null;
let audioContext = null;
let analyser = null;
let audioSource = null;
let dataChannel = null;
let isVoiceActive = false;
// Whisper variables
let micRecorder = null;
let isRecording = false;
let realtimeRecorder = null;
let isRealtimeRecording = false;
let realtimeStream = null;
async function sendTextMessage() {
const textInput = document.getElementById('text-input');
const chatMessages = document.getElementById('chat-messages');
const message = textInput.value.trim();
if (!message) return;
// Add user message to chat
addMessage('user', message);
textInput.value = '';
// Show sending indicator
const typingIndicator = document.createElement('div');
typingIndicator.classList.add('message', 'assistant');
typingIndicator.textContent = '입력 중...';
typingIndicator.id = 'typing-indicator';
chatMessages.appendChild(typingIndicator);
chatMessages.scrollTop = chatMessages.scrollHeight;
try {
// Send to text chat endpoint
const response = await fetch('/chat/text', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
message: message,
web_search_enabled: webSearchEnabled,
target_language: selectedLanguage,
system_prompt: systemPrompt
})
});
const data = await response.json();
// Remove typing indicator
const indicator = document.getElementById('typing-indicator');
if (indicator) indicator.remove();
if (data.error) {
showError(data.error);
} else {
// Add assistant response
let content = data.response;
if (selectedLanguage && data.language) {
content += ` <span class="language-info">[${data.language}]</span>`;
}
addMessage('assistant', content);
}
} catch (error) {
console.error('Error sending text message:', error);
const indicator = document.getElementById('typing-indicator');
if (indicator) indicator.remove();
showError('메시지 전송 중 오류가 발생했습니다.');
}
}
function updateStatus(state) {
const statusDot = document.getElementById('status-dot');
const statusText = document.getElementById('status-text');
const sendButton = document.getElementById('send-button');
statusDot.className = 'status-dot ' + state;
if (state === 'connected') {
statusText.textContent = '연결됨';
if (sendButton) sendButton.style.display = 'block';
isVoiceActive = true;
} else if (state === 'connecting') {
statusText.textContent = '연결 중...';
if (sendButton) sendButton.style.display = 'none';
} else {
statusText.textContent = '연결 대기 중';
if (sendButton) sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
isVoiceActive = false;
}
}
function updateButtonState() {
const button = document.getElementById('start-button');
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
button.innerHTML = `
<div class="icon-with-spinner">
<div class="spinner"></div>
<span>연결 중...</span>
</div>
`;
updateStatus('connecting');
} else if (peerConnection && peerConnection.connectionState === 'connected') {
button.innerHTML = `
<div class="icon-with-spinner">
<div class="audio-visualizer" id="audio-visualizer">
<div class="visualizer-bar"></div>
<div class="visualizer-bar"></div>
<div class="visualizer-bar"></div>
<div class="visualizer-bar"></div>
<div class="visualizer-bar"></div>
</div>
<span>대화 종료</span>
</div>
`;
updateStatus('connected');
} else {
button.innerHTML = '대화 시작';
updateStatus('disconnected');
}
}
function setupAudioVisualization(stream) {
audioContext = new (window.AudioContext || window.webkitAudioContext)();
analyser = audioContext.createAnalyser();
audioSource = audioContext.createMediaStreamSource(stream);
audioSource.connect(analyser);
analyser.fftSize = 256;
const bufferLength = analyser.frequencyBinCount;
const dataArray = new Uint8Array(bufferLength);
const visualizerBars = document.querySelectorAll('.visualizer-bar');
const barCount = visualizerBars.length;
function updateAudioLevel() {
analyser.getByteFrequencyData(dataArray);
for (let i = 0; i < barCount; i++) {
const start = Math.floor(i * (bufferLength / barCount));
const end = Math.floor((i + 1) * (bufferLength / barCount));
let sum = 0;
for (let j = start; j < end; j++) {
sum += dataArray[j];
}
const average = sum / (end - start) / 255;
const scaleY = 0.1 + average * 0.9;
visualizerBars[i].style.transform = `scaleY(${scaleY})`;
}
animationFrame = requestAnimationFrame(updateAudioLevel);
}
updateAudioLevel();
}
function showError(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
toast.className = 'toast error';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}
function showSuccess(message) {
const toast = document.getElementById('error-toast');
toast.textContent = message;
toast.className = 'toast success';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 3000);
}
async function setupWebRTC() {
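// Flow: get mic stream → add tracks → create offer → wait for ICE gathering → POST the SDP to /webrtc/offer → apply the answer → subscribe to /outputs (SSE) for transcripts and search events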
const audioOutput = document.getElementById('audio-output');
const config = typeof __RTC_CONFIGURATION__ !== 'undefined' ? __RTC_CONFIGURATION__ : {iceServers: [{urls: 'stun:stun.l.google.com:19302'}]};
peerConnection = new RTCPeerConnection(config);
const timeoutId = setTimeout(() => {
const toast = document.getElementById('error-toast');
toast.textContent = "연결이 평소보다 오래 걸리고 있습니다. VPN을 사용 중이신가요?";
toast.className = 'toast warning';
toast.style.display = 'block';
setTimeout(() => {
toast.style.display = 'none';
}, 5000);
}, 5000);
try {
const stream = await navigator.mediaDevices.getUserMedia({
audio: true
});
setupAudioVisualization(stream);
stream.getTracks().forEach(track => {
peerConnection.addTrack(track, stream);
});
peerConnection.addEventListener('track', (evt) => {
if (audioOutput.srcObject !== evt.streams[0]) {
audioOutput.srcObject = evt.streams[0];
audioOutput.play();
}
});
// Create data channel for text messages
dataChannel = peerConnection.createDataChannel('text');
dataChannel.onopen = () => {
console.log('Data channel opened');
};
dataChannel.onmessage = (event) => {
const eventJson = JSON.parse(event.data);
if (eventJson.type === "error") {
showError(eventJson.message);
}
};
const offer = await peerConnection.createOffer();
await peerConnection.setLocalDescription(offer);
await new Promise((resolve) => {
if (peerConnection.iceGatheringState === "complete") {
resolve();
} else {
const checkState = () => {
if (peerConnection.iceGatheringState === "complete") {
peerConnection.removeEventListener("icegatheringstatechange", checkState);
resolve();
}
};
peerConnection.addEventListener("icegatheringstatechange", checkState);
}
});
peerConnection.addEventListener('connectionstatechange', () => {
console.log('connectionstatechange', peerConnection.connectionState);
if (peerConnection.connectionState === 'connected') {
clearTimeout(timeoutId);
const toast = document.getElementById('error-toast');
toast.style.display = 'none';
}
updateButtonState();
});
webrtc_id = Math.random().toString(36).substring(7);
const response = await fetch('/webrtc/offer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
sdp: peerConnection.localDescription.sdp,
type: peerConnection.localDescription.type,
webrtc_id: webrtc_id,
web_search_enabled: webSearchEnabled,
target_language: selectedLanguage,
system_prompt: systemPrompt
})
});
const serverResponse = await response.json();
if (serverResponse.status === 'failed') {
showError(serverResponse.meta.error === 'concurrency_limit_reached'
? `너무 많은 연결입니다. 최대 한도는 ${serverResponse.meta.limit} 입니다.`
: serverResponse.meta.error);
stop();
return;
}
await peerConnection.setRemoteDescription(serverResponse);
const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
eventSource.addEventListener("output", (event) => {
const eventJson = JSON.parse(event.data);
let content = eventJson.content;
if (selectedLanguage && eventJson.language) {
content += ` <span class="language-info">[${eventJson.language}]</span>`;
}
addMessage("assistant", content);
});
eventSource.addEventListener("search", (event) => {
const eventJson = JSON.parse(event.data);
if (eventJson.query) {
addMessage("search-result", `웹 검색 중: "${eventJson.query}"`);
}
});
} catch (err) {
clearTimeout(timeoutId);
console.error('Error setting up WebRTC:', err);
showError('연결을 설정하지 못했습니다. 다시 시도해 주세요.');
stop();
}
}
function addMessage(role, content) {
const chatMessages = document.getElementById('chat-messages');
const messageDiv = document.createElement('div');
messageDiv.classList.add('message', role);
if (content.includes('<span')) {
messageDiv.innerHTML = content;
} else {
messageDiv.textContent = content;
}
chatMessages.appendChild(messageDiv);
chatMessages.scrollTop = chatMessages.scrollHeight;
}
function stop() {
console.log('[STOP] Stopping connection...');
// Cancel animation frame first
if (animationFrame) {
cancelAnimationFrame(animationFrame);
animationFrame = null;
}
// Close audio context
if (audioContext) {
audioContext.close();
audioContext = null;
analyser = null;
audioSource = null;
}
// Close data channel
if (dataChannel) {
dataChannel.close();
dataChannel = null;
}
// Close peer connection
if (peerConnection) {
console.log('[STOP] Current connection state:', peerConnection.connectionState);
// Stop all transceivers
if (peerConnection.getTransceivers) {
peerConnection.getTransceivers().forEach(transceiver => {
if (transceiver.stop) {
transceiver.stop();
}
});
}
// Stop all senders
if (peerConnection.getSenders) {
peerConnection.getSenders().forEach(sender => {
if (sender.track) {
sender.track.stop();
}
});
}
// Stop all receivers
if (peerConnection.getReceivers) {
peerConnection.getReceivers().forEach(receiver => {
if (receiver.track) {
receiver.track.stop();
}
});
}
// Close the connection
peerConnection.close();
// Clear the reference
peerConnection = null;
console.log('[STOP] Connection closed');
}
// Reset audio level
audioLevel = 0;
isVoiceActive = false;
// Update UI
updateButtonState();
// Clear any existing webrtc_id
if (webrtc_id) {
console.log('[STOP] Clearing webrtc_id:', webrtc_id);
webrtc_id = null;
}
}
// Whisper Tab Functions
// Microphone recording
async function toggleMicRecording() {
const btn = document.getElementById('mic-record-btn');
const status = document.getElementById('mic-status');
if (!isRecording) {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
micRecorder = new MediaRecorder(stream);
const chunks = [];
micRecorder.ondataavailable = (e) => chunks.push(e.data);
micRecorder.onstop = async () => {
const blob = new Blob(chunks, { type: 'audio/webm' });
await processAudioBlob(blob, 'mic');
stream.getTracks().forEach(track => track.stop());
};
micRecorder.start();
isRecording = true;
btn.textContent = '녹음 중지';
status.innerHTML = '<div class="recording-indicator"><div class="recording-dot"></div>녹음 중...</div>';
} catch (err) {
showError('마이크 접근 권한이 필요합니다.');
}
} else {
micRecorder.stop();
isRecording = false;
btn.textContent = '녹음 시작';
status.textContent = '처리 중...';
}
}
// Process audio blob (for microphone recording)
async function processAudioBlob(blob, type) {
const formData = new FormData();
formData.append('audio', blob, 'recording.webm');
try {
const response = await fetch('/whisper/transcribe', {
method: 'POST',
body: formData
});
const result = await response.json();
if (result.error) {
showError(result.error);
} else {
document.getElementById(`${type}-original`).textContent = result.text;
document.getElementById(`${type}-translation`).innerHTML = result.translation.replace(/\n/g, '<br>');
document.getElementById(`${type}-status`).textContent = '';
}
} catch (error) {
showError('처리 중 오류가 발생했습니다.');
document.getElementById(`${type}-status`).textContent = '';
}
}
// Process audio file
async function processAudioFile(file) {
const formData = new FormData();
formData.append('audio', file);
try {
showSuccess('오디오 파일 처리 중...');
const response = await fetch('/whisper/audio', {
method: 'POST',
body: formData
});
const result = await response.json();
if (result.error) {
showError(result.error);
} else {
document.getElementById('audio-original').textContent = result.text;
document.getElementById('audio-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
}
} catch (error) {
showError('오디오 파일 처리 중 오류가 발생했습니다.');
}
}
// Process video file
async function processVideoFile(file) {
const formData = new FormData();
formData.append('video', file);
try {
showSuccess('비디오 파일 처리 중... (시간이 걸릴 수 있습니다)');
const response = await fetch('/whisper/video', {
method: 'POST',
body: formData
});
const result = await response.json();
if (result.error) {
showError(result.error);
} else {
document.getElementById('video-original').textContent = result.text;
document.getElementById('video-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
}
} catch (error) {
showError('비디오 파일 처리 중 오류가 발생했습니다.');
}
}
// Process PDF file
async function processPDFFile(file) {
const formData = new FormData();
formData.append('pdf', file);
formData.append('max_pages', document.getElementById('pdf-max-pages').value);
try {
showSuccess('PDF 파일 처리 중...');
const response = await fetch('/whisper/pdf', {
method: 'POST',
body: formData
});
const result = await response.json();
if (result.error) {
showError(result.error);
} else {
document.getElementById('pdf-original').textContent = result.text;
document.getElementById('pdf-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
}
} catch (error) {
showError('PDF 파일 처리 중 오류가 발생했습니다.');
}
}
// Realtime recording
let realtimeEventSource = null;
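// Flow: POST /whisper/realtime/start for a session_id → listen on /whisper/realtime/stream (SSE) for translated segments → stream 16 kHz PCM chunks to /whisper/realtime/process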
async function toggleRealtimeRecording() {
const btn = document.getElementById('realtime-start-btn');
const status = document.getElementById('realtime-status');
const output = document.getElementById('realtime-output');
if (!isRealtimeRecording) {
try {
const response = await fetch('/whisper/realtime/start', {
method: 'POST'
});
const result = await response.json();
if (result.session_id) {
// Start listening for results
realtimeEventSource = new EventSource(`/whisper/realtime/stream?session_id=${result.session_id}`);
realtimeEventSource.onmessage = (event) => {
const data = JSON.parse(event.data);
if (data.timestamp) {
const segment = document.createElement('div');
segment.style.borderBottom = '1px solid #333';
segment.style.paddingBottom = '15px';
segment.style.marginBottom = '15px';
segment.innerHTML = `
<strong>[${data.timestamp}]</strong><br>
<strong>[KO]</strong> ${data.text}<br>
${data.translation.replace(/\n/g, '<br>')}
`;
output.insertBefore(segment, output.firstChild);
}
};
realtimeEventSource.onerror = () => {
stopRealtimeRecording();
showError('실시간 통역 연결이 끊어졌습니다.');
};
// Start recording
realtimeStream = await navigator.mediaDevices.getUserMedia({ audio: true });
isRealtimeRecording = true;
btn.textContent = '통역 중지';
status.innerHTML = '<div class="recording-indicator"><div class="recording-dot"></div>실시간 통역 중...</div>';
// Send audio data periodically
startRealtimeAudioCapture(result.session_id);
}
} catch (err) {
showError('실시간 통역을 시작할 수 없습니다.');
}
} else {
stopRealtimeRecording();
}
}
function stopRealtimeRecording() {
if (realtimeEventSource) {
realtimeEventSource.close();
realtimeEventSource = null;
}
if (realtimeStream) {
realtimeStream.getTracks().forEach(track => track.stop());
realtimeStream = null;
}
isRealtimeRecording = false;
document.getElementById('realtime-start-btn').textContent = '실시간 통역 시작';
document.getElementById('realtime-status').textContent = '';
}
async function startRealtimeAudioCapture(sessionId) {
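// Capture mic audio at 16 kHz via a ScriptProcessor, run a crude RMS-based silence check, and ship 2-15 second PCM16 chunks to the server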
const audioContext = new AudioContext({ sampleRate: 16000 });
const source = audioContext.createMediaStreamSource(realtimeStream);
const processor = audioContext.createScriptProcessor(4096, 1, 1);
let audioBuffer = [];
let silenceFrames = 0;
const SILENCE_THRESHOLD = 0.01;
const MIN_BUFFER_SIZE = 16000 * 2; // 2 seconds minimum
const MAX_BUFFER_SIZE = 16000 * 15; // 15 seconds maximum
processor.onaudioprocess = async (e) => {
if (!isRealtimeRecording) return;
const inputData = e.inputBuffer.getChannelData(0);
audioBuffer.push(...inputData);
// Simple voice activity detection
const rms = Math.sqrt(inputData.reduce((sum, val) => sum + val * val, 0) / inputData.length);
if (rms < SILENCE_THRESHOLD) {
silenceFrames++;
} else {
silenceFrames = 0;
}
// Send audio when we have enough silence or max buffer reached
if ((silenceFrames > 20 && audioBuffer.length > MIN_BUFFER_SIZE) ||
audioBuffer.length > MAX_BUFFER_SIZE) {
const audioData = new Float32Array(audioBuffer);
audioBuffer = [];
silenceFrames = 0;
// Convert to 16-bit PCM
const pcmData = new Int16Array(audioData.length);
for (let i = 0; i < audioData.length; i++) {
pcmData[i] = Math.max(-32768, Math.min(32767, audioData[i] * 32768));
}
// Send to server
const formData = new FormData();
formData.append('audio', new Blob([pcmData.buffer], { type: 'audio/pcm' }));
formData.append('session_id', sessionId);
fetch('/whisper/realtime/process', {
method: 'POST',
body: formData
}).catch(err => console.error('Error sending audio:', err));
}
};
source.connect(processor);
processor.connect(audioContext.destination);
}
// Simple initialization
window.onload = function() {
console.log('Page loaded!');
// Web search toggle
document.getElementById('search-toggle').onclick = function() {
webSearchEnabled = !webSearchEnabled;
this.classList.toggle('active', webSearchEnabled);
console.log('Web search:', webSearchEnabled);
};
// Language select
document.getElementById('language-select').onchange = function() {
selectedLanguage = this.value;
console.log('Language:', selectedLanguage);
};
// System prompt
document.getElementById('system-prompt').oninput = function() {
systemPrompt = this.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
};
// Text input enter key
document.getElementById('text-input').onkeypress = function(e) {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
sendTextMessage();
}
};
// Send button
document.getElementById('send-button').onclick = sendTextMessage;
document.getElementById('send-button').style.display = 'block';
// Start button
document.getElementById('start-button').onclick = function() {
if (!peerConnection || peerConnection.connectionState !== 'connected') {
setupWebRTC();
} else {
stop();
}
};
// File upload areas
const audioUploadArea = document.getElementById('audio-upload-area');
if (audioUploadArea) {
audioUploadArea.onclick = function() {
document.getElementById('audio-file-input').click();
};
}
const videoUploadArea = document.getElementById('video-upload-area');
if (videoUploadArea) {
videoUploadArea.onclick = function() {
document.getElementById('video-file-input').click();
};
}
const pdfUploadArea = document.getElementById('pdf-upload-area');
if (pdfUploadArea) {
pdfUploadArea.onclick = function() {
document.getElementById('pdf-file-input').click();
};
}
// File input handlers
const audioFileInput = document.getElementById('audio-file-input');
if (audioFileInput) {
audioFileInput.onchange = function(e) {
if (e.target.files[0]) processAudioFile(e.target.files[0]);
};
}
const videoFileInput = document.getElementById('video-file-input');
if (videoFileInput) {
videoFileInput.onchange = function(e) {
if (e.target.files[0]) processVideoFile(e.target.files[0]);
};
}
const pdfFileInput = document.getElementById('pdf-file-input');
if (pdfFileInput) {
pdfFileInput.onchange = function(e) {
if (e.target.files[0]) processPDFFile(e.target.files[0]);
};
}
// Drag and drop handlers
['audio', 'video', 'pdf'].forEach(type => {
const area = document.getElementById(`${type}-upload-area`);
if (area) {
area.ondragover = function(e) {
e.preventDefault();
area.classList.add('drag-over');
};
area.ondragleave = function() {
area.classList.remove('drag-over');
};
area.ondrop = function(e) {
e.preventDefault();
area.classList.remove('drag-over');
const file = e.dataTransfer.files[0];
if (file) {
if (type === 'audio') processAudioFile(file);
else if (type === 'video') processVideoFile(file);
else if (type === 'pdf') processPDFFile(file);
}
};
}
});
};
</script>
</body>
</html>"""
# Whisper model loader
def _get_whisper_pipe():
"""Lazy load Whisper pipeline"""
if not hasattr(_get_whisper_pipe, "pipe"):
device = 0 if torch.cuda.is_available() else "cpu"
_get_whisper_pipe.pipe = pipeline(
task="automatic-speech-recognition",
model=WHISPER_MODEL_NAME,
chunk_length_s=30,
device=device,
)
return _get_whisper_pipe.pipe
# Audio helpers for Whisper
def _ensure_16k_whisper(y: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
"""Resample audio to 16kHz for Whisper"""
if sr == WHISPER_SAMPLE_RATE:
return y.astype(np.float32), WHISPER_SAMPLE_RATE
g = np.gcd(sr, WHISPER_SAMPLE_RATE)
y = sps.resample_poly(y, WHISPER_SAMPLE_RATE // g, sr // g).astype(np.float32)
return y, WHISPER_SAMPLE_RATE
def _should_flush_whisper(buffer: np.ndarray, sr: int) -> bool:
"""Check if audio buffer should be flushed for processing"""
dur = len(buffer) / sr
if dur < MIN_SEG_SEC:
return False
tail_len = int(SILENCE_SEC * sr)
tail = buffer[-tail_len:]
rms = np.sqrt(np.mean(tail ** 2)) if len(tail) else 1.0
end_of_sentence = rms < SILENCE_THRESH
return end_of_sentence or dur >= MAX_SEG_SEC
# Translation helper
def _translate_text_4langs(text: str) -> str:
"""Translate text to 4 languages using OpenAI"""
try:
client = openai.OpenAI()
prompt = (
"Translate the following text into English (EN), Chinese (ZH), Thai (TH) and Russian (RU).\n"
"Return ONLY the translations in this format (one per line):\n"
"EN: <english>\nZH: <chinese>\nTH: <thai>\nRU: <russian>\n\n"
f"Text: {text}"
)
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[
{"role": "system", "content": "You are a professional translator."},
{"role": "user", "content": prompt}
],
temperature=0.7,
max_tokens=512
)
return response.choices[0].message.content.strip()
except Exception as e:
print(f"Translation error: {e}")
return f"Translation error: {str(e)}"
# ffmpeg check
def _check_ffmpeg() -> bool:
try:
subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
return True
except Exception:
return False
_HAS_FFMPEG = _check_ffmpeg()
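# Cache ffmpeg availability once at import time; video uploads fail fast when it is missing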
def extract_audio_from_video(video_path: str) -> str:
"""Extract audio from video file"""
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
tmp.close()
if _HAS_FFMPEG:
cmd = [
"ffmpeg", "-i", video_path, "-vn",
"-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "-y", tmp.name
]
result = subprocess.run(cmd, capture_output=True)
if result.returncode != 0:
os.unlink(tmp.name)
raise RuntimeError("ffmpeg error extracting audio")
return tmp.name
else:
raise RuntimeError("ffmpeg is required for video processing")
# GPU workers for Whisper
def gpu_transcribe_whisper(audio_path: str) -> str:
"""Transcribe audio using Whisper on GPU"""
pipe = _get_whisper_pipe()
result = pipe(audio_path, batch_size=WHISPER_BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)
return result["text"].strip()
def gpu_asr_translate_whisper(audio: np.ndarray, sr: int) -> str:
"""Transcribe and translate audio for realtime"""
pipe = _get_whisper_pipe()
ko = pipe({"array": audio, "sampling_rate": sr}, batch_size=WHISPER_BATCH_SIZE)["text"].strip()
trans = _translate_text_4langs(ko).replace("\n", "<br>")
ts = datetime.now(SEOUL_TZ).strftime("%Y-%m-%d %H:%M:%S")
return f"[{ts}]<br>[KO] {ko}<br>{trans}<br>{'-'*40}<br>"
class BraveSearchClient:
"""Brave Search API client"""
def __init__(self, api_key: str):
self.api_key = api_key
self.base_url = "https://api.search.brave.com/res/v1/web/search"
async def search(self, query: str, count: int = 10) -> List[Dict]:
"""Perform a web search using Brave Search API"""
if not self.api_key:
return []
headers = {
"Accept": "application/json",
"X-Subscription-Token": self.api_key
}
params = {
"q": query,
"count": count,
"lang": "ko"
}
async with httpx.AsyncClient() as client:
try:
response = await client.get(self.base_url, headers=headers, params=params)
response.raise_for_status()
data = response.json()
results = []
if "web" in data and "results" in data["web"]:
for result in data["web"]["results"][:count]:
results.append({
"title": result.get("title", ""),
"url": result.get("url", ""),
"description": result.get("description", "")
})
return results
except Exception as e:
print(f"Brave Search error: {e}")
return []
# Initialize search client globally
brave_api_key = os.getenv("BSEARCH_API")
search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
# Store connection settings
connection_settings = {}
# Store realtime sessions
realtime_sessions = {}
# Initialize OpenAI client for text chat
client = openai.AsyncOpenAI()
def get_translation_instructions(target_language: str) -> str:
"""Get instructions for translation based on target language"""
if not target_language:
return ""
language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
return (
f"\n\nIMPORTANT: You must respond in {language_name} ({target_language}). "
f"Translate all your responses to {language_name}."
)
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
chatbot.append({"role": "assistant", "content": response.transcript})
return chatbot
async def process_text_chat(message: str, web_search_enabled: bool, target_language: str,
system_prompt: str) -> Dict[str, str]:
"""Process text chat using GPT-4o-mini model"""
try:
# If target language is set, override system prompt completely
if target_language:
language_name = SUPPORTED_LANGUAGES.get(target_language, target_language)
# Create system prompt in target language
if target_language == "en":
base_instructions = f"You are a helpful assistant. You speak ONLY English. Never use Korean or any other language. {system_prompt}"
user_prefix = "Please respond in English: "
elif target_language == "ja":
base_instructions = f"あなたは親切なアシスタントです。日本語のみを話します。韓国語や他の言語は絶対に使用しません。{system_prompt}"
user_prefix = "日本語で答えてください: "
elif target_language == "zh":
base_instructions = f"你是一个乐于助人的助手。你只说中文。绝不使用韩语或其他语言。{system_prompt}"
user_prefix = "请用中文回答: "
elif target_language == "es":
base_instructions = f"Eres un asistente útil. Solo hablas español. Nunca uses coreano u otros idiomas. {system_prompt}"
user_prefix = "Por favor responde en español: "
else:
base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}. {system_prompt}"
user_prefix = f"Please respond in {language_name}: "
else:
base_instructions = system_prompt or "You are a helpful assistant."
user_prefix = ""
messages = [
{"role": "system", "content": base_instructions}
]
# Handle web search if enabled
if web_search_enabled and search_client:
# Check if the message requires web search
search_keywords = ["날씨", "기온", "비", "눈", "뉴스", "소식", "현재", "최근",
"오늘", "지금", "가격", "환율", "주가", "weather", "news",
"current", "today", "price", "2024", "2025"]
should_search = any(keyword in message.lower() for keyword in search_keywords)
if should_search:
# Perform web search
search_results = await search_client.search(message)
if search_results:
search_context = "웹 검색 결과:\n\n"
for i, result in enumerate(search_results[:5], 1):
search_context += f"{i}. {result['title']}\n{result['description']}\n\n"
# Add search context in target language if set
if target_language:
search_instruction = f"Use this search information but respond in {SUPPORTED_LANGUAGES.get(target_language, target_language)} only: "
else:
search_instruction = "다음 웹 검색 결과를 참고하여 답변하세요: "
messages.append({
"role": "system",
"content": search_instruction + "\n\n" + search_context
})
# Add user message with language prefix
messages.append({"role": "user", "content": user_prefix + message})
# Call GPT-4o-mini
response = await client.chat.completions.create(
model="gpt-4o-mini",
messages=messages,
temperature=0.7,
max_tokens=2000
)
response_text = response.choices[0].message.content
# Final check - remove any Korean if target language is not Korean
if target_language and target_language != "ko":
import re
if re.search(r'[가-힣]', response_text):
print(f"[TEXT CHAT] WARNING: Korean detected in response for {target_language}")
# Try again with stronger prompt
messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
retry_response = await client.chat.completions.create(
model="gpt-4o-mini",
messages=messages,
temperature=0.3,
max_tokens=2000
)
response_text = retry_response.choices[0].message.content
print(f"[TEXT CHAT] Target language: {target_language}")
print(f"[TEXT CHAT] Response preview: {response_text[:100]}...")
return {
"response": response_text,
"language": SUPPORTED_LANGUAGES.get(target_language, "") if target_language else ""
}
except Exception as e:
print(f"Error in text chat: {e}")
return {"error": str(e)}
class OpenAIHandler(AsyncStreamHandler):
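# Streams microphone audio to the OpenAI Realtime API and queues audio/transcript events back to the client.
# Per-connection settings (web search, target language, system prompt) are looked up in connection_settings by webrtc_id.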
def __init__(self, web_search_enabled: bool = False, target_language: str = "",
system_prompt: str = "", webrtc_id: str = None) -> None:
super().__init__(
expected_layout="mono",
output_sample_rate=SAMPLE_RATE,
output_frame_size=480,
input_sample_rate=SAMPLE_RATE,
)
self.connection = None
self.output_queue = asyncio.Queue()
self.search_client = search_client
self.function_call_in_progress = False
self.current_function_args = ""
self.current_call_id = None
self.webrtc_id = webrtc_id
self.web_search_enabled = web_search_enabled
self.target_language = target_language
self.system_prompt = system_prompt
print(f"[INIT] Handler created with web_search={web_search_enabled}, "
f"target_language={target_language}")
def copy(self):
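# fastrtc clones the handler for each new connection; seed the clone with the most recently stored settings, since it starts with none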
# Get the most recent settings
if connection_settings:
# Get the most recent webrtc_id
recent_ids = sorted(connection_settings.keys(),
key=lambda k: connection_settings[k].get('timestamp', 0),
reverse=True)
if recent_ids:
recent_id = recent_ids[0]
settings = connection_settings[recent_id]
# Log the settings being copied
print(f"[COPY] Copying settings from {recent_id}:")
return OpenAIHandler(
web_search_enabled=settings.get('web_search_enabled', False),
target_language=settings.get('target_language', ''),
system_prompt=settings.get('system_prompt', ''),
webrtc_id=recent_id
)
print(f"[COPY] No settings found, creating default handler")
return OpenAIHandler(web_search_enabled=False)
async def search_web(self, query: str) -> str:
"""Perform web search and return formatted results"""
if not self.search_client or not self.web_search_enabled:
return "웹 검색이 비활성화되어 있습니다."
print(f"Searching web for: {query}")
results = await self.search_client.search(query)
if not results:
return f"'{query}'에 대한 검색 결과를 찾을 수 없습니다."
# Format search results
formatted_results = []
for i, result in enumerate(results, 1):
formatted_results.append(
f"{i}. {result['title']}\n"
f" URL: {result['url']}\n"
f" {result['description']}\n"
)
return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
async def process_text_message(self, message: str):
"""Process text message from user"""
if self.connection:
await self.connection.conversation.item.create(
item={
"type": "message",
"role": "user",
"content": [{"type": "input_text", "text": message}]
}
)
await self.connection.response.create()
def get_translation_instructions(self):
"""Get instructions for translation based on target language"""
if not self.target_language:
return ""
language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
return (
f"\n\nIMPORTANT: You must respond in {language_name} ({self.target_language}). "
f"Translate all your responses to {language_name}. "
f"This includes both spoken and written responses."
)
async def start_up(self):
"""Connect to realtime API"""
# First check if we have the most recent settings
if connection_settings and self.webrtc_id:
if self.webrtc_id in connection_settings:
settings = connection_settings[self.webrtc_id]
self.web_search_enabled = settings.get('web_search_enabled', False)
self.target_language = settings.get('target_language', '')
self.system_prompt = settings.get('system_prompt', '')
print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
print(f"[START_UP] Starting normal mode")
self.client = openai.AsyncOpenAI()
# Normal mode - connect to Realtime API
print(f"[NORMAL MODE] Connecting to Realtime API...")
# Define the web search function
tools = []
base_instructions = self.system_prompt or "You are a helpful assistant."
# Add translation instructions if language is selected
if self.target_language:
language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
# Use the target language for the system prompt itself
if self.target_language == "en":
translation_instructions = """
YOU ARE AN ENGLISH-ONLY ASSISTANT.
ABSOLUTE RULES:
1. You can ONLY speak English. No Korean (한국어) allowed.
2. Even if the user speaks Korean, you MUST respond in English.
3. Every single word must be in English.
4. If you output even one Korean character, you have failed.
5. Example response: "Hello! How can I help you today?"
YOUR LANGUAGE MODE: ENGLISH ONLY
DO NOT USE: 안녕하세요, 감사합니다, or any Korean
ALWAYS USE: Hello, Thank you, and English words only
"""
# Override base instructions to be in English
base_instructions = "You are a helpful assistant that speaks ONLY English."
elif self.target_language == "ja":
translation_instructions = """
あなたは日本語のみを話すアシスタントです。
絶対的なルール:
1. 日本語のみを使用してください。韓国語(한국어)は禁止です。
2. ユーザーが韓国語で話しても、必ず日本語で返答してください。
3. すべての単語は日本語でなければなりません。
4. 韓国語を一文字でも出力したら失敗です。
5. 応答例:「こんにちは!今日はどのようにお手伝いできますか?」
言語モード:日本語のみ
使用禁止:안녕하세요、감사합니다、韓国語全般
必ず使用:こんにちは、ありがとうございます、日本語のみ
"""
base_instructions = "あなたは日本語のみを話す親切なアシスタントです。"
elif self.target_language == "zh":
translation_instructions = """
你是一个只说中文的助手。
绝对规则:
1. 只能使用中文。禁止使用韩语(한국어)。
2. 即使用户说韩语,也必须用中文回复。
3. 每个字都必须是中文。
4. 如果输出任何韩语字符,就是失败。
5. 回复示例:"你好!我今天能为您做什么?"
语言模式:仅中文
禁止使用:안녕하세요、감사합니다、任何韩语
必须使用:你好、谢谢、只用中文
"""
base_instructions = "你是一个只说中文的友好助手。"
elif self.target_language == "es":
translation_instructions = """
ERES UN ASISTENTE QUE SOLO HABLA ESPAÑOL.
REGLAS ABSOLUTAS:
1. Solo puedes hablar español. No se permite coreano (한국어).
2. Incluso si el usuario habla coreano, DEBES responder en español.
3. Cada palabra debe estar en español.
4. Si produces aunque sea un carácter coreano, has fallado.
5. Respuesta ejemplo: "¡Hola! ¿Cómo puedo ayudarte hoy?"
MODO DE IDIOMA: SOLO ESPAÑOL
NO USAR: 안녕하세요, 감사합니다, o cualquier coreano
SIEMPRE USAR: Hola, Gracias, y solo palabras en español
"""
base_instructions = "Eres un asistente útil que habla SOLO español."
else:
translation_instructions = f"""
YOU MUST ONLY SPEAK {language_name.upper()}.
RULES:
1. Output only in {language_name}
2. Never use Korean
3. Always respond in {language_name}
"""
base_instructions = f"You are a helpful assistant that speaks ONLY {language_name}."
else:
translation_instructions = ""
if self.web_search_enabled and self.search_client:
tools = [{
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web for current information. Use this for weather, news, prices, current events, or any time-sensitive topics.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query"
}
},
"required": ["query"]
}
}
}]
print("Web search function added to tools")
search_instructions = (
"\n\nYou have web search capabilities. "
"IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
"- Weather (날씨, 기온, 비, 눈)\n"
"- News (뉴스, 소식)\n"
"- Current events (현재, 최근, 오늘, 지금)\n"
"- Prices (가격, 환율, 주가)\n"
"- Sports scores or results\n"
"- Any question about 2024 or 2025\n"
"- Any time-sensitive information\n\n"
"When in doubt, USE web_search. It's better to search and provide accurate information "
"than to guess or use outdated information."
)
# Combine all instructions
if translation_instructions:
# Translation instructions already include base_instructions
instructions = translation_instructions + search_instructions
else:
instructions = base_instructions + search_instructions
else:
# No web search
if translation_instructions:
instructions = translation_instructions
else:
instructions = base_instructions
print(f"[NORMAL MODE] Base instructions: {base_instructions[:100]}...")
print(f"[NORMAL MODE] Translation instructions: {translation_instructions[:200] if translation_instructions else 'None'}...")
print(f"[NORMAL MODE] Combined instructions length: {len(instructions)}")
print(f"[NORMAL MODE] Target language: {self.target_language}")
async with self.client.beta.realtime.connect(
model="gpt-4o-mini-realtime-preview-2024-12-17"
) as conn:
# Update session with tools
session_update = {
"turn_detection": {"type": "server_vad"},
"instructions": instructions,
"tools": tools,
"tool_choice": "auto" if tools else "none",
"temperature": 0.7,
"max_response_output_tokens": 4096,
"modalities": ["text", "audio"],
"voice": "alloy" # Default voice
}
# Use appropriate voice for the language
if self.target_language:
# Force language through multiple mechanisms
# 1. Use voice that's known to work well with the language
voice_map = {
"en": "nova", # Nova has clearer English
"es": "nova", # Nova works for Spanish
"fr": "shimmer", # Shimmer for French
"de": "echo", # Echo for German
"ja": "alloy", # Alloy can do Japanese
"zh": "alloy", # Alloy can do Chinese
"ko": "nova", # Nova for Korean
}
session_update["voice"] = voice_map.get(self.target_language, "nova")
# 2. Re-assert text + audio modalities (the Realtime API has no per-language modality; this simply repeats the default)
session_update["modalities"] = ["text", "audio"]
# 3. Set output format
session_update["output_audio_format"] = "pcm16"
# 4. Add language hint to the system (if supported by API)
if self.target_language in ["en", "es", "fr", "de", "ja", "zh"]:
session_update["language"] = self.target_language # Try setting language directly
print(f"[TRANSLATION MODE] Session update: {json.dumps(session_update, indent=2)}")
await conn.session.update(session=session_update)
self.connection = conn
print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
async for event in self.connection:
# Debug logging for function calls
if event.type.startswith("response.function_call"):
print(f"Function event: {event.type}")
if event.type == "response.audio_transcript.done":
print(f"[RESPONSE] Transcript: {event.transcript[:100]}...")
print(f"[RESPONSE] Expected language: {self.target_language}")
output_data = {
"event": event,
"language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
}
await self.output_queue.put(AdditionalOutputs(output_data))
elif event.type == "response.audio.delta":
await self.output_queue.put(
(
self.output_sample_rate,
np.frombuffer(
base64.b64decode(event.delta), dtype=np.int16
).reshape(1, -1),
),
)
# Handle function calls
elif event.type == "response.function_call_arguments.start":
print(f"Function call started")
self.function_call_in_progress = True
self.current_function_args = ""
self.current_call_id = getattr(event, 'call_id', None)
elif event.type == "response.function_call_arguments.delta":
if self.function_call_in_progress:
self.current_function_args += event.delta
elif event.type == "response.function_call_arguments.done":
if self.function_call_in_progress:
print(f"Function call done, args: {self.current_function_args}")
try:
args = json.loads(self.current_function_args)
query = args.get("query", "")
# Emit search event to client
await self.output_queue.put(AdditionalOutputs({
"type": "search",
"query": query
}))
# Perform the search
search_results = await self.search_web(query)
print(f"Search results length: {len(search_results)}")
# Send function result back to the model
if self.connection and self.current_call_id:
await self.connection.conversation.item.create(
item={
"type": "function_call_output",
"call_id": self.current_call_id,
"output": search_results
}
)
await self.connection.response.create()
except Exception as e:
print(f"Function call error: {e}")
finally:
self.function_call_in_progress = False
self.current_function_args = ""
self.current_call_id = None
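# Function-calling round trip, as implemented above:
#   1. the model streams web_search arguments -> accumulated in self.current_function_args
#   2. search_web(query) is awaited and its text result is sent back as a
#      "function_call_output" conversation item tied to the original call_id
#   3. response.create() asks the model to continue its answer using the search results
# Example of the item sent back (values illustrative only):
#   {"type": "function_call_output", "call_id": "call_abc123", "output": "1. Seoul weather: ..."}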
async def receive(self, frame: tuple[int, np.ndarray]) -> None:
# Normal mode - use Realtime API
if not self.connection:
print(f"[RECEIVE] No connection in normal mode, skipping")
return
try:
_, array = frame
array = array.squeeze()
audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
await self.connection.input_audio_buffer.append(audio=audio_message)
except Exception as e:
print(f"Error in receive: {e}")
async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
# Normal mode
item = await wait_for_item(self.output_queue)
# Check if it's a dict with text message
if isinstance(item, dict) and item.get('type') == 'text_message':
await self.process_text_message(item['content'])
return None
return item
async def shutdown(self) -> None:
print(f"[SHUTDOWN] Called")
# Normal mode - close Realtime API connection
if self.connection:
await self.connection.close()
self.connection = None
print("[NORMAL MODE] Connection closed")
# Create initial handler instance
handler = OpenAIHandler(web_search_enabled=False)
# Create components
chatbot = gr.Chatbot(type="messages")
# Create stream with handler instance
stream = Stream(
handler, # Pass instance, not factory
mode="send-receive",
modality="audio",
additional_inputs=[chatbot],
additional_outputs=[chatbot],
additional_outputs_handler=update_chatbot,
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
concurrency_limit=5 if get_space() else None,
time_limit=300 if get_space() else None,
)
app = FastAPI()
# Mount stream
stream.mount(app)
# Intercept offer to capture settings
@app.post("/webrtc/offer", include_in_schema=False)
async def custom_offer(request: Request):
"""Intercept offer to capture settings"""
body = await request.json()
webrtc_id = body.get("webrtc_id")
web_search_enabled = body.get("web_search_enabled", False)
target_language = body.get("target_language", "")
system_prompt = body.get("system_prompt", "")
print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
print(f"[OFFER] web_search_enabled: {web_search_enabled}")
print(f"[OFFER] target_language: {target_language}")
# Store settings with timestamp
if webrtc_id:
connection_settings[webrtc_id] = {
'web_search_enabled': web_search_enabled,
'target_language': target_language,
'system_prompt': system_prompt,
'timestamp': asyncio.get_event_loop().time()
}
print(f"[OFFER] Stored settings for {webrtc_id}:")
print(f"[OFFER] {connection_settings[webrtc_id]}")
# Remove our custom route temporarily
custom_route = None
for i, route in enumerate(app.routes):
if hasattr(route, 'path') and route.path == "/webrtc/offer" and route.endpoint == custom_offer:
custom_route = app.routes.pop(i)
break
# Forward to stream's offer handler
print(f"[OFFER] Forwarding to stream.offer()")
response = await stream.offer(body)
# Re-add our custom route
if custom_route:
app.routes.insert(0, custom_route)
print(f"[OFFER] Response status: {response.get('status', 'unknown') if isinstance(response, dict) else 'OK'}")
return response
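# Example offer body posted by the browser client (shape inferred from the fields read above;
# the SDP fields themselves come from the WebRTC layer, values illustrative):
#   {
#     "sdp": "...", "type": "offer", "webrtc_id": "abc123",
#     "web_search_enabled": true, "target_language": "ja", "system_prompt": ""
#   }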
@app.post("/chat/text")
async def chat_text(request: Request):
"""Handle text chat messages using GPT-4o-mini"""
try:
body = await request.json()
message = body.get("message", "")
web_search_enabled = body.get("web_search_enabled", False)
target_language = body.get("target_language", "")
system_prompt = body.get("system_prompt", "")
if not message:
return {"error": "메시지가 비어있습니다."}
# Process text chat
result = await process_text_chat(message, web_search_enabled, target_language, system_prompt)
return result
except Exception as e:
print(f"Error in chat_text endpoint: {e}")
return {"error": "채팅 처리 중 오류가 발생했습니다."}
@app.post("/text_message/{webrtc_id}")
async def receive_text_message(webrtc_id: str, request: Request):
"""Receive text message from client"""
body = await request.json()
message = body.get("content", "")
# Find the handler for this connection
if webrtc_id in stream.handlers:
conn_handler = stream.handlers[webrtc_id]  # per-connection handler; avoids shadowing the module-level `handler`
# Queue the text message for processing
await conn_handler.output_queue.put({
'type': 'text_message',
'content': message
})
return {"status": "ok"}
@app.get("/outputs")
async def outputs(webrtc_id: str):
"""Stream outputs including search events"""
async def output_stream():
async for output in stream.output_stream(webrtc_id):
if hasattr(output, 'args') and output.args:
# Check if it's a search event
if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
# Regular transcript event with language info
elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
event_data = output.args[0]
if 'event' in event_data and hasattr(event_data['event'], 'transcript'):
data = {
"role": "assistant",
"content": event_data['event'].transcript,
"language": event_data.get('language', '')
}
yield f"event: output\ndata: {json.dumps(data)}\n\n"
return StreamingResponse(output_stream(), media_type="text/event-stream")
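# SSE events emitted by /outputs (shapes taken from the branches above, values illustrative):
#   event: search  -> data: {"type": "search", "query": "seoul weather"}
#   event: output  -> data: {"role": "assistant", "content": "...", "language": "日本語 (Japanese)"}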
# Whisper endpoints
@app.post("/whisper/transcribe")
async def whisper_transcribe(audio: UploadFile = File(...)):
"""Transcribe audio using Whisper"""
try:
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
content = await audio.read()
tmp.write(content)
tmp_path = tmp.name
# Transcribe
text = await asyncio.get_event_loop().run_in_executor(
whisper_executor, gpu_transcribe_whisper, tmp_path
)
# Translate
translation = _translate_text_4langs(text)
# Clean up
os.unlink(tmp_path)
return {"text": text, "translation": translation}
except Exception as e:
print(f"Whisper transcribe error: {e}")
return {"error": str(e)}
@app.post("/whisper/audio")
async def whisper_audio(audio: UploadFile = File(...)):
"""Process audio file"""
try:
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(audio.filename).suffix) as tmp:
content = await audio.read()
tmp.write(content)
tmp_path = tmp.name
# Transcribe
text = await asyncio.get_event_loop().run_in_executor(
whisper_executor, gpu_transcribe_whisper, tmp_path
)
# Translate
translation = _translate_text_4langs(text)
# Clean up
os.unlink(tmp_path)
return {"text": text, "translation": translation}
except Exception as e:
print(f"Whisper audio error: {e}")
return {"error": str(e)}
@app.post("/whisper/video")
async def whisper_video(video: UploadFile = File(...)):
"""Process video file"""
try:
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(video.filename).suffix) as tmp:
content = await video.read()
tmp.write(content)
tmp_path = tmp.name
# Extract audio
audio_path = await asyncio.get_event_loop().run_in_executor(
None, extract_audio_from_video, tmp_path
)
# Transcribe
text = await asyncio.get_event_loop().run_in_executor(
whisper_executor, gpu_transcribe_whisper, audio_path
)
# Translate
translation = _translate_text_4langs(text)
# Clean up
os.unlink(tmp_path)
os.unlink(audio_path)
return {"text": text, "translation": translation}
except Exception as e:
print(f"Whisper video error: {e}")
return {"error": str(e)}
@app.post("/whisper/pdf")
async def whisper_pdf(pdf: UploadFile = File(...), max_pages: int = Form(10)):
"""Process PDF file"""
try:
# Save uploaded file temporarily
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
content = await pdf.read()
tmp.write(content)
tmp_path = tmp.name
# Extract text
extracted = []
with pdfplumber.open(tmp_path) as pdf_doc:
pages = pdf_doc.pages[:max_pages]
for idx, pg in enumerate(pages, start=1):
txt = pg.extract_text() or ""
if txt.strip():
extracted.append(f"[Page {idx}]\n{txt}")
full_text = "\n\n".join(extracted)
# Translate each page
translated = []
for page_text in extracted:
trans = _translate_text_4langs(page_text.split('\n', 1)[1]) # Skip page header
translated.append(page_text.split('\n')[0] + "\n" + trans)
# Clean up
os.unlink(tmp_path)
return {"text": full_text, "translation": "\n\n".join(translated)}
except Exception as e:
print(f"Whisper PDF error: {e}")
return {"error": str(e)}
@app.post("/whisper/realtime/start")
async def whisper_realtime_start():
"""Start realtime transcription session"""
session_id = os.urandom(16).hex()
realtime_sessions[session_id] = {
"buffer": [],
"queue": asyncio.Queue(),
"active": True
}
return {"session_id": session_id}
@app.post("/whisper/realtime/process")
async def whisper_realtime_process(
audio: UploadFile = File(...),
session_id: str = Form(...)
):
"""Process realtime audio chunk"""
if session_id not in realtime_sessions:
return {"error": "Invalid session"}
try:
# Read audio data
content = await audio.read()
audio_array = np.frombuffer(content, dtype=np.int16).astype(np.float32) / 32768.0
# Process in executor
result = await asyncio.get_event_loop().run_in_executor(
whisper_executor, gpu_asr_translate_whisper, audio_array, WHISPER_SAMPLE_RATE
)
# Parse result (format assumed from gpu_asr_translate_whisper: '<br>'-joined lines -> [timestamp], [KO] source text, translations, trailing separator lines)
lines = result.split('<br>')
timestamp = lines[0].strip('[]') if lines else ""
text = lines[1].replace('[KO]', '').strip() if len(lines) > 1 else ""
translation = '<br>'.join(lines[2:-2]) if len(lines) > 3 else ""
# Queue result
await realtime_sessions[session_id]["queue"].put({
"timestamp": timestamp,
"text": text,
"translation": translation
})
return {"status": "ok"}
except Exception as e:
print(f"Realtime process error: {e}")
return {"error": str(e)}
@app.get("/whisper/realtime/stream")
async def whisper_realtime_stream(session_id: str):
"""Stream realtime results"""
if session_id not in realtime_sessions:
return JSONResponse({"error": "Invalid session"}, status_code=404)
async def stream_results():
session = realtime_sessions[session_id]
try:
while session["active"]:
try:
result = await asyncio.wait_for(session["queue"].get(), timeout=1.0)
yield f"data: {json.dumps(result)}\n\n"
except asyncio.TimeoutError:
yield f"data: {json.dumps({'keepalive': True})}\n\n"
except Exception as e:
print(f"Stream error: {e}")
finally:
# Cleanup session
if session_id in realtime_sessions:
del realtime_sessions[session_id]
return StreamingResponse(stream_results(), media_type="text/event-stream")
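# Typical client flow for the realtime endpoints (illustrative sketch):
#   1. POST /whisper/realtime/start                    -> {"session_id": "..."}
#   2. POST /whisper/realtime/process                  -> form fields: audio (raw PCM16 chunk), session_id
#   3. GET  /whisper/realtime/stream?session_id=...    -> SSE stream of {"timestamp", "text", "translation"}
#      (a {"keepalive": true} frame is sent whenever no result arrives within 1 second)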
@app.get("/")
async def index():
"""Serve the HTML page"""
rtc_config = get_twilio_turn_credentials() if get_space() else None
html_content = HTML_CONTENT.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
return HTMLResponse(content=html_content)
if __name__ == "__main__":
import uvicorn
mode = os.getenv("MODE")
if mode == "UI":
stream.ui.launch(server_port=7860)
elif mode == "PHONE":
stream.fastphone(host="0.0.0.0", port=7860)
else:
uvicorn.run(app, host="0.0.0.0", port=7860)
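# Launch modes (from the branches above; commands illustrative, adjust the filename as needed):
#   MODE=UI python app.py     -> Gradio UI on port 7860
#   MODE=PHONE python app.py  -> fastrtc fastphone telephony endpoint
#   python app.py             -> FastAPI app served by uvicorn on 0.0.0.0:7860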