Spaces:
Running
Running
Update app-backup4.py
Browse files- app-backup4.py +131 -1085
app-backup4.py
CHANGED
@@ -6,8 +6,8 @@ import os
|
|
6 |
import numpy as np
|
7 |
import openai
|
8 |
from dotenv import load_dotenv
|
9 |
-
from fastapi import FastAPI, Request
|
10 |
-
from fastapi.responses import HTMLResponse, StreamingResponse
|
11 |
from fastrtc import (
|
12 |
AdditionalOutputs,
|
13 |
AsyncStreamHandler,
|
@@ -23,35 +23,10 @@ import gradio as gr
|
|
23 |
import io
|
24 |
from scipy import signal
|
25 |
import wave
|
26 |
-
import torch
|
27 |
-
from transformers import pipeline
|
28 |
-
import tempfile
|
29 |
-
import subprocess
|
30 |
-
import pdfplumber
|
31 |
-
import scipy.signal as sps
|
32 |
-
from datetime import datetime
|
33 |
-
from zoneinfo import ZoneInfo
|
34 |
-
import concurrent.futures
|
35 |
|
36 |
load_dotenv()
|
37 |
|
38 |
SAMPLE_RATE = 24000
|
39 |
-
WHISPER_SAMPLE_RATE = 16000
|
40 |
-
SEOUL_TZ = ZoneInfo("Asia/Seoul")
|
41 |
-
|
42 |
-
# Whisper model settings
|
43 |
-
WHISPER_MODEL_NAME = "openai/whisper-large-v3-turbo"
|
44 |
-
WHISPER_BATCH_SIZE = 8
|
45 |
-
|
46 |
-
# Real-time segmentation parameters
|
47 |
-
MIN_SEG_SEC = 10
|
48 |
-
MAX_SEG_SEC = 15
|
49 |
-
SILENCE_SEC = 0.6
|
50 |
-
SILENCE_THRESH = 1e-4
|
51 |
-
|
52 |
-
# CPU-side pool for Whisper tasks
|
53 |
-
whisper_executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
|
54 |
-
whisper_futures_queue: list[concurrent.futures.Future] = []
|
55 |
|
56 |
# Supported languages for OpenAI Realtime API
|
57 |
SUPPORTED_LANGUAGES = {
|
@@ -86,14 +61,14 @@ SUPPORTED_LANGUAGES = {
|
|
86 |
"ms": "Bahasa Melayu (Malay)"
|
87 |
}
|
88 |
|
89 |
-
# HTML content embedded as a string
|
90 |
HTML_CONTENT = """<!DOCTYPE html>
|
91 |
<html lang="ko">
|
92 |
|
93 |
<head>
|
94 |
<meta charset="UTF-8">
|
95 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
96 |
-
<title>Mouth of 'MOUSE'
|
97 |
<style>
|
98 |
:root {
|
99 |
--primary-color: #6f42c1;
|
@@ -135,43 +110,6 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
135 |
flex-shrink: 0;
|
136 |
background-color: var(--card-bg);
|
137 |
}
|
138 |
-
.tabs-container {
|
139 |
-
display: flex;
|
140 |
-
gap: 10px;
|
141 |
-
margin-bottom: 20px;
|
142 |
-
border-bottom: 2px solid var(--border-color);
|
143 |
-
padding-bottom: 10px;
|
144 |
-
overflow-x: auto;
|
145 |
-
scrollbar-width: thin;
|
146 |
-
scrollbar-color: var(--primary-color) var(--card-bg);
|
147 |
-
}
|
148 |
-
.tab-button {
|
149 |
-
padding: 10px 20px;
|
150 |
-
background-color: var(--card-bg);
|
151 |
-
color: var(--text-color);
|
152 |
-
border: 1px solid var(--border-color);
|
153 |
-
border-radius: 8px 8px 0 0;
|
154 |
-
cursor: pointer;
|
155 |
-
transition: all 0.3s;
|
156 |
-
white-space: nowrap;
|
157 |
-
font-size: 14px;
|
158 |
-
}
|
159 |
-
.tab-button:hover {
|
160 |
-
background-color: var(--secondary-color);
|
161 |
-
}
|
162 |
-
.tab-button.active {
|
163 |
-
background-color: var(--primary-color);
|
164 |
-
border-bottom: 2px solid var(--primary-color);
|
165 |
-
}
|
166 |
-
.tab-content {
|
167 |
-
display: none;
|
168 |
-
flex-grow: 1;
|
169 |
-
overflow: hidden;
|
170 |
-
flex-direction: column;
|
171 |
-
}
|
172 |
-
.tab-content.active {
|
173 |
-
display: flex;
|
174 |
-
}
|
175 |
.main-content {
|
176 |
display: flex;
|
177 |
gap: 20px;
|
@@ -281,7 +219,7 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
281 |
.text-input-section {
|
282 |
margin-top: 15px;
|
283 |
}
|
284 |
-
input[type="text"],
|
285 |
width: 100%;
|
286 |
background-color: var(--dark-bg);
|
287 |
color: var(--text-color);
|
@@ -380,91 +318,11 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
380 |
flex-shrink: 0;
|
381 |
padding-top: 20px;
|
382 |
}
|
383 |
-
/* Whisper Tab Styles */
|
384 |
-
.whisper-container {
|
385 |
-
padding: 20px;
|
386 |
-
background-color: var(--card-bg);
|
387 |
-
border-radius: 12px;
|
388 |
-
border: 1px solid var(--border-color);
|
389 |
-
margin: 20px auto;
|
390 |
-
max-width: 800px;
|
391 |
-
width: 100%;
|
392 |
-
}
|
393 |
-
.whisper-input-section {
|
394 |
-
margin-bottom: 20px;
|
395 |
-
}
|
396 |
-
.whisper-output-section {
|
397 |
-
display: grid;
|
398 |
-
grid-template-columns: 1fr 1fr;
|
399 |
-
gap: 20px;
|
400 |
-
margin-top: 20px;
|
401 |
-
}
|
402 |
-
.whisper-output {
|
403 |
-
background-color: var(--dark-bg);
|
404 |
-
padding: 15px;
|
405 |
-
border-radius: 8px;
|
406 |
-
border: 1px solid var(--border-color);
|
407 |
-
max-height: 300px;
|
408 |
-
overflow-y: auto;
|
409 |
-
}
|
410 |
-
.whisper-output h4 {
|
411 |
-
margin-top: 0;
|
412 |
-
color: var(--primary-color);
|
413 |
-
}
|
414 |
-
.file-upload-area {
|
415 |
-
border: 2px dashed var(--border-color);
|
416 |
-
border-radius: 8px;
|
417 |
-
padding: 30px;
|
418 |
-
text-align: center;
|
419 |
-
cursor: pointer;
|
420 |
-
transition: all 0.3s;
|
421 |
-
}
|
422 |
-
.file-upload-area:hover {
|
423 |
-
border-color: var(--primary-color);
|
424 |
-
background-color: rgba(111, 66, 193, 0.1);
|
425 |
-
}
|
426 |
-
.file-upload-area.drag-over {
|
427 |
-
border-color: var(--primary-color);
|
428 |
-
background-color: rgba(111, 66, 193, 0.2);
|
429 |
-
}
|
430 |
-
.realtime-output {
|
431 |
-
background-color: var(--dark-bg);
|
432 |
-
padding: 20px;
|
433 |
-
border-radius: 8px;
|
434 |
-
margin-top: 20px;
|
435 |
-
min-height: 200px;
|
436 |
-
max-height: 400px;
|
437 |
-
overflow-y: auto;
|
438 |
-
}
|
439 |
-
.recording-indicator {
|
440 |
-
display: inline-flex;
|
441 |
-
align-items: center;
|
442 |
-
gap: 10px;
|
443 |
-
padding: 10px 20px;
|
444 |
-
background-color: #f44336;
|
445 |
-
color: white;
|
446 |
-
border-radius: 20px;
|
447 |
-
animation: pulse 1.5s infinite;
|
448 |
-
}
|
449 |
-
.recording-dot {
|
450 |
-
width: 10px;
|
451 |
-
height: 10px;
|
452 |
-
background-color: white;
|
453 |
-
border-radius: 50%;
|
454 |
-
animation: blink 1s infinite;
|
455 |
-
}
|
456 |
-
@keyframes blink {
|
457 |
-
0%, 50% { opacity: 1; }
|
458 |
-
51%, 100% { opacity: 0; }
|
459 |
-
}
|
460 |
/* Responsive design */
|
461 |
@media (max-width: 1024px) {
|
462 |
.sidebar {
|
463 |
width: 300px;
|
464 |
}
|
465 |
-
.whisper-output-section {
|
466 |
-
grid-template-columns: 1fr;
|
467 |
-
}
|
468 |
}
|
469 |
@media (max-width: 768px) {
|
470 |
.main-content {
|
@@ -477,9 +335,6 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
477 |
.chat-section {
|
478 |
height: 400px;
|
479 |
}
|
480 |
-
.tabs-container {
|
481 |
-
flex-wrap: wrap;
|
482 |
-
}
|
483 |
}
|
484 |
button {
|
485 |
background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
|
@@ -577,10 +432,6 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
577 |
background-color: #ff9800;
|
578 |
color: white;
|
579 |
}
|
580 |
-
.toast.success {
|
581 |
-
background-color: #4caf50;
|
582 |
-
color: white;
|
583 |
-
}
|
584 |
.status-indicator {
|
585 |
display: inline-flex;
|
586 |
align-items: center;
|
@@ -657,7 +508,7 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
657 |
<div class="mouse-ears mouse-ear-right"></div>
|
658 |
<div class="mouse-face"></div>
|
659 |
</div>
|
660 |
-
<h1>MOUSE
|
661 |
</div>
|
662 |
<div class="status-indicator">
|
663 |
<div id="status-dot" class="status-dot disconnected"></div>
|
@@ -665,252 +516,131 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
665 |
</div>
|
666 |
</div>
|
667 |
|
668 |
-
<div class="
|
669 |
-
<
|
670 |
-
|
671 |
-
|
672 |
-
|
673 |
-
|
674 |
-
|
675 |
-
|
676 |
-
|
677 |
-
<!-- Voice Chat Tab (Original) -->
|
678 |
-
<div id="voice-chat" class="tab-content active">
|
679 |
-
<div class="main-content">
|
680 |
-
<div class="sidebar">
|
681 |
-
<div class="settings-section">
|
682 |
-
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정(텍스트 채팅에만 적용)</h3>
|
683 |
-
<div class="settings-grid">
|
684 |
-
<div class="setting-item">
|
685 |
-
<span class="setting-label">웹 검색</span>
|
686 |
-
<div id="search-toggle" class="toggle-switch">
|
687 |
-
<div class="toggle-slider"></div>
|
688 |
-
</div>
|
689 |
-
</div>
|
690 |
-
<div class="setting-item">
|
691 |
-
<span class="setting-label">다국어 번역 채팅</span>
|
692 |
-
<select id="language-select">
|
693 |
-
<option value="">비활성화</option>
|
694 |
-
<option value="ko">한국어 (Korean)</option>
|
695 |
-
<option value="en">English</option>
|
696 |
-
<option value="es">Español (Spanish)</option>
|
697 |
-
<option value="fr">Français (French)</option>
|
698 |
-
<option value="de">Deutsch (German)</option>
|
699 |
-
<option value="it">Italiano (Italian)</option>
|
700 |
-
<option value="pt">Português (Portuguese)</option>
|
701 |
-
<option value="ru">Русский (Russian)</option>
|
702 |
-
<option value="ja">日本語 (Japanese)</option>
|
703 |
-
<option value="zh">中文 (Chinese)</option>
|
704 |
-
<option value="ar">العربية (Arabic)</option>
|
705 |
-
<option value="hi">हिन्दी (Hindi)</option>
|
706 |
-
<option value="nl">Nederlands (Dutch)</option>
|
707 |
-
<option value="pl">Polski (Polish)</option>
|
708 |
-
<option value="tr">Türkçe (Turkish)</option>
|
709 |
-
<option value="vi">Tiếng Việt (Vietnamese)</option>
|
710 |
-
<option value="th">ไทย (Thai)</option>
|
711 |
-
<option value="id">Bahasa Indonesia</option>
|
712 |
-
<option value="sv">Svenska (Swedish)</option>
|
713 |
-
<option value="da">Dansk (Danish)</option>
|
714 |
-
<option value="no">Norsk (Norwegian)</option>
|
715 |
-
<option value="fi">Suomi (Finnish)</option>
|
716 |
-
<option value="he">עברית (Hebrew)</option>
|
717 |
-
<option value="uk">Українська (Ukrainian)</option>
|
718 |
-
<option value="cs">Čeština (Czech)</option>
|
719 |
-
<option value="el">Ελληνικά (Greek)</option>
|
720 |
-
<option value="ro">Română (Romanian)</option>
|
721 |
-
<option value="hu">Magyar (Hungarian)</option>
|
722 |
-
<option value="ms">Bahasa Melayu (Malay)</option>
|
723 |
-
</select>
|
724 |
</div>
|
725 |
</div>
|
726 |
-
<div class="
|
727 |
-
<
|
728 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
729 |
</div>
|
730 |
</div>
|
731 |
-
|
732 |
-
|
733 |
-
<
|
734 |
</div>
|
735 |
</div>
|
736 |
|
737 |
-
<div class="
|
738 |
-
<
|
739 |
-
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화</h3>
|
740 |
-
<div class="chat-messages" id="chat-messages"></div>
|
741 |
-
<div class="text-input-section" style="margin-top: 10px;">
|
742 |
-
<div style="display: flex; gap: 10px;">
|
743 |
-
<input type="text" id="text-input" placeholder="텍스트 메시지를 입력하세요..." style="flex-grow: 1;" />
|
744 |
-
<button id="send-button" style="display: none;">전송</button>
|
745 |
-
</div>
|
746 |
-
</div>
|
747 |
-
</div>
|
748 |
-
</div>
|
749 |
-
</div>
|
750 |
-
</div>
|
751 |
-
|
752 |
-
<!-- Microphone Whisper Tab -->
|
753 |
-
<div id="mic-whisper" class="tab-content">
|
754 |
-
<div class="whisper-container">
|
755 |
-
<h3>마이크 녹음 → 전사 및 4개 언어 번역</h3>
|
756 |
-
<div class="whisper-input-section">
|
757 |
-
<button id="mic-record-btn" onclick="toggleMicRecording()">녹음 시작</button>
|
758 |
-
<div id="mic-status" style="margin-top: 10px;"></div>
|
759 |
-
</div>
|
760 |
-
<div class="whisper-output-section">
|
761 |
-
<div class="whisper-output">
|
762 |
-
<h4>원문</h4>
|
763 |
-
<div id="mic-original"></div>
|
764 |
-
</div>
|
765 |
-
<div class="whisper-output">
|
766 |
-
<h4>번역 (EN/ZH/TH/RU)</h4>
|
767 |
-
<div id="mic-translation"></div>
|
768 |
-
</div>
|
769 |
</div>
|
770 |
</div>
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
</div>
|
782 |
-
</div>
|
783 |
-
<div class="whisper-output-section">
|
784 |
-
<div class="whisper-output">
|
785 |
-
<h4>원문</h4>
|
786 |
-
<div id="audio-original"></div>
|
787 |
-
</div>
|
788 |
-
<div class="whisper-output">
|
789 |
-
<h4>번역 (EN/ZH/TH/RU)</h4>
|
790 |
-
<div id="audio-translation"></div>
|
791 |
-
</div>
|
792 |
-
</div>
|
793 |
-
</div>
|
794 |
-
</div>
|
795 |
-
|
796 |
-
<!-- Video File Whisper Tab -->
|
797 |
-
<div id="video-whisper" class="tab-content">
|
798 |
-
<div class="whisper-container">
|
799 |
-
<h3>비디오 파일 → 오디오 추출 → 전사 및 4개 언어 번역</h3>
|
800 |
-
<div class="whisper-input-section">
|
801 |
-
<div class="file-upload-area" id="video-upload-area">
|
802 |
-
<p>비디오 파일을 드래그하거나 클릭하여 업로드</p>
|
803 |
-
<input type="file" id="video-file-input" accept="video/*" style="display: none;">
|
804 |
-
</div>
|
805 |
-
</div>
|
806 |
-
<div class="whisper-output-section">
|
807 |
-
<div class="whisper-output">
|
808 |
-
<h4>원문</h4>
|
809 |
-
<div id="video-original"></div>
|
810 |
-
</div>
|
811 |
-
<div class="whisper-output">
|
812 |
-
<h4>번역 (EN/ZH/TH/RU)</h4>
|
813 |
-
<div id="video-translation"></div>
|
814 |
-
</div>
|
815 |
-
</div>
|
816 |
-
</div>
|
817 |
-
</div>
|
818 |
-
|
819 |
-
<!-- PDF Whisper Tab -->
|
820 |
-
<div id="pdf-whisper" class="tab-content">
|
821 |
-
<div class="whisper-container">
|
822 |
-
<h3>PDF 파일 → 텍스트 추출 → 4개 언어 번역</h3>
|
823 |
-
<div class="whisper-input-section">
|
824 |
-
<div class="file-upload-area" id="pdf-upload-area">
|
825 |
-
<p>PDF 파일을 드래그하거나 클릭하여 업로드</p>
|
826 |
-
<input type="file" id="pdf-file-input" accept=".pdf" style="display: none;">
|
827 |
-
</div>
|
828 |
-
<div class="setting-item" style="margin-top: 15px;">
|
829 |
-
<span class="setting-label">최대 페이지:</span>
|
830 |
-
<input type="number" id="pdf-max-pages" value="10" min="1" max="50" style="width: 80px;">
|
831 |
-
</div>
|
832 |
-
</div>
|
833 |
-
<div class="whisper-output-section">
|
834 |
-
<div class="whisper-output">
|
835 |
-
<h4>추출된 텍스트</h4>
|
836 |
-
<div id="pdf-original"></div>
|
837 |
-
</div>
|
838 |
-
<div class="whisper-output">
|
839 |
-
<h4>번역 (EN/ZH/TH/RU)</h4>
|
840 |
-
<div id="pdf-translation"></div>
|
841 |
</div>
|
842 |
</div>
|
843 |
</div>
|
844 |
</div>
|
845 |
-
|
846 |
-
<!-- Realtime Whisper Tab -->
|
847 |
-
<div id="realtime-whisper" class="tab-content">
|
848 |
-
<div class="whisper-container">
|
849 |
-
<h3>실시간 통역 (Korean → EN/ZH/TH/RU)</h3>
|
850 |
-
<p>10-15초 문장 단위로 자동 전환 — 최신 내용이 위에 표시됩니다.</p>
|
851 |
-
<div class="whisper-input-section">
|
852 |
-
<button id="realtime-start-btn" onclick="toggleRealtimeRecording()">실시간 통역 시작</button>
|
853 |
-
<div id="realtime-status" style="margin-top: 10px;"></div>
|
854 |
-
</div>
|
855 |
-
<div class="realtime-output" id="realtime-output"></div>
|
856 |
-
</div>
|
857 |
-
</div>
|
858 |
</div>
|
859 |
<audio id="audio-output"></audio>
|
860 |
|
861 |
<script>
|
862 |
-
|
863 |
-
|
864 |
-
console.log('Switching to tab:', tabName);
|
865 |
-
|
866 |
-
// Hide all tabs
|
867 |
-
document.querySelectorAll('.tab-content').forEach(tab => {
|
868 |
-
tab.style.display = 'none';
|
869 |
-
tab.classList.remove('active');
|
870 |
-
});
|
871 |
-
|
872 |
-
console.log('All initialized!');
|
873 |
-
|
874 |
-
// Remove active from all buttons
|
875 |
-
document.querySelectorAll('.tab-button').forEach(btn => {
|
876 |
-
btn.classList.remove('active');
|
877 |
-
});
|
878 |
-
|
879 |
-
// Show selected tab
|
880 |
-
const selectedTab = document.getElementById(tabName);
|
881 |
-
if (selectedTab) {
|
882 |
-
selectedTab.style.display = 'flex';
|
883 |
-
selectedTab.classList.add('active');
|
884 |
-
}
|
885 |
-
|
886 |
-
// Mark button as active
|
887 |
-
event.target.classList.add('active');
|
888 |
-
}
|
889 |
-
|
890 |
-
// Global variables
|
891 |
-
let peerConnection = null;
|
892 |
-
let webrtc_id = null;
|
893 |
let webSearchEnabled = false;
|
894 |
let selectedLanguage = "";
|
895 |
let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
896 |
let audioLevel = 0;
|
897 |
-
let animationFrame
|
898 |
-
let audioContext
|
899 |
-
let analyser = null;
|
900 |
-
let audioSource = null;
|
901 |
let dataChannel = null;
|
902 |
let isVoiceActive = false;
|
903 |
|
904 |
-
//
|
905 |
-
|
906 |
-
|
907 |
-
|
908 |
-
|
909 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
910 |
|
911 |
async function sendTextMessage() {
|
912 |
-
const textInput = document.getElementById('text-input');
|
913 |
-
const chatMessages = document.getElementById('chat-messages');
|
914 |
const message = textInput.value.trim();
|
915 |
if (!message) return;
|
916 |
|
@@ -964,25 +694,20 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
964 |
}
|
965 |
|
966 |
function updateStatus(state) {
|
967 |
-
const statusDot = document.getElementById('status-dot');
|
968 |
-
const statusText = document.getElementById('status-text');
|
969 |
-
const sendButton = document.getElementById('send-button');
|
970 |
-
|
971 |
statusDot.className = 'status-dot ' + state;
|
972 |
if (state === 'connected') {
|
973 |
statusText.textContent = '연결됨';
|
974 |
-
|
975 |
isVoiceActive = true;
|
976 |
} else if (state === 'connecting') {
|
977 |
statusText.textContent = '연결 중...';
|
978 |
-
|
979 |
} else {
|
980 |
statusText.textContent = '연결 대기 중';
|
981 |
-
|
982 |
isVoiceActive = false;
|
983 |
}
|
984 |
}
|
985 |
-
|
986 |
function updateButtonState() {
|
987 |
const button = document.getElementById('start-button');
|
988 |
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
|
@@ -1012,7 +737,6 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1012 |
updateStatus('disconnected');
|
1013 |
}
|
1014 |
}
|
1015 |
-
|
1016 |
function setupAudioVisualization(stream) {
|
1017 |
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
1018 |
analyser = audioContext.createAnalyser();
|
@@ -1047,7 +771,6 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1047 |
|
1048 |
updateAudioLevel();
|
1049 |
}
|
1050 |
-
|
1051 |
function showError(message) {
|
1052 |
const toast = document.getElementById('error-toast');
|
1053 |
toast.textContent = message;
|
@@ -1057,20 +780,8 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1057 |
toast.style.display = 'none';
|
1058 |
}, 5000);
|
1059 |
}
|
1060 |
-
|
1061 |
-
function showSuccess(message) {
|
1062 |
-
const toast = document.getElementById('error-toast');
|
1063 |
-
toast.textContent = message;
|
1064 |
-
toast.className = 'toast success';
|
1065 |
-
toast.style.display = 'block';
|
1066 |
-
setTimeout(() => {
|
1067 |
-
toast.style.display = 'none';
|
1068 |
-
}, 3000);
|
1069 |
-
}
|
1070 |
-
|
1071 |
async function setupWebRTC() {
|
1072 |
-
const
|
1073 |
-
const config = typeof __RTC_CONFIGURATION__ !== 'undefined' ? __RTC_CONFIGURATION__ : {iceServers: [{urls: 'stun:stun.l.google.com:19302'}]};
|
1074 |
peerConnection = new RTCPeerConnection(config);
|
1075 |
const timeoutId = setTimeout(() => {
|
1076 |
const toast = document.getElementById('error-toast');
|
@@ -1134,6 +845,14 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1134 |
});
|
1135 |
webrtc_id = Math.random().toString(36).substring(7);
|
1136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1137 |
const response = await fetch('/webrtc/offer', {
|
1138 |
method: 'POST',
|
1139 |
headers: { 'Content-Type': 'application/json' },
|
@@ -1178,9 +897,7 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1178 |
stop();
|
1179 |
}
|
1180 |
}
|
1181 |
-
|
1182 |
function addMessage(role, content) {
|
1183 |
-
const chatMessages = document.getElementById('chat-messages');
|
1184 |
const messageDiv = document.createElement('div');
|
1185 |
messageDiv.classList.add('message', role);
|
1186 |
|
@@ -1269,490 +986,26 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
1269 |
webrtc_id = null;
|
1270 |
}
|
1271 |
}
|
1272 |
-
|
1273 |
-
|
1274 |
-
|
1275 |
-
|
1276 |
-
|
1277 |
-
const btn = document.getElementById('mic-record-btn');
|
1278 |
-
const status = document.getElementById('mic-status');
|
1279 |
-
|
1280 |
-
if (!isRecording) {
|
1281 |
-
try {
|
1282 |
-
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
1283 |
-
micRecorder = new MediaRecorder(stream);
|
1284 |
-
const chunks = [];
|
1285 |
-
|
1286 |
-
micRecorder.ondataavailable = (e) => chunks.push(e.data);
|
1287 |
-
micRecorder.onstop = async () => {
|
1288 |
-
const blob = new Blob(chunks, { type: 'audio/webm' });
|
1289 |
-
await processAudioBlob(blob, 'mic');
|
1290 |
-
stream.getTracks().forEach(track => track.stop());
|
1291 |
-
};
|
1292 |
-
|
1293 |
-
micRecorder.start();
|
1294 |
-
isRecording = true;
|
1295 |
-
btn.textContent = '녹음 중지';
|
1296 |
-
status.innerHTML = '<div class="recording-indicator"><div class="recording-dot"></div>녹음 중...</div>';
|
1297 |
-
} catch (err) {
|
1298 |
-
showError('마이크 접근 권한이 필요합니다.');
|
1299 |
-
}
|
1300 |
} else {
|
1301 |
-
|
1302 |
-
|
1303 |
-
btn.textContent = '녹음 시작';
|
1304 |
-
status.textContent = '처리 중...';
|
1305 |
-
}
|
1306 |
-
}
|
1307 |
-
|
1308 |
-
// Process audio blob (for microphone recording)
|
1309 |
-
async function processAudioBlob(blob, type) {
|
1310 |
-
const formData = new FormData();
|
1311 |
-
formData.append('audio', blob, 'recording.webm');
|
1312 |
-
|
1313 |
-
try {
|
1314 |
-
const response = await fetch('/whisper/transcribe', {
|
1315 |
-
method: 'POST',
|
1316 |
-
body: formData
|
1317 |
-
});
|
1318 |
-
|
1319 |
-
const result = await response.json();
|
1320 |
-
if (result.error) {
|
1321 |
-
showError(result.error);
|
1322 |
-
} else {
|
1323 |
-
document.getElementById(`${type}-original`).textContent = result.text;
|
1324 |
-
document.getElementById(`${type}-translation`).innerHTML = result.translation.replace(/\n/g, '<br>');
|
1325 |
-
document.getElementById(`${type}-status`).textContent = '';
|
1326 |
-
}
|
1327 |
-
} catch (error) {
|
1328 |
-
showError('처리 중 오류가 발생했습니다.');
|
1329 |
-
document.getElementById(`${type}-status`).textContent = '';
|
1330 |
-
}
|
1331 |
-
}
|
1332 |
-
|
1333 |
-
// Process audio file
|
1334 |
-
async function processAudioFile(file) {
|
1335 |
-
const formData = new FormData();
|
1336 |
-
formData.append('audio', file);
|
1337 |
-
|
1338 |
-
try {
|
1339 |
-
showSuccess('오디오 파일 처리 중...');
|
1340 |
-
const response = await fetch('/whisper/audio', {
|
1341 |
-
method: 'POST',
|
1342 |
-
body: formData
|
1343 |
-
});
|
1344 |
-
|
1345 |
-
const result = await response.json();
|
1346 |
-
if (result.error) {
|
1347 |
-
showError(result.error);
|
1348 |
-
} else {
|
1349 |
-
document.getElementById('audio-original').textContent = result.text;
|
1350 |
-
document.getElementById('audio-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
|
1351 |
-
}
|
1352 |
-
} catch (error) {
|
1353 |
-
showError('오디오 파일 처리 중 오류가 발생했습니다.');
|
1354 |
-
}
|
1355 |
-
}
|
1356 |
-
|
1357 |
-
// Process video file
|
1358 |
-
async function processVideoFile(file) {
|
1359 |
-
const formData = new FormData();
|
1360 |
-
formData.append('video', file);
|
1361 |
-
|
1362 |
-
try {
|
1363 |
-
showSuccess('비디오 파일 처리 중... (시간이 걸릴 수 있습니다)');
|
1364 |
-
const response = await fetch('/whisper/video', {
|
1365 |
-
method: 'POST',
|
1366 |
-
body: formData
|
1367 |
-
});
|
1368 |
-
|
1369 |
-
const result = await response.json();
|
1370 |
-
if (result.error) {
|
1371 |
-
showError(result.error);
|
1372 |
-
} else {
|
1373 |
-
document.getElementById('video-original').textContent = result.text;
|
1374 |
-
document.getElementById('video-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
|
1375 |
-
}
|
1376 |
-
} catch (error) {
|
1377 |
-
showError('비디오 파일 처리 중 오류가 발생했습니다.');
|
1378 |
-
}
|
1379 |
-
}
|
1380 |
-
|
1381 |
-
// Process PDF file
|
1382 |
-
async function processPDFFile(file) {
|
1383 |
-
const formData = new FormData();
|
1384 |
-
formData.append('pdf', file);
|
1385 |
-
formData.append('max_pages', document.getElementById('pdf-max-pages').value);
|
1386 |
-
|
1387 |
-
try {
|
1388 |
-
showSuccess('PDF 파일 처리 중...');
|
1389 |
-
const response = await fetch('/whisper/pdf', {
|
1390 |
-
method: 'POST',
|
1391 |
-
body: formData
|
1392 |
-
});
|
1393 |
-
|
1394 |
-
const result = await response.json();
|
1395 |
-
if (result.error) {
|
1396 |
-
showError(result.error);
|
1397 |
-
} else {
|
1398 |
-
document.getElementById('pdf-original').textContent = result.text;
|
1399 |
-
document.getElementById('pdf-translation').innerHTML = result.translation.replace(/\n/g, '<br>');
|
1400 |
-
}
|
1401 |
-
} catch (error) {
|
1402 |
-
showError('PDF 파일 처리 중 오류가 발생했습니다.');
|
1403 |
-
}
|
1404 |
-
}
|
1405 |
-
|
1406 |
-
// Realtime recording
|
1407 |
-
let realtimeEventSource = null;
|
1408 |
-
|
1409 |
-
async function toggleRealtimeRecording() {
|
1410 |
-
const btn = document.getElementById('realtime-start-btn');
|
1411 |
-
const status = document.getElementById('realtime-status');
|
1412 |
-
const output = document.getElementById('realtime-output');
|
1413 |
-
|
1414 |
-
if (!isRealtimeRecording) {
|
1415 |
-
try {
|
1416 |
-
const response = await fetch('/whisper/realtime/start', {
|
1417 |
-
method: 'POST'
|
1418 |
-
});
|
1419 |
-
|
1420 |
-
const result = await response.json();
|
1421 |
-
if (result.session_id) {
|
1422 |
-
// Start listening for results
|
1423 |
-
realtimeEventSource = new EventSource(`/whisper/realtime/stream?session_id=${result.session_id}`);
|
1424 |
-
|
1425 |
-
realtimeEventSource.onmessage = (event) => {
|
1426 |
-
const data = JSON.parse(event.data);
|
1427 |
-
if (data.timestamp) {
|
1428 |
-
const segment = document.createElement('div');
|
1429 |
-
segment.style.borderBottom = '1px solid #333';
|
1430 |
-
segment.style.paddingBottom = '15px';
|
1431 |
-
segment.style.marginBottom = '15px';
|
1432 |
-
segment.innerHTML = `
|
1433 |
-
<strong>[${data.timestamp}]</strong><br>
|
1434 |
-
<strong>[KO]</strong> ${data.text}<br>
|
1435 |
-
${data.translation.replace(/\n/g, '<br>')}
|
1436 |
-
`;
|
1437 |
-
output.insertBefore(segment, output.firstChild);
|
1438 |
-
}
|
1439 |
-
};
|
1440 |
-
|
1441 |
-
realtimeEventSource.onerror = () => {
|
1442 |
-
stopRealtimeRecording();
|
1443 |
-
showError('실시간 통역 연결이 끊어졌습니다.');
|
1444 |
-
};
|
1445 |
-
|
1446 |
-
// Start recording
|
1447 |
-
realtimeStream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
1448 |
-
|
1449 |
-
isRealtimeRecording = true;
|
1450 |
-
btn.textContent = '통역 중지';
|
1451 |
-
status.innerHTML = '<div class="recording-indicator"><div class="recording-dot"></div>실시간 통역 중...</div>';
|
1452 |
-
|
1453 |
-
// Send audio data periodically
|
1454 |
-
startRealtimeAudioCapture(result.session_id);
|
1455 |
-
}
|
1456 |
-
} catch (err) {
|
1457 |
-
showError('실시간 통역을 시작할 수 없습니다.');
|
1458 |
-
}
|
1459 |
-
} else {
|
1460 |
-
stopRealtimeRecording();
|
1461 |
-
}
|
1462 |
-
}
|
1463 |
-
|
1464 |
-
function stopRealtimeRecording() {
|
1465 |
-
if (realtimeEventSource) {
|
1466 |
-
realtimeEventSource.close();
|
1467 |
-
realtimeEventSource = null;
|
1468 |
-
}
|
1469 |
-
|
1470 |
-
if (realtimeStream) {
|
1471 |
-
realtimeStream.getTracks().forEach(track => track.stop());
|
1472 |
-
realtimeStream = null;
|
1473 |
}
|
1474 |
-
|
1475 |
-
isRealtimeRecording = false;
|
1476 |
-
document.getElementById('realtime-start-btn').textContent = '실시간 통역 시작';
|
1477 |
-
document.getElementById('realtime-status').textContent = '';
|
1478 |
-
}
|
1479 |
|
1480 |
-
|
1481 |
-
|
1482 |
-
|
1483 |
-
|
1484 |
-
|
1485 |
-
let audioBuffer = [];
|
1486 |
-
let silenceFrames = 0;
|
1487 |
-
const SILENCE_THRESHOLD = 0.01;
|
1488 |
-
const MIN_BUFFER_SIZE = 16000 * 2; // 2 seconds minimum
|
1489 |
-
const MAX_BUFFER_SIZE = 16000 * 15; // 15 seconds maximum
|
1490 |
-
|
1491 |
-
processor.onaudioprocess = async (e) => {
|
1492 |
-
if (!isRealtimeRecording) return;
|
1493 |
-
|
1494 |
-
const inputData = e.inputBuffer.getChannelData(0);
|
1495 |
-
audioBuffer.push(...inputData);
|
1496 |
-
|
1497 |
-
// Simple voice activity detection
|
1498 |
-
const rms = Math.sqrt(inputData.reduce((sum, val) => sum + val * val, 0) / inputData.length);
|
1499 |
-
|
1500 |
-
if (rms < SILENCE_THRESHOLD) {
|
1501 |
-
silenceFrames++;
|
1502 |
-
} else {
|
1503 |
-
silenceFrames = 0;
|
1504 |
-
}
|
1505 |
-
|
1506 |
-
// Send audio when we have enough silence or max buffer reached
|
1507 |
-
if ((silenceFrames > 20 && audioBuffer.length > MIN_BUFFER_SIZE) ||
|
1508 |
-
audioBuffer.length > MAX_BUFFER_SIZE) {
|
1509 |
-
|
1510 |
-
const audioData = new Float32Array(audioBuffer);
|
1511 |
-
audioBuffer = [];
|
1512 |
-
silenceFrames = 0;
|
1513 |
-
|
1514 |
-
// Convert to 16-bit PCM
|
1515 |
-
const pcmData = new Int16Array(audioData.length);
|
1516 |
-
for (let i = 0; i < audioData.length; i++) {
|
1517 |
-
pcmData[i] = Math.max(-32768, Math.min(32767, audioData[i] * 32768));
|
1518 |
-
}
|
1519 |
-
|
1520 |
-
// Send to server
|
1521 |
-
const formData = new FormData();
|
1522 |
-
formData.append('audio', new Blob([pcmData.buffer], { type: 'audio/pcm' }));
|
1523 |
-
formData.append('session_id', sessionId);
|
1524 |
-
|
1525 |
-
fetch('/whisper/realtime/process', {
|
1526 |
-
method: 'POST',
|
1527 |
-
body: formData
|
1528 |
-
}).catch(err => console.error('Error sending audio:', err));
|
1529 |
-
}
|
1530 |
-
};
|
1531 |
-
|
1532 |
-
source.connect(processor);
|
1533 |
-
processor.connect(audioContext.destination);
|
1534 |
-
}
|
1535 |
-
|
1536 |
-
// Simple initialization
|
1537 |
-
window.onload = function() {
|
1538 |
-
console.log('Page loaded!');
|
1539 |
-
|
1540 |
-
// Web search toggle
|
1541 |
-
document.getElementById('search-toggle').onclick = function() {
|
1542 |
-
webSearchEnabled = !webSearchEnabled;
|
1543 |
-
this.classList.toggle('active', webSearchEnabled);
|
1544 |
-
console.log('Web search:', webSearchEnabled);
|
1545 |
-
};
|
1546 |
-
|
1547 |
-
// Language select
|
1548 |
-
document.getElementById('language-select').onchange = function() {
|
1549 |
-
selectedLanguage = this.value;
|
1550 |
-
console.log('Language:', selectedLanguage);
|
1551 |
-
};
|
1552 |
-
|
1553 |
-
// System prompt
|
1554 |
-
document.getElementById('system-prompt').oninput = function() {
|
1555 |
-
systemPrompt = this.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
|
1556 |
-
};
|
1557 |
-
|
1558 |
-
// Text input enter key
|
1559 |
-
document.getElementById('text-input').onkeypress = function(e) {
|
1560 |
-
if (e.key === 'Enter' && !e.shiftKey) {
|
1561 |
-
e.preventDefault();
|
1562 |
-
sendTextMessage();
|
1563 |
-
}
|
1564 |
-
};
|
1565 |
-
|
1566 |
-
// Send button
|
1567 |
-
document.getElementById('send-button').onclick = sendTextMessage;
|
1568 |
-
document.getElementById('send-button').style.display = 'block';
|
1569 |
-
|
1570 |
-
// Start button
|
1571 |
-
document.getElementById('start-button').onclick = function() {
|
1572 |
-
if (!peerConnection || peerConnection.connectionState !== 'connected') {
|
1573 |
-
setupWebRTC();
|
1574 |
-
} else {
|
1575 |
-
stop();
|
1576 |
-
}
|
1577 |
-
};
|
1578 |
-
|
1579 |
-
// File upload areas
|
1580 |
-
const audioUploadArea = document.getElementById('audio-upload-area');
|
1581 |
-
if (audioUploadArea) {
|
1582 |
-
audioUploadArea.onclick = function() {
|
1583 |
-
document.getElementById('audio-file-input').click();
|
1584 |
-
};
|
1585 |
-
}
|
1586 |
-
|
1587 |
-
const videoUploadArea = document.getElementById('video-upload-area');
|
1588 |
-
if (videoUploadArea) {
|
1589 |
-
videoUploadArea.onclick = function() {
|
1590 |
-
document.getElementById('video-file-input').click();
|
1591 |
-
};
|
1592 |
-
}
|
1593 |
-
|
1594 |
-
const pdfUploadArea = document.getElementById('pdf-upload-area');
|
1595 |
-
if (pdfUploadArea) {
|
1596 |
-
pdfUploadArea.onclick = function() {
|
1597 |
-
document.getElementById('pdf-file-input').click();
|
1598 |
-
};
|
1599 |
-
}
|
1600 |
-
|
1601 |
-
// File input handlers
|
1602 |
-
const audioFileInput = document.getElementById('audio-file-input');
|
1603 |
-
if (audioFileInput) {
|
1604 |
-
audioFileInput.onchange = function(e) {
|
1605 |
-
if (e.target.files[0]) processAudioFile(e.target.files[0]);
|
1606 |
-
};
|
1607 |
-
}
|
1608 |
-
|
1609 |
-
const videoFileInput = document.getElementById('video-file-input');
|
1610 |
-
if (videoFileInput) {
|
1611 |
-
videoFileInput.onchange = function(e) {
|
1612 |
-
if (e.target.files[0]) processVideoFile(e.target.files[0]);
|
1613 |
-
};
|
1614 |
-
}
|
1615 |
-
|
1616 |
-
const pdfFileInput = document.getElementById('pdf-file-input');
|
1617 |
-
if (pdfFileInput) {
|
1618 |
-
pdfFileInput.onchange = function(e) {
|
1619 |
-
if (e.target.files[0]) processPDFFile(e.target.files[0]);
|
1620 |
-
};
|
1621 |
-
}
|
1622 |
-
|
1623 |
-
// Drag and drop handlers
|
1624 |
-
['audio', 'video', 'pdf'].forEach(type => {
|
1625 |
-
const area = document.getElementById(`${type}-upload-area`);
|
1626 |
-
if (area) {
|
1627 |
-
area.ondragover = function(e) {
|
1628 |
-
e.preventDefault();
|
1629 |
-
area.classList.add('drag-over');
|
1630 |
-
};
|
1631 |
-
|
1632 |
-
area.ondragleave = function() {
|
1633 |
-
area.classList.remove('drag-over');
|
1634 |
-
};
|
1635 |
-
|
1636 |
-
area.ondrop = function(e) {
|
1637 |
-
e.preventDefault();
|
1638 |
-
area.classList.remove('drag-over');
|
1639 |
-
const file = e.dataTransfer.files[0];
|
1640 |
-
if (file) {
|
1641 |
-
if (type === 'audio') processAudioFile(file);
|
1642 |
-
else if (type === 'video') processVideoFile(file);
|
1643 |
-
else if (type === 'pdf') processPDFFile(file);
|
1644 |
-
}
|
1645 |
-
};
|
1646 |
-
}
|
1647 |
-
});
|
1648 |
-
};
|
1649 |
</script>
|
1650 |
</body>
|
1651 |
|
1652 |
</html>"""
|
1653 |
|
1654 |
-
# Whisper model loader
|
1655 |
-
def _get_whisper_pipe():
|
1656 |
-
"""Lazy load Whisper pipeline"""
|
1657 |
-
if not hasattr(_get_whisper_pipe, "pipe"):
|
1658 |
-
device = 0 if torch.cuda.is_available() else "cpu"
|
1659 |
-
_get_whisper_pipe.pipe = pipeline(
|
1660 |
-
task="automatic-speech-recognition",
|
1661 |
-
model=WHISPER_MODEL_NAME,
|
1662 |
-
chunk_length_s=30,
|
1663 |
-
device=device,
|
1664 |
-
)
|
1665 |
-
return _get_whisper_pipe.pipe
|
1666 |
-
|
1667 |
-
# Audio helpers for Whisper
|
1668 |
-
def _ensure_16k_whisper(y: np.ndarray, sr: int) -> tuple[np.ndarray, int]:
|
1669 |
-
"""Resample audio to 16kHz for Whisper"""
|
1670 |
-
if sr == WHISPER_SAMPLE_RATE:
|
1671 |
-
return y.astype(np.float32), WHISPER_SAMPLE_RATE
|
1672 |
-
g = np.gcd(sr, WHISPER_SAMPLE_RATE)
|
1673 |
-
y = sps.resample_poly(y, WHISPER_SAMPLE_RATE // g, sr // g).astype(np.float32)
|
1674 |
-
return y, WHISPER_SAMPLE_RATE
|
1675 |
-
|
1676 |
-
def _should_flush_whisper(buffer: np.ndarray, sr: int) -> bool:
|
1677 |
-
"""Check if audio buffer should be flushed for processing"""
|
1678 |
-
dur = len(buffer) / sr
|
1679 |
-
if dur < MIN_SEG_SEC:
|
1680 |
-
return False
|
1681 |
-
tail_len = int(SILENCE_SEC * sr)
|
1682 |
-
tail = buffer[-tail_len:]
|
1683 |
-
rms = np.sqrt(np.mean(tail ** 2)) if len(tail) else 1.0
|
1684 |
-
end_of_sentence = rms < SILENCE_THRESH
|
1685 |
-
return end_of_sentence or dur >= MAX_SEG_SEC
|
1686 |
-
|
1687 |
-
# Translation helper
|
1688 |
-
def _translate_text_4langs(text: str) -> str:
|
1689 |
-
"""Translate text to 4 languages using OpenAI"""
|
1690 |
-
try:
|
1691 |
-
client = openai.OpenAI()
|
1692 |
-
prompt = (
|
1693 |
-
"Translate the following text into English (EN), Chinese (ZH), Thai (TH) and Russian (RU).\n"
|
1694 |
-
"Return ONLY the translations in this format (one per line):\n"
|
1695 |
-
"EN: <english>\nZH: <chinese>\nTH: <thai>\nRU: <russian>\n\n"
|
1696 |
-
f"Text: {text}"
|
1697 |
-
)
|
1698 |
-
|
1699 |
-
response = client.chat.completions.create(
|
1700 |
-
model="gpt-4o-mini",
|
1701 |
-
messages=[
|
1702 |
-
{"role": "system", "content": "You are a professional translator."},
|
1703 |
-
{"role": "user", "content": prompt}
|
1704 |
-
],
|
1705 |
-
temperature=0.7,
|
1706 |
-
max_tokens=512
|
1707 |
-
)
|
1708 |
-
|
1709 |
-
return response.choices[0].message.content.strip()
|
1710 |
-
except Exception as e:
|
1711 |
-
print(f"Translation error: {e}")
|
1712 |
-
return f"Translation error: {str(e)}"
|
1713 |
-
|
1714 |
-
# ffmpeg check
|
1715 |
-
def _check_ffmpeg() -> bool:
|
1716 |
-
try:
|
1717 |
-
subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
|
1718 |
-
return True
|
1719 |
-
except Exception:
|
1720 |
-
return False
|
1721 |
-
|
1722 |
-
_HAS_FFMPEG = _check_ffmpeg()
|
1723 |
-
|
1724 |
-
def extract_audio_from_video(video_path: str) -> str:
|
1725 |
-
"""Extract audio from video file"""
|
1726 |
-
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
|
1727 |
-
tmp.close()
|
1728 |
-
|
1729 |
-
if _HAS_FFMPEG:
|
1730 |
-
cmd = [
|
1731 |
-
"ffmpeg", "-i", video_path, "-vn",
|
1732 |
-
"-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "-y", tmp.name
|
1733 |
-
]
|
1734 |
-
result = subprocess.run(cmd, capture_output=True)
|
1735 |
-
if result.returncode != 0:
|
1736 |
-
os.unlink(tmp.name)
|
1737 |
-
raise RuntimeError("ffmpeg error extracting audio")
|
1738 |
-
return tmp.name
|
1739 |
-
else:
|
1740 |
-
raise RuntimeError("ffmpeg is required for video processing")
|
1741 |
-
|
1742 |
-
# GPU workers for Whisper
|
1743 |
-
def gpu_transcribe_whisper(audio_path: str) -> str:
|
1744 |
-
"""Transcribe audio using Whisper on GPU"""
|
1745 |
-
pipe = _get_whisper_pipe()
|
1746 |
-
result = pipe(audio_path, batch_size=WHISPER_BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)
|
1747 |
-
return result["text"].strip()
|
1748 |
-
|
1749 |
-
def gpu_asr_translate_whisper(audio: np.ndarray, sr: int) -> str:
|
1750 |
-
"""Transcribe and translate audio for realtime"""
|
1751 |
-
pipe = _get_whisper_pipe()
|
1752 |
-
ko = pipe({"array": audio, "sampling_rate": sr}, batch_size=WHISPER_BATCH_SIZE)["text"].strip()
|
1753 |
-
trans = _translate_text_4langs(ko).replace("\n", "<br>")
|
1754 |
-
ts = datetime.now(SEOUL_TZ).strftime("%Y-%m-%d %H:%M:%S")
|
1755 |
-
return f"[{ts}]<br>[KO] {ko}<br>{trans}<br>{'-'*40}<br>"
|
1756 |
|
1757 |
class BraveSearchClient:
|
1758 |
"""Brave Search API client"""
|
@@ -1803,9 +1056,6 @@ print(f"Search client initialized: {search_client is not None}, API key present:
|
|
1803 |
# Store connection settings
|
1804 |
connection_settings = {}
|
1805 |
|
1806 |
-
# Store realtime sessions
|
1807 |
-
realtime_sessions = {}
|
1808 |
-
|
1809 |
# Initialize OpenAI client for text chat
|
1810 |
client = openai.AsyncOpenAI()
|
1811 |
|
@@ -1890,7 +1140,7 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
|
|
1890 |
|
1891 |
# Call GPT-4o-mini
|
1892 |
response = await client.chat.completions.create(
|
1893 |
-
model="gpt-
|
1894 |
messages=messages,
|
1895 |
temperature=0.7,
|
1896 |
max_tokens=2000
|
@@ -1906,7 +1156,7 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
|
|
1906 |
# Try again with stronger prompt
|
1907 |
messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
|
1908 |
retry_response = await client.chat.completions.create(
|
1909 |
-
model="gpt-
|
1910 |
messages=messages,
|
1911 |
temperature=0.3,
|
1912 |
max_tokens=2000
|
@@ -2465,210 +1715,6 @@ async def outputs(webrtc_id: str):
|
|
2465 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
2466 |
|
2467 |
|
2468 |
-
# Whisper endpoints
|
2469 |
-
@app.post("/whisper/transcribe")
|
2470 |
-
async def whisper_transcribe(audio: UploadFile = File(...)):
|
2471 |
-
"""Transcribe audio using Whisper"""
|
2472 |
-
try:
|
2473 |
-
# Save uploaded file temporarily
|
2474 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
|
2475 |
-
content = await audio.read()
|
2476 |
-
tmp.write(content)
|
2477 |
-
tmp_path = tmp.name
|
2478 |
-
|
2479 |
-
# Transcribe
|
2480 |
-
text = await asyncio.get_event_loop().run_in_executor(
|
2481 |
-
whisper_executor, gpu_transcribe_whisper, tmp_path
|
2482 |
-
)
|
2483 |
-
|
2484 |
-
# Translate
|
2485 |
-
translation = _translate_text_4langs(text)
|
2486 |
-
|
2487 |
-
# Clean up
|
2488 |
-
os.unlink(tmp_path)
|
2489 |
-
|
2490 |
-
return {"text": text, "translation": translation}
|
2491 |
-
|
2492 |
-
except Exception as e:
|
2493 |
-
print(f"Whisper transcribe error: {e}")
|
2494 |
-
return {"error": str(e)}
|
2495 |
-
|
2496 |
-
|
2497 |
-
@app.post("/whisper/audio")
|
2498 |
-
async def whisper_audio(audio: UploadFile = File(...)):
|
2499 |
-
"""Process audio file"""
|
2500 |
-
try:
|
2501 |
-
# Save uploaded file temporarily
|
2502 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(audio.filename).suffix) as tmp:
|
2503 |
-
content = await audio.read()
|
2504 |
-
tmp.write(content)
|
2505 |
-
tmp_path = tmp.name
|
2506 |
-
|
2507 |
-
# Transcribe
|
2508 |
-
text = await asyncio.get_event_loop().run_in_executor(
|
2509 |
-
whisper_executor, gpu_transcribe_whisper, tmp_path
|
2510 |
-
)
|
2511 |
-
|
2512 |
-
# Translate
|
2513 |
-
translation = _translate_text_4langs(text)
|
2514 |
-
|
2515 |
-
# Clean up
|
2516 |
-
os.unlink(tmp_path)
|
2517 |
-
|
2518 |
-
return {"text": text, "translation": translation}
|
2519 |
-
|
2520 |
-
except Exception as e:
|
2521 |
-
print(f"Whisper audio error: {e}")
|
2522 |
-
return {"error": str(e)}
|
2523 |
-
|
2524 |
-
|
2525 |
-
@app.post("/whisper/video")
|
2526 |
-
async def whisper_video(video: UploadFile = File(...)):
|
2527 |
-
"""Process video file"""
|
2528 |
-
try:
|
2529 |
-
# Save uploaded file temporarily
|
2530 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=Path(video.filename).suffix) as tmp:
|
2531 |
-
content = await video.read()
|
2532 |
-
tmp.write(content)
|
2533 |
-
tmp_path = tmp.name
|
2534 |
-
|
2535 |
-
# Extract audio
|
2536 |
-
audio_path = await asyncio.get_event_loop().run_in_executor(
|
2537 |
-
None, extract_audio_from_video, tmp_path
|
2538 |
-
)
|
2539 |
-
|
2540 |
-
# Transcribe
|
2541 |
-
text = await asyncio.get_event_loop().run_in_executor(
|
2542 |
-
whisper_executor, gpu_transcribe_whisper, audio_path
|
2543 |
-
)
|
2544 |
-
|
2545 |
-
# Translate
|
2546 |
-
translation = _translate_text_4langs(text)
|
2547 |
-
|
2548 |
-
# Clean up
|
2549 |
-
os.unlink(tmp_path)
|
2550 |
-
os.unlink(audio_path)
|
2551 |
-
|
2552 |
-
return {"text": text, "translation": translation}
|
2553 |
-
|
2554 |
-
except Exception as e:
|
2555 |
-
print(f"Whisper video error: {e}")
|
2556 |
-
return {"error": str(e)}
|
2557 |
-
|
2558 |
-
|
2559 |
-
@app.post("/whisper/pdf")
|
2560 |
-
async def whisper_pdf(pdf: UploadFile = File(...), max_pages: int = Form(10)):
|
2561 |
-
"""Process PDF file"""
|
2562 |
-
try:
|
2563 |
-
# Save uploaded file temporarily
|
2564 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
|
2565 |
-
content = await pdf.read()
|
2566 |
-
tmp.write(content)
|
2567 |
-
tmp_path = tmp.name
|
2568 |
-
|
2569 |
-
# Extract text
|
2570 |
-
extracted = []
|
2571 |
-
with pdfplumber.open(tmp_path) as pdf_doc:
|
2572 |
-
pages = pdf_doc.pages[:max_pages]
|
2573 |
-
for idx, pg in enumerate(pages, start=1):
|
2574 |
-
txt = pg.extract_text() or ""
|
2575 |
-
if txt.strip():
|
2576 |
-
extracted.append(f"[Page {idx}]\n{txt}")
|
2577 |
-
|
2578 |
-
full_text = "\n\n".join(extracted)
|
2579 |
-
|
2580 |
-
# Translate each page
|
2581 |
-
translated = []
|
2582 |
-
for page_text in extracted:
|
2583 |
-
trans = _translate_text_4langs(page_text.split('\n', 1)[1]) # Skip page header
|
2584 |
-
translated.append(page_text.split('\n')[0] + "\n" + trans)
|
2585 |
-
|
2586 |
-
# Clean up
|
2587 |
-
os.unlink(tmp_path)
|
2588 |
-
|
2589 |
-
return {"text": full_text, "translation": "\n\n".join(translated)}
|
2590 |
-
|
2591 |
-
except Exception as e:
|
2592 |
-
print(f"Whisper PDF error: {e}")
|
2593 |
-
return {"error": str(e)}
|
2594 |
-
|
2595 |
-
|
2596 |
-
@app.post("/whisper/realtime/start")
|
2597 |
-
async def whisper_realtime_start():
|
2598 |
-
"""Start realtime transcription session"""
|
2599 |
-
session_id = os.urandom(16).hex()
|
2600 |
-
realtime_sessions[session_id] = {
|
2601 |
-
"buffer": [],
|
2602 |
-
"queue": asyncio.Queue(),
|
2603 |
-
"active": True
|
2604 |
-
}
|
2605 |
-
return {"session_id": session_id}
|
2606 |
-
|
2607 |
-
|
2608 |
-
@app.post("/whisper/realtime/process")
|
2609 |
-
async def whisper_realtime_process(
|
2610 |
-
audio: UploadFile = File(...),
|
2611 |
-
session_id: str = Form(...)
|
2612 |
-
):
|
2613 |
-
"""Process realtime audio chunk"""
|
2614 |
-
if session_id not in realtime_sessions:
|
2615 |
-
return {"error": "Invalid session"}
|
2616 |
-
|
2617 |
-
try:
|
2618 |
-
# Read audio data
|
2619 |
-
content = await audio.read()
|
2620 |
-
audio_array = np.frombuffer(content, dtype=np.int16).astype(np.float32) / 32768.0
|
2621 |
-
|
2622 |
-
# Process in executor
|
2623 |
-
result = await asyncio.get_event_loop().run_in_executor(
|
2624 |
-
whisper_executor, gpu_asr_translate_whisper, audio_array, WHISPER_SAMPLE_RATE
|
2625 |
-
)
|
2626 |
-
|
2627 |
-
# Parse result
|
2628 |
-
lines = result.split('<br>')
|
2629 |
-
timestamp = lines[0].strip('[]') if lines else ""
|
2630 |
-
text = lines[1].replace('[KO]', '').strip() if len(lines) > 1 else ""
|
2631 |
-
translation = '<br>'.join(lines[2:-2]) if len(lines) > 3 else ""
|
2632 |
-
|
2633 |
-
# Queue result
|
2634 |
-
await realtime_sessions[session_id]["queue"].put({
|
2635 |
-
"timestamp": timestamp,
|
2636 |
-
"text": text,
|
2637 |
-
"translation": translation
|
2638 |
-
})
|
2639 |
-
|
2640 |
-
return {"status": "ok"}
|
2641 |
-
|
2642 |
-
except Exception as e:
|
2643 |
-
print(f"Realtime process error: {e}")
|
2644 |
-
return {"error": str(e)}
|
2645 |
-
|
2646 |
-
|
2647 |
-
@app.get("/whisper/realtime/stream")
|
2648 |
-
async def whisper_realtime_stream(session_id: str):
|
2649 |
-
"""Stream realtime results"""
|
2650 |
-
if session_id not in realtime_sessions:
|
2651 |
-
return JSONResponse({"error": "Invalid session"}, status_code=404)
|
2652 |
-
|
2653 |
-
async def stream_results():
|
2654 |
-
session = realtime_sessions[session_id]
|
2655 |
-
try:
|
2656 |
-
while session["active"]:
|
2657 |
-
try:
|
2658 |
-
result = await asyncio.wait_for(session["queue"].get(), timeout=1.0)
|
2659 |
-
yield f"data: {json.dumps(result)}\n\n"
|
2660 |
-
except asyncio.TimeoutError:
|
2661 |
-
yield f"data: {json.dumps({'keepalive': True})}\n\n"
|
2662 |
-
except Exception as e:
|
2663 |
-
print(f"Stream error: {e}")
|
2664 |
-
finally:
|
2665 |
-
# Cleanup session
|
2666 |
-
if session_id in realtime_sessions:
|
2667 |
-
del realtime_sessions[session_id]
|
2668 |
-
|
2669 |
-
return StreamingResponse(stream_results(), media_type="text/event-stream")
|
2670 |
-
|
2671 |
-
|
2672 |
@app.get("/")
|
2673 |
async def index():
|
2674 |
"""Serve the HTML page"""
|
|
|
6 |
import numpy as np
|
7 |
import openai
|
8 |
from dotenv import load_dotenv
|
9 |
+
from fastapi import FastAPI, Request
|
10 |
+
from fastapi.responses import HTMLResponse, StreamingResponse
|
11 |
from fastrtc import (
|
12 |
AdditionalOutputs,
|
13 |
AsyncStreamHandler,
|
|
|
23 |
import io
|
24 |
from scipy import signal
|
25 |
import wave
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
load_dotenv()
|
28 |
|
29 |
SAMPLE_RATE = 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Supported languages for OpenAI Realtime API
|
32 |
SUPPORTED_LANGUAGES = {
|
|
|
61 |
"ms": "Bahasa Melayu (Malay)"
|
62 |
}
|
63 |
|
64 |
+
# HTML content embedded as a string
|
65 |
HTML_CONTENT = """<!DOCTYPE html>
|
66 |
<html lang="ko">
|
67 |
|
68 |
<head>
|
69 |
<meta charset="UTF-8">
|
70 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
71 |
+
<title>Mouth of 'MOUSE'</title>
|
72 |
<style>
|
73 |
:root {
|
74 |
--primary-color: #6f42c1;
|
|
|
110 |
flex-shrink: 0;
|
111 |
background-color: var(--card-bg);
|
112 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
.main-content {
|
114 |
display: flex;
|
115 |
gap: 20px;
|
|
|
219 |
.text-input-section {
|
220 |
margin-top: 15px;
|
221 |
}
|
222 |
+
input[type="text"], textarea {
|
223 |
width: 100%;
|
224 |
background-color: var(--dark-bg);
|
225 |
color: var(--text-color);
|
|
|
318 |
flex-shrink: 0;
|
319 |
padding-top: 20px;
|
320 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
321 |
/* Responsive design */
|
322 |
@media (max-width: 1024px) {
|
323 |
.sidebar {
|
324 |
width: 300px;
|
325 |
}
|
|
|
|
|
|
|
326 |
}
|
327 |
@media (max-width: 768px) {
|
328 |
.main-content {
|
|
|
335 |
.chat-section {
|
336 |
height: 400px;
|
337 |
}
|
|
|
|
|
|
|
338 |
}
|
339 |
button {
|
340 |
background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
|
|
|
432 |
background-color: #ff9800;
|
433 |
color: white;
|
434 |
}
|
|
|
|
|
|
|
|
|
435 |
.status-indicator {
|
436 |
display: inline-flex;
|
437 |
align-items: center;
|
|
|
508 |
<div class="mouse-ears mouse-ear-right"></div>
|
509 |
<div class="mouse-face"></div>
|
510 |
</div>
|
511 |
+
<h1>MOUSE 음성 챗</h1>
|
512 |
</div>
|
513 |
<div class="status-indicator">
|
514 |
<div id="status-dot" class="status-dot disconnected"></div>
|
|
|
516 |
</div>
|
517 |
</div>
|
518 |
|
519 |
+
<div class="main-content">
|
520 |
+
<div class="sidebar">
|
521 |
+
<div class="settings-section">
|
522 |
+
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">설정(텍스트 채팅에만 적용)</h3>
|
523 |
+
<div class="settings-grid">
|
524 |
+
<div class="setting-item">
|
525 |
+
<span class="setting-label">웹 검색</span>
|
526 |
+
<div id="search-toggle" class="toggle-switch">
|
527 |
+
<div class="toggle-slider"></div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
</div>
|
529 |
</div>
|
530 |
+
<div class="setting-item">
|
531 |
+
<span class="setting-label">다국어 번역 채팅</span>
|
532 |
+
<select id="language-select">
|
533 |
+
<option value="">비활성화</option>
|
534 |
+
<option value="ko">한국어 (Korean)</option>
|
535 |
+
<option value="en">English</option>
|
536 |
+
<option value="es">Español (Spanish)</option>
|
537 |
+
<option value="fr">Français (French)</option>
|
538 |
+
<option value="de">Deutsch (German)</option>
|
539 |
+
<option value="it">Italiano (Italian)</option>
|
540 |
+
<option value="pt">Português (Portuguese)</option>
|
541 |
+
<option value="ru">Русский (Russian)</option>
|
542 |
+
<option value="ja">日本語 (Japanese)</option>
|
543 |
+
<option value="zh">中文 (Chinese)</option>
|
544 |
+
<option value="ar">العربية (Arabic)</option>
|
545 |
+
<option value="hi">हिन्दी (Hindi)</option>
|
546 |
+
<option value="nl">Nederlands (Dutch)</option>
|
547 |
+
<option value="pl">Polski (Polish)</option>
|
548 |
+
<option value="tr">Türkçe (Turkish)</option>
|
549 |
+
<option value="vi">Tiếng Việt (Vietnamese)</option>
|
550 |
+
<option value="th">ไทย (Thai)</option>
|
551 |
+
<option value="id">Bahasa Indonesia</option>
|
552 |
+
<option value="sv">Svenska (Swedish)</option>
|
553 |
+
<option value="da">Dansk (Danish)</option>
|
554 |
+
<option value="no">Norsk (Norwegian)</option>
|
555 |
+
<option value="fi">Suomi (Finnish)</option>
|
556 |
+
<option value="he">עברית (Hebrew)</option>
|
557 |
+
<option value="uk">Українська (Ukrainian)</option>
|
558 |
+
<option value="cs">Čeština (Czech)</option>
|
559 |
+
<option value="el">Ελληνικά (Greek)</option>
|
560 |
+
<option value="ro">Română (Romanian)</option>
|
561 |
+
<option value="hu">Magyar (Hungarian)</option>
|
562 |
+
<option value="ms">Bahasa Melayu (Malay)</option>
|
563 |
+
</select>
|
564 |
</div>
|
565 |
</div>
|
566 |
+
<div class="text-input-section">
|
567 |
+
<label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
|
568 |
+
<textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
|
569 |
</div>
|
570 |
</div>
|
571 |
|
572 |
+
<div class="controls">
|
573 |
+
<button id="start-button">대화 시작</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
574 |
</div>
|
575 |
</div>
|
576 |
+
|
577 |
+
<div class="chat-section">
|
578 |
+
<div class="chat-container">
|
579 |
+
<h3 style="margin: 0 0 15px 0; color: var(--primary-color);">대화</h3>
|
580 |
+
<div class="chat-messages" id="chat-messages"></div>
|
581 |
+
<div class="text-input-section" style="margin-top: 10px;">
|
582 |
+
<div style="display: flex; gap: 10px;">
|
583 |
+
<input type="text" id="text-input" placeholder="텍스트 메시지를 입력하세요..." style="flex-grow: 1;" />
|
584 |
+
<button id="send-button" style="display: none;">전송</button>
|
585 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
586 |
</div>
|
587 |
</div>
|
588 |
</div>
|
589 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
</div>
|
591 |
<audio id="audio-output"></audio>
|
592 |
|
593 |
<script>
|
594 |
+
let peerConnection;
|
595 |
+
let webrtc_id;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
596 |
let webSearchEnabled = false;
|
597 |
let selectedLanguage = "";
|
598 |
let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
|
599 |
+
const audioOutput = document.getElementById('audio-output');
|
600 |
+
const startButton = document.getElementById('start-button');
|
601 |
+
const sendButton = document.getElementById('send-button');
|
602 |
+
const chatMessages = document.getElementById('chat-messages');
|
603 |
+
const statusDot = document.getElementById('status-dot');
|
604 |
+
const statusText = document.getElementById('status-text');
|
605 |
+
const searchToggle = document.getElementById('search-toggle');
|
606 |
+
const languageSelect = document.getElementById('language-select');
|
607 |
+
const systemPromptInput = document.getElementById('system-prompt');
|
608 |
+
const textInput = document.getElementById('text-input');
|
609 |
let audioLevel = 0;
|
610 |
+
let animationFrame;
|
611 |
+
let audioContext, analyser, audioSource;
|
|
|
|
|
612 |
let dataChannel = null;
|
613 |
let isVoiceActive = false;
|
614 |
|
615 |
+
// Web search toggle functionality
|
616 |
+
searchToggle.addEventListener('click', () => {
|
617 |
+
webSearchEnabled = !webSearchEnabled;
|
618 |
+
searchToggle.classList.toggle('active', webSearchEnabled);
|
619 |
+
console.log('Web search enabled:', webSearchEnabled);
|
620 |
+
});
|
621 |
+
|
622 |
+
// Language selection
|
623 |
+
languageSelect.addEventListener('change', () => {
|
624 |
+
selectedLanguage = languageSelect.value;
|
625 |
+
console.log('Selected language:', selectedLanguage);
|
626 |
+
});
|
627 |
+
|
628 |
+
// System prompt update
|
629 |
+
systemPromptInput.addEventListener('input', () => {
|
630 |
+
systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
|
631 |
+
});
|
632 |
+
|
633 |
+
// Text input handling
|
634 |
+
textInput.addEventListener('keypress', (e) => {
|
635 |
+
if (e.key === 'Enter' && !e.shiftKey) {
|
636 |
+
e.preventDefault();
|
637 |
+
sendTextMessage();
|
638 |
+
}
|
639 |
+
});
|
640 |
+
|
641 |
+
sendButton.addEventListener('click', sendTextMessage);
|
642 |
|
643 |
async function sendTextMessage() {
|
|
|
|
|
644 |
const message = textInput.value.trim();
|
645 |
if (!message) return;
|
646 |
|
|
|
694 |
}
|
695 |
|
696 |
function updateStatus(state) {
|
|
|
|
|
|
|
|
|
697 |
statusDot.className = 'status-dot ' + state;
|
698 |
if (state === 'connected') {
|
699 |
statusText.textContent = '연결됨';
|
700 |
+
sendButton.style.display = 'block';
|
701 |
isVoiceActive = true;
|
702 |
} else if (state === 'connecting') {
|
703 |
statusText.textContent = '연결 중...';
|
704 |
+
sendButton.style.display = 'none';
|
705 |
} else {
|
706 |
statusText.textContent = '연결 대기 중';
|
707 |
+
sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
|
708 |
isVoiceActive = false;
|
709 |
}
|
710 |
}
|
|
|
711 |
function updateButtonState() {
|
712 |
const button = document.getElementById('start-button');
|
713 |
if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
|
|
|
737 |
updateStatus('disconnected');
|
738 |
}
|
739 |
}
|
|
|
740 |
function setupAudioVisualization(stream) {
|
741 |
audioContext = new (window.AudioContext || window.webkitAudioContext)();
|
742 |
analyser = audioContext.createAnalyser();
|
|
|
771 |
|
772 |
updateAudioLevel();
|
773 |
}
|
|
|
774 |
function showError(message) {
|
775 |
const toast = document.getElementById('error-toast');
|
776 |
toast.textContent = message;
|
|
|
780 |
toast.style.display = 'none';
|
781 |
}, 5000);
|
782 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
783 |
async function setupWebRTC() {
|
784 |
+
const config = __RTC_CONFIGURATION__;
|
|
|
785 |
peerConnection = new RTCPeerConnection(config);
|
786 |
const timeoutId = setTimeout(() => {
|
787 |
const toast = document.getElementById('error-toast');
|
|
|
845 |
});
|
846 |
webrtc_id = Math.random().toString(36).substring(7);
|
847 |
|
848 |
+
// Log current settings before sending
|
849 |
+
console.log('Sending offer with settings:', {
|
850 |
+
webrtc_id: webrtc_id,
|
851 |
+
web_search_enabled: webSearchEnabled,
|
852 |
+
target_language: selectedLanguage,
|
853 |
+
system_prompt: systemPrompt
|
854 |
+
});
|
855 |
+
|
856 |
const response = await fetch('/webrtc/offer', {
|
857 |
method: 'POST',
|
858 |
headers: { 'Content-Type': 'application/json' },
|
|
|
897 |
stop();
|
898 |
}
|
899 |
}
|
|
|
900 |
function addMessage(role, content) {
|
|
|
901 |
const messageDiv = document.createElement('div');
|
902 |
messageDiv.classList.add('message', role);
|
903 |
|
|
|
986 |
webrtc_id = null;
|
987 |
}
|
988 |
}
|
989 |
+
startButton.addEventListener('click', () => {
|
990 |
+
console.log('clicked');
|
991 |
+
console.log(peerConnection, peerConnection?.connectionState);
|
992 |
+
if (!peerConnection || peerConnection.connectionState !== 'connected') {
|
993 |
+
setupWebRTC();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
994 |
} else {
|
995 |
+
console.log('stopping');
|
996 |
+
stop();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
997 |
}
|
998 |
+
});
|
|
|
|
|
|
|
|
|
999 |
|
1000 |
+
// Initialize on page load
|
1001 |
+
window.addEventListener('DOMContentLoaded', () => {
|
1002 |
+
sendButton.style.display = 'block';
|
1003 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1004 |
</script>
|
1005 |
</body>
|
1006 |
|
1007 |
</html>"""
|
1008 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1009 |
|
1010 |
class BraveSearchClient:
|
1011 |
"""Brave Search API client"""
|
|
|
1056 |
# Store connection settings
|
1057 |
connection_settings = {}
|
1058 |
|
|
|
|
|
|
|
1059 |
# Initialize OpenAI client for text chat
|
1060 |
client = openai.AsyncOpenAI()
|
1061 |
|
|
|
1140 |
|
1141 |
# Call GPT-4o-mini
|
1142 |
response = await client.chat.completions.create(
|
1143 |
+
model="gpt-4.1-mini",
|
1144 |
messages=messages,
|
1145 |
temperature=0.7,
|
1146 |
max_tokens=2000
|
|
|
1156 |
# Try again with stronger prompt
|
1157 |
messages[-1] = {"role": "user", "content": f"ONLY {SUPPORTED_LANGUAGES.get(target_language, target_language)}, NO KOREAN: {message}"}
|
1158 |
retry_response = await client.chat.completions.create(
|
1159 |
+
model="gpt-4.1-mini",
|
1160 |
messages=messages,
|
1161 |
temperature=0.3,
|
1162 |
max_tokens=2000
|
|
|
1715 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
1716 |
|
1717 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1718 |
@app.get("/")
|
1719 |
async def index():
|
1720 |
"""Serve the HTML page"""
|