Update app.py

app.py (CHANGED)
@@ -162,20 +162,6 @@ HTML_CONTENT = """<!DOCTYPE html>
             gap: 15px;
             margin-bottom: 15px;
         }
-        .interpretation-section {
-            display: flex;
-            flex-direction: column;
-            gap: 15px;
-            padding: 15px;
-            background-color: var(--dark-bg);
-            border-radius: 8px;
-            margin-top: 15px;
-        }
-        .interpretation-info {
-            font-size: 13px;
-            color: #999;
-            margin-top: 5px;
-        }
         .setting-item {
             display: flex;
             align-items: center;
@@ -252,46 +238,6 @@ HTML_CONTENT = """<!DOCTYPE html>
             resize: vertical;
             min-height: 80px;
         }
-        /* Multi-language selection */
-        .language-selection-grid {
-            display: grid;
-            grid-template-columns: repeat(2, 1fr);
-            gap: 10px;
-            margin-top: 10px;
-            max-height: 200px;
-            overflow-y: auto;
-            padding: 10px;
-            background-color: var(--dark-bg);
-            border-radius: 6px;
-            border: 1px solid var(--border-color);
-        }
-        .language-checkbox {
-            display: flex;
-            align-items: center;
-            gap: 8px;
-            font-size: 13px;
-            cursor: pointer;
-            padding: 5px;
-            border-radius: 4px;
-            transition: background-color 0.2s;
-        }
-        .language-checkbox:hover {
-            background-color: rgba(111, 66, 193, 0.1);
-        }
-        .language-checkbox input[type="checkbox"] {
-            width: 16px;
-            height: 16px;
-            cursor: pointer;
-        }
-        .language-checkbox.default {
-            font-weight: 500;
-            color: var(--primary-color);
-        }
-        .selected-languages {
-            margin-top: 10px;
-            font-size: 12px;
-            color: #999;
-        }
         .chat-container {
             border-radius: 12px;
             background-color: var(--card-bg);
@@ -358,63 +304,6 @@ HTML_CONTENT = """<!DOCTYPE html>
             padding: 10px;
             margin-bottom: 10px;
         }
-        .message.assistant.interpretation {
-            background: linear-gradient(135deg, #1a5a3e, #2e7d32);
-            font-style: normal;
-        }
-        .interpretation-result {
-            background: linear-gradient(135deg, #1e3a5f, #2c5282);
-            padding: 15px;
-            margin: 10px 0;
-            border-radius: 8px;
-            border: 1px solid rgba(66, 153, 225, 0.3);
-        }
-        .interpretation-header {
-            font-weight: bold;
-            color: #90cdf4;
-            margin-bottom: 10px;
-            display: flex;
-            align-items: center;
-            gap: 10px;
-        }
-        .interpretation-original {
-            font-size: 14px;
-            color: #cbd5e0;
-            margin-bottom: 15px;
-            padding: 10px;
-            background-color: rgba(0, 0, 0, 0.2);
-            border-radius: 4px;
-        }
-        .interpretation-translations {
-            display: flex;
-            flex-direction: column;
-            gap: 8px;
-        }
-        .translation-item {
-            display: flex;
-            align-items: baseline;
-            gap: 10px;
-            padding: 8px 12px;
-            background-color: rgba(255, 255, 255, 0.05);
-            border-radius: 4px;
-            border-left: 3px solid var(--primary-color);
-        }
-        .translation-lang {
-            font-weight: 500;
-            color: var(--primary-color);
-            min-width: 80px;
-            font-size: 13px;
-        }
-        .translation-text {
-            flex: 1;
-            color: var(--text-color);
-            font-size: 14px;
-        }
-        .interpretation-arrow {
-            color: #4caf50;
-            font-weight: bold;
-            margin: 0 10px;
-        }
         .language-info {
             font-size: 12px;
             color: #888;
@@ -639,7 +528,7 @@ HTML_CONTENT = """<!DOCTYPE html>
                 </div>
             </div>
             <div class="setting-item">
-                <span class="setting-label"
+                <span class="setting-label">다국어 번역 채팅</span>
                 <select id="language-select">
                     <option value="">비활성화</option>
                     <option value="ko">한국어 (Korean)</option>
@@ -674,30 +563,6 @@ HTML_CONTENT = """<!DOCTYPE html>
                 </select>
             </div>
         </div>
-        <div class="interpretation-section">
-            <div class="setting-item">
-                <span class="setting-label">자동 통역</span>
-                <div id="interpretation-toggle" class="toggle-switch">
-                    <div class="toggle-slider"></div>
-                </div>
-            </div>
-            <div id="interpretation-languages-container" style="display: none;">
-                <div class="setting-label" style="margin-bottom: 5px;">통역 언어 선택 (최대 4개)</div>
-                <div class="language-selection-grid" id="language-selection-grid">
-                    <!-- Languages will be populated by JavaScript -->
-                </div>
-                <div class="selected-languages" id="selected-languages-display">
-                    선택된 언어: 없음
-                </div>
-            </div>
-        </div>
-        <div class="interpretation-info" id="interpretation-info" style="display: none;">
-            <strong>통역 모드 안내:</strong><br>
-            • 음성으로 말하면 선택한 언어들로 자동 통역됩니다<br>
-            • Whisper + GPT-4o-mini를 사용합니다<br>
-            • 말을 마치고 잠시 기다리면 통역이 시작됩니다<br>
-            • 번역된 텍스트만 화면에 표시됩니다
-        </div>
         <div class="text-input-section">
             <label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
             <textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
@@ -730,8 +595,6 @@ HTML_CONTENT = """<!DOCTYPE html>
         let webrtc_id;
         let webSearchEnabled = false;
         let selectedLanguage = "";
-        let interpretationMode = false;
-        let interpretationLanguages = [];
         let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
         const audioOutput = document.getElementById('audio-output');
         const startButton = document.getElementById('start-button');
@@ -741,113 +604,14 @@ HTML_CONTENT = """<!DOCTYPE html>
         const statusText = document.getElementById('status-text');
         const searchToggle = document.getElementById('search-toggle');
         const languageSelect = document.getElementById('language-select');
-        const interpretationToggle = document.getElementById('interpretation-toggle');
-        const interpretationLanguagesContainer = document.getElementById('interpretation-languages-container');
-        const interpretationInfo = document.getElementById('interpretation-info');
         const systemPromptInput = document.getElementById('system-prompt');
         const textInput = document.getElementById('text-input');
-        const languageSelectionGrid = document.getElementById('language-selection-grid');
-        const selectedLanguagesDisplay = document.getElementById('selected-languages-display');
         let audioLevel = 0;
         let animationFrame;
         let audioContext, analyser, audioSource;
         let dataChannel = null;
         let isVoiceActive = false;
 
-        // Available languages for interpretation
-        const INTERPRETATION_LANGUAGES = {
-            "en": { name: "English", default: true },
-            "zh": { name: "中文 (Chinese)", default: true },
-            "th": { name: "ไทย (Thai)", default: true },
-            "ru": { name: "Русский (Russian)", default: true },
-            "ja": { name: "日本語 (Japanese)", default: false },
-            "es": { name: "Español (Spanish)", default: false },
-            "fr": { name: "Français (French)", default: false },
-            "de": { name: "Deutsch (German)", default: false },
-            "pt": { name: "Português (Portuguese)", default: false },
-            "ar": { name: "العربية (Arabic)", default: false },
-            "hi": { name: "हिन्दी (Hindi)", default: false },
-            "vi": { name: "Tiếng Việt (Vietnamese)", default: false },
-            "id": { name: "Bahasa Indonesia", default: false },
-            "it": { name: "Italiano (Italian)", default: false },
-            "nl": { name: "Nederlands (Dutch)", default: false },
-            "pl": { name: "Polski (Polish)", default: false },
-            "tr": { name: "Türkçe (Turkish)", default: false },
-            "sv": { name: "Svenska (Swedish)", default: false },
-            "da": { name: "Dansk (Danish)", default: false },
-            "no": { name: "Norsk (Norwegian)", default: false },
-            "fi": { name: "Suomi (Finnish)", default: false },
-            "he": { name: "עברית (Hebrew)", default: false },
-            "uk": { name: "Українська (Ukrainian)", default: false },
-            "cs": { name: "Čeština (Czech)", default: false },
-            "el": { name: "Ελληνικά (Greek)", default: false },
-            "ro": { name: "Română (Romanian)", default: false },
-            "hu": { name: "Magyar (Hungarian)", default: false },
-            "ms": { name: "Bahasa Melayu (Malay)", default: false }
-        };
-
-        // Initialize language selection grid
-        function initializeLanguageSelection() {
-            languageSelectionGrid.innerHTML = '';
-
-            // Sort languages: defaults first, then alphabetically
-            const sortedLanguages = Object.entries(INTERPRETATION_LANGUAGES).sort((a, b) => {
-                if (a[1].default && !b[1].default) return -1;
-                if (!a[1].default && b[1].default) return 1;
-                return a[1].name.localeCompare(b[1].name);
-            });
-
-            sortedLanguages.forEach(([code, lang]) => {
-                const label = document.createElement('label');
-                label.className = 'language-checkbox' + (lang.default ? ' default' : '');
-
-                const checkbox = document.createElement('input');
-                checkbox.type = 'checkbox';
-                checkbox.value = code;
-                checkbox.checked = lang.default;
-                checkbox.addEventListener('change', onLanguageCheckboxChange);
-
-                const text = document.createElement('span');
-                text.textContent = lang.name;
-
-                label.appendChild(checkbox);
-                label.appendChild(text);
-                languageSelectionGrid.appendChild(label);
-            });
-
-            // Initialize with default languages
-            updateSelectedLanguages();
-        }
-
-        function onLanguageCheckboxChange() {
-            const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
-
-            // Limit to 4 languages
-            if (checkedBoxes.length > 4) {
-                this.checked = false;
-                showError('최대 4개 언어까지 선택할 수 있습니다.');
-                return;
-            }
-
-            updateSelectedLanguages();
-        }
-
-        function updateSelectedLanguages() {
-            const checkedBoxes = languageSelectionGrid.querySelectorAll('input[type="checkbox"]:checked');
-            interpretationLanguages = Array.from(checkedBoxes).map(cb => cb.value);
-
-            if (interpretationLanguages.length === 0) {
-                selectedLanguagesDisplay.textContent = '선택된 언어: 없음';
-            } else {
-                const langNames = interpretationLanguages.map(code =>
-                    INTERPRETATION_LANGUAGES[code].name
-                ).join(', ');
-                selectedLanguagesDisplay.textContent = `선택된 언어 (${interpretationLanguages.length}/4): ${langNames}`;
-            }
-
-            console.log('Selected interpretation languages:', interpretationLanguages);
-        }
-
         // Web search toggle functionality
         searchToggle.addEventListener('click', () => {
             webSearchEnabled = !webSearchEnabled;
@@ -861,90 +625,6 @@ HTML_CONTENT = """<!DOCTYPE html>
             console.log('Selected language:', selectedLanguage);
         });
 
-        // Interpretation mode toggle - 수정된 버전
-        interpretationToggle.addEventListener('click', async () => {
-            if (!interpretationMode) {
-                // Turning ON interpretation mode
-                interpretationLanguagesContainer.style.display = 'block';
-                interpretationInfo.style.display = 'block';
-
-                // Check if any languages are selected
-                if (interpretationLanguages.length === 0) {
-                    showError('통역 언어를 선택해주세요.');
-                    interpretationToggle.classList.remove('active');
-                    return;
-                }
-
-                // Enable interpretation mode
-                interpretationMode = true;
-                interpretationToggle.classList.add('active');
-
-                // Disable other features
-                languageSelect.value = '';
-                selectedLanguage = '';
-                languageSelect.disabled = true;
-                searchToggle.classList.remove('active');
-                webSearchEnabled = false;
-                searchToggle.style.opacity = '0.5';
-                searchToggle.style.pointerEvents = 'none';
-                textInput.disabled = true;
-                textInput.placeholder = '통역 모드에서는 텍스트 입력이 지원되지 않습니다';
-                sendButton.style.display = 'none';
-
-                console.log('[FRONTEND] Interpretation mode enabled with languages:', interpretationLanguages);
-
-                // If connected, stop and restart with interpretation mode
-                if (peerConnection && peerConnection.connectionState === 'connected') {
-                    console.log('[FRONTEND] Stopping current connection for interpretation mode');
-                    showError('통역 모드 설정을 적용하기 위해 연결을 다시 시작합니다.');
-
-                    // Force stop the connection
-                    stop();
-
-                    // Wait a bit longer to ensure cleanup
-                    await new Promise(resolve => setTimeout(resolve, 1000));
-
-                    // Start new connection with interpretation mode
-                    console.log('[FRONTEND] Starting new connection with interpretation mode');
-                    setupWebRTC();
-                }
-            } else {
-                // Turning OFF interpretation mode
-                interpretationMode = false;
-                interpretationToggle.classList.remove('active');
-                interpretationLanguagesContainer.style.display = 'none';
-                interpretationInfo.style.display = 'none';
-
-                // Re-enable other features
-                languageSelect.disabled = false;
-                searchToggle.style.opacity = '1';
-                searchToggle.style.pointerEvents = 'auto';
-                textInput.disabled = false;
-                textInput.placeholder = '텍스트 메시지를 입력하세요...';
-                sendButton.style.display = 'block';
-
-                console.log('[FRONTEND] Interpretation mode disabled');
-
-                // If connected, restart to apply normal mode
-                if (peerConnection && peerConnection.connectionState === 'connected') {
-                    console.log('[FRONTEND] Stopping current connection for normal mode');
-                    showError('일반 모드로 전환하기 위해 연결을 다시 시작합니다.');
-
-                    // Force stop the connection
-                    stop();
-
-                    // Wait a bit longer to ensure cleanup
-                    await new Promise(resolve => setTimeout(resolve, 1000));
-
-                    // Start new connection with normal mode
-                    console.log('[FRONTEND] Starting new connection with normal mode');
-                    setupWebRTC();
-                }
-            }
-
-            console.log('[FRONTEND] Final interpretation mode:', interpretationMode);
-        });
-
         // System prompt update
         systemPromptInput.addEventListener('input', () => {
             systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
@@ -964,12 +644,6 @@ HTML_CONTENT = """<!DOCTYPE html>
             const message = textInput.value.trim();
             if (!message) return;
 
-            // Don't allow text messages in interpretation mode
-            if (interpretationMode) {
-                showError('통역 모드에서는 텍스트 입력이 지원되지 않습니다.');
-                return;
-            }
-
             // Add user message to chat
             addMessage('user', message);
             textInput.value = '';
@@ -1023,18 +697,14 @@ HTML_CONTENT = """<!DOCTYPE html>
            statusDot.className = 'status-dot ' + state;
            if (state === 'connected') {
                statusText.textContent = '연결됨';
-
-                sendButton.style.display = 'block';
-            }
+                sendButton.style.display = 'block';
                isVoiceActive = true;
            } else if (state === 'connecting') {
                statusText.textContent = '연결 중...';
                sendButton.style.display = 'none';
            } else {
                statusText.textContent = '연결 대기 중';
-
-                sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
-            }
+                sendButton.style.display = 'block'; // Show send button even when disconnected for text chat
                isVoiceActive = false;
            }
        }
@@ -1180,9 +850,7 @@ HTML_CONTENT = """<!DOCTYPE html>
                webrtc_id: webrtc_id,
                web_search_enabled: webSearchEnabled,
                target_language: selectedLanguage,
-                system_prompt: systemPrompt
-                interpretation_mode: interpretationMode,
-                interpretation_languages: interpretationLanguages
+                system_prompt: systemPrompt
            });
 
            const response = await fetch('/webrtc/offer', {
@@ -1194,9 +862,7 @@ HTML_CONTENT = """<!DOCTYPE html>
                    webrtc_id: webrtc_id,
                    web_search_enabled: webSearchEnabled,
                    target_language: selectedLanguage,
-                    system_prompt: systemPrompt
-                    interpretation_mode: interpretationMode,
-                    interpretation_languages: interpretationLanguages
+                    system_prompt: systemPrompt
                })
            });
            const serverResponse = await response.json();
@@ -1213,25 +879,10 @@ HTML_CONTENT = """<!DOCTYPE html>
                const eventJson = JSON.parse(event.data);
                let content = eventJson.content;
 
-                // Debug logging for interpretation mode
-                if (interpretationMode) {
-                    console.log('[INTERPRETATION OUTPUT]', {
-                        content: content,
-                        mode: eventJson.mode,
-                        translations: eventJson.translations
-                    });
-                }
-
                if (selectedLanguage && eventJson.language) {
                    content += ` <span class="language-info">[${eventJson.language}]</span>`;
-                } else if (interpretationMode && eventJson.mode === 'interpretation') {
-                    // Handle interpretation mode output
-                    if (eventJson.original && eventJson.translations) {
-                        addInterpretationResult(eventJson.original, eventJson.translations);
-                    }
-                } else {
-                    addMessage("assistant", content);
                }
+                addMessage("assistant", content);
            });
            eventSource.addEventListener("search", (event) => {
                const eventJson = JSON.parse(event.data);
@@ -1259,44 +910,6 @@ HTML_CONTENT = """<!DOCTYPE html>
            chatMessages.scrollTop = chatMessages.scrollHeight;
        }
 
-        function addInterpretationResult(original, translations) {
-            const resultDiv = document.createElement('div');
-            resultDiv.className = 'interpretation-result';
-
-            // Header
-            const headerDiv = document.createElement('div');
-            headerDiv.className = 'interpretation-header';
-            headerDiv.innerHTML = `
-                <span>🌐</span>
-                <span>자동 통역 결과</span>
-            `;
-            resultDiv.appendChild(headerDiv);
-
-            // Original text
-            const originalDiv = document.createElement('div');
-            originalDiv.className = 'interpretation-original';
-            originalDiv.innerHTML = `<strong>원문:</strong> ${original}`;
-            resultDiv.appendChild(originalDiv);
-
-            // Translations
-            const translationsDiv = document.createElement('div');
-            translationsDiv.className = 'interpretation-translations';
-
-            translations.forEach(trans => {
-                const transItem = document.createElement('div');
-                transItem.className = 'translation-item';
-                transItem.innerHTML = `
-                    <span class="translation-lang">${trans.language}:</span>
-                    <span class="translation-text">${trans.text}</span>
-                `;
-                translationsDiv.appendChild(transItem);
-            });
-
-            resultDiv.appendChild(translationsDiv);
-            chatMessages.appendChild(resultDiv);
-            chatMessages.scrollTop = chatMessages.scrollHeight;
-        }
-
        function stop() {
            console.log('[STOP] Stopping connection...');
 
@@ -1387,7 +1000,6 @@ HTML_CONTENT = """<!DOCTYPE html>
        // Initialize on page load
        window.addEventListener('DOMContentLoaded', () => {
            sendButton.style.display = 'block';
-            initializeLanguageSelection();
        });
    </script>
</body>
@@ -1566,8 +1178,7 @@ async def process_text_chat(message: str, web_search_enabled: bool, target_langu
 
 class OpenAIHandler(AsyncStreamHandler):
     def __init__(self, web_search_enabled: bool = False, target_language: str = "",
-                 system_prompt: str = "", webrtc_id: str = None
-                 interpretation_mode: bool = False, interpretation_languages: List[str] = None) -> None:
+                 system_prompt: str = "", webrtc_id: str = None) -> None:
         super().__init__(
             expected_layout="mono",
             output_sample_rate=SAMPLE_RATE,
@@ -1584,18 +1195,9 @@ class OpenAIHandler(AsyncStreamHandler):
         self.web_search_enabled = web_search_enabled
         self.target_language = target_language
         self.system_prompt = system_prompt
-        self.interpretation_mode = interpretation_mode
-        self.interpretation_languages = interpretation_languages or []
-
-        # For interpretation mode
-        self.audio_buffer = []
-        self.is_recording = False
-        self.silence_frames = 0
-        self.silence_threshold = 20  # Reduced for faster response (20 frames = ~0.4 seconds)
-        self.min_audio_length = 10  # Minimum frames to consider as speech
 
-        print(f"[INIT] Handler created with
-        f"
+        print(f"[INIT] Handler created with web_search={web_search_enabled}, "
+              f"target_language={target_language}")
 
     def copy(self):
         # Get the most recent settings
@@ -1610,20 +1212,16 @@ class OpenAIHandler(AsyncStreamHandler):
 
             # Log the settings being copied
             print(f"[COPY] Copying settings from {recent_id}:")
-            print(f"[COPY]   interpretation_mode={settings.get('interpretation_mode', False)}")
-            print(f"[COPY]   interpretation_languages={settings.get('interpretation_languages', [])}")
 
             return OpenAIHandler(
                 web_search_enabled=settings.get('web_search_enabled', False),
                 target_language=settings.get('target_language', ''),
                 system_prompt=settings.get('system_prompt', ''),
-                webrtc_id=recent_id
-                interpretation_mode=settings.get('interpretation_mode', False),
-                interpretation_languages=settings.get('interpretation_languages', [])
+                webrtc_id=recent_id
             )
 
         print(f"[COPY] No settings found, creating default handler")
-        return OpenAIHandler(web_search_enabled=False
+        return OpenAIHandler(web_search_enabled=False)
 
     async def search_web(self, query: str) -> str:
         """Perform web search and return formatted results"""
@@ -1658,173 +1256,9 @@ class OpenAIHandler(AsyncStreamHandler):
             )
             await self.connection.response.create()
 
-    async def _interpretation_loop(self):
-        """Keep the handler alive for interpretation mode"""
-        while self.interpretation_mode:
-            await asyncio.sleep(0.1)
-
-    async def process_interpretation(self):
-        """Process audio buffer for interpretation - text only output"""
-        if not self.audio_buffer or not self.interpretation_languages:
-            return
-
-        try:
-            print(f"[INTERPRETATION] Processing audio buffer with {len(self.audio_buffer)} frames")
-
-            # Convert audio buffer to WAV format
-            audio_data = np.concatenate(self.audio_buffer)
-
-            # Create WAV file in memory
-            wav_buffer = io.BytesIO()
-            with wave.open(wav_buffer, 'wb') as wav_file:
-                wav_file.setnchannels(1)  # Mono
-                wav_file.setsampwidth(2)  # 16-bit
-                wav_file.setframerate(SAMPLE_RATE)
-                wav_file.writeframes(audio_data.tobytes())
-
-            wav_buffer.seek(0)
-            wav_buffer.name = "audio.wav"
-
-            # 1. Transcribe with Whisper
-            print("[INTERPRETATION] Transcribing with Whisper...")
-            transcript = await self.client.audio.transcriptions.create(
-                model="whisper-1",
-                file=wav_buffer,
-                response_format="text"  # Get only text, no timestamps
-            )
-
-            user_text = transcript.text.strip()
-            print(f"[INTERPRETATION] Transcribed: {user_text}")
-
-            if not user_text:
-                return
-
-            # 2. Translate to all selected languages
-            translations = []
-
-            for lang_code in self.interpretation_languages:
-                target_lang_name = SUPPORTED_LANGUAGES.get(lang_code, lang_code)
-
-                # Create very explicit translation prompt
-                translation_examples = {
-                    "en": {
-                        "안녕하세요": "Hello",
-                        "감사합니다": "Thank you",
-                        "오늘 날씨가 좋네요": "The weather is nice today"
-                    },
-                    "ja": {
-                        "안녕하세요": "こんにちは",
-                        "감사합니다": "ありがとうございます",
-                        "오늘 날씨가 좋네요": "今日はいい天気ですね"
-                    },
-                    "zh": {
-                        "안녕하세요": "你好",
-                        "감사합니다": "谢谢",
-                        "오늘 날씨가 좋네요": "今天天气很好"
-                    },
-                    "es": {
-                        "안녕하세요": "Hola",
-                        "감사합니다": "Gracias",
-                        "오늘 날씨가 좋네요": "El clima está agradable hoy"
-                    },
-                    "ru": {
-                        "안녕하세요": "Привет",
-                        "감사합니다": "Спасибо",
-                        "오늘 날씨가 좋네요": "Сегодня хорошая погода"
-                    },
-                    "th": {
-                        "안녕하세요": "สวัสดี",
-                        "감사합니다": "ขอบคุณ",
-                        "오늘 날씨가 좋네요": "วันนี้อากาศดี"
-                    }
-                }
-
-                examples = translation_examples.get(lang_code, translation_examples.get("en", {}))
-                examples_text = "\n".join([f'"{k}" → "{v}"' for k, v in examples.items()])
-
-                system_prompt = f"""You are a direct translator from Korean to {target_lang_name}.
-
-CRITICAL RULES:
-1. TRANSLATE ONLY - Do not answer questions
-2. Do not generate responses
-3. Do not add any commentary
-4. Output ONLY the direct translation
-5. Preserve the original meaning exactly
-
-If the user says "오늘 날씨 어때?" translate it as "How's the weather today?" NOT answer about the weather.
-If the user says "안녕하세요" translate it as "Hello" NOT respond with greetings.
-
-Examples:
-{examples_text}
-
-Direct translation to {target_lang_name}:"""
-
-                print(f"[INTERPRETATION] Translating to {target_lang_name}...")
-
-                translation_response = await self.client.chat.completions.create(
-                    model="gpt-4o-mini",
-                    messages=[
-                        {
-                            "role": "system",
-                            "content": system_prompt
-                        },
-                        {
-                            "role": "user",
-                            "content": user_text  # Just the text, no additional prompt
-                        }
-                    ],
-                    temperature=0.1,
-                    max_tokens=200
-                )
-
-                translated_text = translation_response.choices[0].message.content.strip()
-
-                # Remove any Korean characters if they accidentally appear
-                import re
-                if re.search(r'[가-힣]', translated_text):
-                    print(f"[INTERPRETATION] WARNING: Korean characters detected in {target_lang_name} translation")
-                    translated_text = re.sub(r'[가-힣\s]+', ' ', translated_text).strip()
-
-                translations.append({
-                    "language": target_lang_name,
-                    "text": translated_text
-                })
-
-                print(f"[INTERPRETATION] {target_lang_name}: {translated_text}")
-
-            # Send interpretation result (text only)
-            output_data = {
-                "event": type('Event', (), {
-                    'transcript': f"통역 완료"  # Simple notification
-                })(),
-                "mode": "interpretation",
-                "original": user_text,
-                "translations": translations
-            }
-            await self.output_queue.put(AdditionalOutputs(output_data))
-
-        except Exception as e:
-            print(f"[INTERPRETATION] Error: {e}")
-            import traceback
-            traceback.print_exc()
-
-            # Send error message to client
-            error_data = {
-                "event": type('Event', (), {
-                    'transcript': f"통역 오류: {str(e)}"
-                })(),
-                "mode": "error"
-            }
-            await self.output_queue.put(AdditionalOutputs(error_data))
-        finally:
-            # Clear the audio buffer
-            self.audio_buffer = []
-            self.is_recording = False
-            self.silence_frames = 0
-
     def get_translation_instructions(self):
         """Get instructions for translation based on target language"""
-        if not self.target_language
+        if not self.target_language:
             return ""
 
         language_name = SUPPORTED_LANGUAGES.get(self.target_language, self.target_language)
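Note: the deleted interpretation path above reduces to a two-step OpenAI call — a Whisper transcription followed by a translate-only chat completion. Below is a minimal, self-contained sketch of that pattern; the helper name translate_korean is hypothetical, while the model names and parameters mirror the deleted code. One detail worth flagging: with response_format="text" the openai-python SDK returns a plain string, which suggests the transcript.text access in the deleted code may not have behaved as intended.

    import asyncio
    import openai

    async def translate_korean(wav_path: str, target_lang_name: str) -> str:
        # Hypothetical helper, not part of app.py. Assumes OPENAI_API_KEY is set.
        client = openai.AsyncOpenAI()
        # Step 1: speech-to-text with Whisper.
        with open(wav_path, "rb") as f:
            transcript = await client.audio.transcriptions.create(
                model="whisper-1",
                file=f,
                response_format="text",  # returns a plain str, not an object
            )
        user_text = transcript.strip()
        # Step 2: translation-only chat completion; a low temperature keeps the
        # model translating the utterance rather than answering it.
        response = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system",
                 "content": f"You are a direct translator from Korean to "
                            f"{target_lang_name}. Output ONLY the translation."},
                {"role": "user", "content": user_text},
            ],
            temperature=0.1,
            max_tokens=200,
        )
        return response.choices[0].message.content.strip()

    # Usage: print(asyncio.run(translate_korean("audio.wav", "English")))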
@@ -1835,7 +1269,7 @@ Direct translation to {target_lang_name}:"""
         )
 
     async def start_up(self):
-        """Connect to realtime API
+        """Connect to realtime API"""
         # First check if we have the most recent settings
         if connection_settings and self.webrtc_id:
             if self.webrtc_id in connection_settings:
@@ -1843,27 +1277,13 @@ Direct translation to {target_lang_name}:"""
                 self.web_search_enabled = settings.get('web_search_enabled', False)
                 self.target_language = settings.get('target_language', '')
                 self.system_prompt = settings.get('system_prompt', '')
-                self.interpretation_mode = settings.get('interpretation_mode', False)
-                self.interpretation_languages = settings.get('interpretation_languages', [])
 
                 print(f"[START_UP] Updated settings from storage for {self.webrtc_id}")
-                print(f"[START_UP] interpretation_mode={self.interpretation_mode}")
-                print(f"[START_UP] interpretation_languages={self.interpretation_languages}")
 
-        print(f"[START_UP] Starting
+        print(f"[START_UP] Starting normal mode")
 
         self.client = openai.AsyncOpenAI()
 
-        # If in interpretation mode, don't connect to Realtime API
-        if self.interpretation_mode:
-            print(f"[INTERPRETATION MODE] Active - Skipping Realtime API connection")
-            print(f"[INTERPRETATION MODE] Using Whisper + GPT-4o-mini (text only)")
-            print(f"[INTERPRETATION MODE] Target languages: {self.interpretation_languages}")
-
-            # Do NOT connect to Realtime API
-            # Just keep the handler ready to process audio
-            return
-
         # Normal mode - connect to Realtime API
         print(f"[NORMAL MODE] Connecting to Realtime API...")
 
@@ -2080,18 +1500,18 @@ RULES:
                             ),
                         )
 
-                # Handle function calls
-                elif event.type == "response.function_call_arguments.start"
+                # Handle function calls
+                elif event.type == "response.function_call_arguments.start":
                     print(f"Function call started")
                     self.function_call_in_progress = True
                     self.current_function_args = ""
                     self.current_call_id = getattr(event, 'call_id', None)
 
-                elif event.type == "response.function_call_arguments.delta"
+                elif event.type == "response.function_call_arguments.delta":
                     if self.function_call_in_progress:
                         self.current_function_args += event.delta
 
-                elif event.type == "response.function_call_arguments.done"
+                elif event.type == "response.function_call_arguments.done":
                     if self.function_call_in_progress:
                         print(f"Function call done, args: {self.current_function_args}")
                         try:
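The three event branches fixed above follow the usual accumulate-then-parse pattern for streamed function-call arguments: collect delta fragments into a string, and JSON-decode it only on the "done" event. A compact standalone sketch of that pattern (the dict events below are stand-ins, not the Realtime API event types):

    import json

    events = [
        {"type": "response.function_call_arguments.start"},
        {"type": "response.function_call_arguments.delta", "delta": '{"query": "wea'},
        {"type": "response.function_call_arguments.delta", "delta": 'ther today"}'},
        {"type": "response.function_call_arguments.done"},
    ]

    args = ""
    in_progress = False
    for event in events:
        if event["type"] == "response.function_call_arguments.start":
            in_progress, args = True, ""
        elif event["type"] == "response.function_call_arguments.delta":
            if in_progress:
                args += event["delta"]  # fragments are not valid JSON on their own
        elif event["type"] == "response.function_call_arguments.done":
            if in_progress:
                print(json.loads(args))  # {'query': 'weather today'}
            in_progress = False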
@@ -2127,82 +1547,41 @@ RULES:
                     self.current_call_id = None
 
     async def receive(self, frame: tuple[int, np.ndarray]) -> None:
-
-
-
+        # Normal mode - use Realtime API
+        if not self.connection:
+            print(f"[RECEIVE] No connection in normal mode, skipping")
+            return
+        try:
             _, array = frame
             array = array.squeeze()
-
-
-
-
-            if audio_level > 200:  # Lower threshold for better detection
-                if not self.is_recording:
-                    print(f"[INTERPRETATION] Started recording, level: {audio_level:.1f}")
-                    self.is_recording = True
-                    self.silence_frames = 0
-                self.audio_buffer.append(array)
-            elif self.is_recording:
-                self.silence_frames += 1
-                self.audio_buffer.append(array)
-
-                # If we've had enough silence, process the audio
-                if self.silence_frames > self.silence_threshold and len(self.audio_buffer) > self.min_audio_length:
-                    print(f"[INTERPRETATION] Silence detected after {len(self.audio_buffer)} frames")
-                    # Process in the background to avoid blocking
-                    asyncio.create_task(self.process_interpretation())
-        else:
-            # Normal mode - use Realtime API
-            if not self.connection:
-                print(f"[RECEIVE] No connection in normal mode, skipping")
-                return
-            try:
-                _, array = frame
-                array = array.squeeze()
-                audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
-                await self.connection.input_audio_buffer.append(audio=audio_message)
-            except Exception as e:
-                print(f"Error in receive: {e}")
+            audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
+            await self.connection.input_audio_buffer.append(audio=audio_message)
+        except Exception as e:
+            print(f"Error in receive: {e}")
 
     async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
-        #
-
-
-
-
-
-
-
-
-        else:
-            # Normal mode
-            item = await wait_for_item(self.output_queue)
-
-            # Check if it's a dict with text message
-            if isinstance(item, dict) and item.get('type') == 'text_message':
-                await self.process_text_message(item['content'])
-                return None
-
-            return item
+        # Normal mode
+        item = await wait_for_item(self.output_queue)
+
+        # Check if it's a dict with text message
+        if isinstance(item, dict) and item.get('type') == 'text_message':
+            await self.process_text_message(item['content'])
+            return None
+
+        return item
 
     async def shutdown(self) -> None:
-        print(f"[SHUTDOWN] Called
+        print(f"[SHUTDOWN] Called")
 
-
-
-        self.
-        self.
-        print("[
-        else:
-            # Normal mode - close Realtime API connection
-            if self.connection:
-                await self.connection.close()
-                self.connection = None
-                print("[NORMAL MODE] Connection closed")
+        # Normal mode - close Realtime API connection
+        if self.connection:
+            await self.connection.close()
+            self.connection = None
+            print("[NORMAL MODE] Connection closed")
 
 
 # Create initial handler instance
-handler = OpenAIHandler(web_search_enabled=False
+handler = OpenAIHandler(web_search_enabled=False)
 
 # Create components
 chatbot = gr.Chatbot(type="messages")
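The rebuilt receive() above forwards each microphone frame to the Realtime API as base64 text. A standalone sketch of just that encoding step, assuming 24 kHz mono int16 frames (the file's SAMPLE_RATE constant; the concrete rate and frame size here are illustrative):

    import base64
    import numpy as np

    # A (sample_rate, samples) frame tuple as handled by receive() above.
    frame = (24000, np.zeros((1, 480), dtype=np.int16))

    _, array = frame
    array = array.squeeze()  # (1, N) -> (N,)
    # 16-bit PCM bytes -> base64 text, the payload shape that
    # input_audio_buffer.append(audio=...) expects.
    audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
    print(len(audio_message))  # 4 base64 chars per 3 PCM bytes -> 1280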
@@ -2235,12 +1614,8 @@ async def custom_offer(request: Request):
     web_search_enabled = body.get("web_search_enabled", False)
     target_language = body.get("target_language", "")
     system_prompt = body.get("system_prompt", "")
-    interpretation_mode = body.get("interpretation_mode", False)
-    interpretation_languages = body.get("interpretation_languages", [])
 
     print(f"[OFFER] Received offer with webrtc_id: {webrtc_id}")
-    print(f"[OFFER] interpretation_mode: {interpretation_mode}")
-    print(f"[OFFER] interpretation_languages: {interpretation_languages}")
     print(f"[OFFER] web_search_enabled: {web_search_enabled}")
     print(f"[OFFER] target_language: {target_language}")
 
@@ -2250,8 +1625,6 @@ async def custom_offer(request: Request):
         'web_search_enabled': web_search_enabled,
         'target_language': target_language,
         'system_prompt': system_prompt,
-        'interpretation_mode': interpretation_mode,
-        'interpretation_languages': interpretation_languages,
         'timestamp': asyncio.get_event_loop().time()
     }
 
@@ -2335,10 +1708,7 @@ async def outputs(webrtc_id: str):
                 data = {
                     "role": "assistant",
                     "content": event_data['event'].transcript,
-                    "language": event_data.get('language', '')
-                    "mode": event_data.get('mode', 'normal'),
-                    "original": event_data.get('original', ''),
-                    "translations": event_data.get('translations', [])
+                    "language": event_data.get('language', '')
                 }
                 yield f"event: output\ndata: {json.dumps(data)}\n\n"