Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,39 @@ load_dotenv()
|
|
25 |
|
26 |
SAMPLE_RATE = 24000
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
# HTML content embedded as a string
|
29 |
HTML_CONTENT = """<!DOCTYPE html>
|
30 |
<html lang="ko">
|
@@ -87,14 +120,31 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
87 |
font-size: 32px;
|
88 |
letter-spacing: 1px;
|
89 |
}
|
90 |
-
/*
|
91 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
display: flex;
|
93 |
align-items: center;
|
94 |
-
justify-content: center;
|
95 |
gap: 10px;
|
96 |
-
margin-top: 15px;
|
97 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
.toggle-switch {
|
99 |
position: relative;
|
100 |
width: 50px;
|
@@ -120,9 +170,43 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
120 |
.toggle-switch.active .toggle-slider {
|
121 |
transform: translateX(24px);
|
122 |
}
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
font-size: 14px;
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
}
|
127 |
.chat-container {
|
128 |
border-radius: 12px;
|
@@ -192,6 +276,7 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
192 |
margin-top: 20px;
|
193 |
display: flex;
|
194 |
justify-content: center;
|
|
|
195 |
flex-shrink: 0;
|
196 |
}
|
197 |
button {
|
@@ -220,6 +305,14 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
220 |
button:active {
|
221 |
transform: translateY(1px);
|
222 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
#audio-output {
|
224 |
display: none;
|
225 |
}
|
@@ -344,6 +437,11 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
344 |
background-color: var(--secondary-color);
|
345 |
border-radius: 50%;
|
346 |
}
|
|
|
|
|
|
|
|
|
|
|
347 |
</style>
|
348 |
</head>
|
349 |
|
@@ -359,22 +457,71 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
359 |
</div>
|
360 |
<h1>MOUSE 음성 챗</h1>
|
361 |
</div>
|
362 |
-
<div class="search-toggle">
|
363 |
-
<span class="search-label">웹 검색</span>
|
364 |
-
<div id="search-toggle" class="toggle-switch">
|
365 |
-
<div class="toggle-slider"></div>
|
366 |
-
</div>
|
367 |
-
</div>
|
368 |
<div class="status-indicator">
|
369 |
<div id="status-dot" class="status-dot disconnected"></div>
|
370 |
<span id="status-text">연결 대기 중</span>
|
371 |
</div>
|
372 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
<div class="chat-container">
|
374 |
<div class="chat-messages" id="chat-messages"></div>
|
|
|
|
|
|
|
375 |
</div>
|
376 |
<div class="controls">
|
377 |
<button id="start-button">대화 시작</button>
|
|
|
378 |
</div>
|
379 |
</div>
|
380 |
<audio id="audio-output"></audio>
|
@@ -383,15 +530,22 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
383 |
let peerConnection;
|
384 |
let webrtc_id;
|
385 |
let webSearchEnabled = false;
|
|
|
|
|
386 |
const audioOutput = document.getElementById('audio-output');
|
387 |
const startButton = document.getElementById('start-button');
|
|
|
388 |
const chatMessages = document.getElementById('chat-messages');
|
389 |
const statusDot = document.getElementById('status-dot');
|
390 |
const statusText = document.getElementById('status-text');
|
391 |
const searchToggle = document.getElementById('search-toggle');
|
|
|
|
|
|
|
392 |
let audioLevel = 0;
|
393 |
let animationFrame;
|
394 |
let audioContext, analyser, audioSource;
|
|
|
395 |
|
396 |
// Web search toggle functionality
|
397 |
searchToggle.addEventListener('click', () => {
|
@@ -400,14 +554,50 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
400 |
console.log('Web search enabled:', webSearchEnabled);
|
401 |
});
|
402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
403 |
function updateStatus(state) {
|
404 |
statusDot.className = 'status-dot ' + state;
|
405 |
if (state === 'connected') {
|
406 |
statusText.textContent = '연결됨';
|
|
|
407 |
} else if (state === 'connecting') {
|
408 |
statusText.textContent = '연결 중...';
|
|
|
409 |
} else {
|
410 |
statusText.textContent = '연결 대기 중';
|
|
|
411 |
}
|
412 |
}
|
413 |
function updateButtonState() {
|
@@ -508,13 +698,19 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
508 |
audioOutput.play();
|
509 |
}
|
510 |
});
|
511 |
-
|
|
|
|
|
|
|
|
|
|
|
512 |
dataChannel.onmessage = (event) => {
|
513 |
const eventJson = JSON.parse(event.data);
|
514 |
if (eventJson.type === "error") {
|
515 |
showError(eventJson.message);
|
516 |
}
|
517 |
};
|
|
|
518 |
const offer = await peerConnection.createOffer();
|
519 |
await peerConnection.setLocalDescription(offer);
|
520 |
await new Promise((resolve) => {
|
@@ -547,7 +743,9 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
547 |
sdp: peerConnection.localDescription.sdp,
|
548 |
type: peerConnection.localDescription.type,
|
549 |
webrtc_id: webrtc_id,
|
550 |
-
web_search_enabled: webSearchEnabled
|
|
|
|
|
551 |
})
|
552 |
});
|
553 |
const serverResponse = await response.json();
|
@@ -562,7 +760,11 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
562 |
const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
|
563 |
eventSource.addEventListener("output", (event) => {
|
564 |
const eventJson = JSON.parse(event.data);
|
565 |
-
|
|
|
|
|
|
|
|
|
566 |
});
|
567 |
eventSource.addEventListener("search", (event) => {
|
568 |
const eventJson = JSON.parse(event.data);
|
@@ -580,7 +782,11 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
580 |
function addMessage(role, content) {
|
581 |
const messageDiv = document.createElement('div');
|
582 |
messageDiv.classList.add('message', role);
|
583 |
-
|
|
|
|
|
|
|
|
|
584 |
chatMessages.appendChild(messageDiv);
|
585 |
chatMessages.scrollTop = chatMessages.scrollHeight;
|
586 |
}
|
@@ -610,6 +816,7 @@ HTML_CONTENT = """<!DOCTYPE html>
|
|
610 |
console.log('closing');
|
611 |
peerConnection.close();
|
612 |
}
|
|
|
613 |
updateButtonState();
|
614 |
audioLevel = 0;
|
615 |
}
|
@@ -675,8 +882,8 @@ brave_api_key = os.getenv("BSEARCH_API")
|
|
675 |
search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
|
676 |
print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
|
677 |
|
678 |
-
# Store
|
679 |
-
|
680 |
|
681 |
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
|
682 |
chatbot.append({"role": "assistant", "content": response.transcript})
|
@@ -684,7 +891,8 @@ def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEve
|
|
684 |
|
685 |
|
686 |
class OpenAIHandler(AsyncStreamHandler):
|
687 |
-
def __init__(self, web_search_enabled: bool = False,
|
|
|
688 |
super().__init__(
|
689 |
expected_layout="mono",
|
690 |
output_sample_rate=SAMPLE_RATE,
|
@@ -699,21 +907,27 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
699 |
self.current_call_id = None
|
700 |
self.webrtc_id = webrtc_id
|
701 |
self.web_search_enabled = web_search_enabled
|
702 |
-
|
|
|
|
|
|
|
703 |
|
704 |
def copy(self):
|
705 |
# Get the most recent settings
|
706 |
-
if
|
707 |
# Get the most recent webrtc_id
|
708 |
-
recent_ids = sorted(
|
709 |
-
key=lambda k:
|
710 |
reverse=True)
|
711 |
if recent_ids:
|
712 |
recent_id = recent_ids[0]
|
713 |
-
settings =
|
714 |
-
|
715 |
-
|
716 |
-
|
|
|
|
|
|
|
717 |
|
718 |
print(f"Handler.copy() called - creating new handler with default settings")
|
719 |
return OpenAIHandler(web_search_enabled=False)
|
@@ -739,26 +953,57 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
739 |
|
740 |
return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
|
741 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
742 |
async def start_up(self):
|
743 |
"""Connect to realtime API with function calling enabled"""
|
744 |
# First check if we have the most recent settings
|
745 |
-
if
|
746 |
-
recent_ids = sorted(
|
747 |
-
key=lambda k:
|
748 |
reverse=True)
|
749 |
if recent_ids:
|
750 |
recent_id = recent_ids[0]
|
751 |
-
settings =
|
752 |
-
self.web_search_enabled = settings.get('
|
|
|
|
|
753 |
self.webrtc_id = recent_id
|
754 |
-
print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id},
|
|
|
755 |
|
756 |
-
print(f"Starting up handler with web_search_enabled={self.web_search_enabled}"
|
|
|
757 |
self.client = openai.AsyncOpenAI()
|
758 |
|
759 |
# Define the web search function
|
760 |
tools = []
|
761 |
-
|
|
|
|
|
|
|
762 |
|
763 |
if self.web_search_enabled and self.search_client:
|
764 |
tools = [{
|
@@ -771,7 +1016,7 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
771 |
"properties": {
|
772 |
"query": {
|
773 |
"type": "string",
|
774 |
-
"description": "The search query
|
775 |
}
|
776 |
},
|
777 |
"required": ["query"]
|
@@ -780,8 +1025,8 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
780 |
}]
|
781 |
print("Web search function added to tools")
|
782 |
|
783 |
-
|
784 |
-
"
|
785 |
"IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
|
786 |
"- Weather (날씨, 기온, 비, 눈)\n"
|
787 |
"- News (뉴스, 소식)\n"
|
@@ -791,8 +1036,11 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
791 |
"- Any question about 2024 or 2025\n"
|
792 |
"- Any time-sensitive information\n\n"
|
793 |
"When in doubt, USE web_search. It's better to search and provide accurate information "
|
794 |
-
"than to guess or use outdated information.
|
795 |
)
|
|
|
|
|
|
|
796 |
|
797 |
async with self.client.beta.realtime.connect(
|
798 |
model="gpt-4o-mini-realtime-preview-2024-12-17"
|
@@ -805,9 +1053,23 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
805 |
"tool_choice": "auto" if tools else "none"
|
806 |
}
|
807 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
808 |
await conn.session.update(session=session_update)
|
809 |
self.connection = conn
|
810 |
-
print(f"Connected with tools: {len(tools)} functions")
|
811 |
|
812 |
async for event in self.connection:
|
813 |
# Debug logging for function calls
|
@@ -815,7 +1077,11 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
815 |
print(f"Function event: {event.type}")
|
816 |
|
817 |
if event.type == "response.audio_transcript.done":
|
818 |
-
|
|
|
|
|
|
|
|
|
819 |
|
820 |
elif event.type == "response.audio.delta":
|
821 |
await self.output_queue.put(
|
@@ -886,7 +1152,14 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
886 |
# Connection might be closed, ignore the error
|
887 |
|
888 |
async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
|
889 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
890 |
|
891 |
async def shutdown(self) -> None:
|
892 |
if self.connection:
|
@@ -921,18 +1194,23 @@ stream.mount(app)
|
|
921 |
# Intercept offer to capture settings
|
922 |
@app.post("/webrtc/offer", include_in_schema=False)
|
923 |
async def custom_offer(request: Request):
|
924 |
-
"""Intercept offer to capture
|
925 |
body = await request.json()
|
926 |
|
927 |
webrtc_id = body.get("webrtc_id")
|
928 |
web_search_enabled = body.get("web_search_enabled", False)
|
|
|
|
|
929 |
|
930 |
-
print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}"
|
|
|
931 |
|
932 |
# Store settings with timestamp
|
933 |
if webrtc_id:
|
934 |
-
|
935 |
-
'
|
|
|
|
|
936 |
'timestamp': asyncio.get_event_loop().time()
|
937 |
}
|
938 |
|
@@ -953,6 +1231,24 @@ async def custom_offer(request: Request):
|
|
953 |
return response
|
954 |
|
955 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
956 |
@app.get("/outputs")
|
957 |
async def outputs(webrtc_id: str):
|
958 |
"""Stream outputs including search events"""
|
@@ -962,10 +1258,16 @@ async def outputs(webrtc_id: str):
|
|
962 |
# Check if it's a search event
|
963 |
if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
|
964 |
yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
|
965 |
-
# Regular transcript event
|
966 |
-
elif
|
967 |
-
|
968 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
969 |
|
970 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
971 |
|
|
|
25 |
|
26 |
SAMPLE_RATE = 24000
|
27 |
|
28 |
+
# Language codes offered by the translation selector, mapped to the display
# name shown to the user and quoted in the translation instructions.
SUPPORTED_LANGUAGES: dict[str, str] = {
    "ko": "한국어 (Korean)",
    "en": "English",
    "es": "Español (Spanish)",
    "fr": "Français (French)",
    "de": "Deutsch (German)",
    "it": "Italiano (Italian)",
    "pt": "Português (Portuguese)",
    "ru": "Русский (Russian)",
    "ja": "日本語 (Japanese)",
    "zh": "中文 (Chinese)",
    "ar": "العربية (Arabic)",
    "hi": "हिन्दी (Hindi)",
    "nl": "Nederlands (Dutch)",
    "pl": "Polski (Polish)",
    "tr": "Türkçe (Turkish)",
    "vi": "Tiếng Việt (Vietnamese)",
    "th": "ไทย (Thai)",
    "id": "Bahasa Indonesia",
    "sv": "Svenska (Swedish)",
    "da": "Dansk (Danish)",
    "no": "Norsk (Norwegian)",
    "fi": "Suomi (Finnish)",
    "he": "עברית (Hebrew)",
    "uk": "Українська (Ukrainian)",
    "cs": "Čeština (Czech)",
    "el": "Ελληνικά (Greek)",
    "ro": "Română (Romanian)",
    "hu": "Magyar (Hungarian)",
    "ms": "Bahasa Melayu (Malay)",
}
|
60 |
+
|
61 |
# HTML content embedded as a string
|
62 |
HTML_CONTENT = """<!DOCTYPE html>
|
63 |
<html lang="ko">
|
|
|
120 |
font-size: 32px;
|
121 |
letter-spacing: 1px;
|
122 |
}
|
123 |
+
/* Settings section */
|
124 |
+
.settings-section {
|
125 |
+
background-color: var(--card-bg);
|
126 |
+
border-radius: 12px;
|
127 |
+
padding: 20px;
|
128 |
+
margin-bottom: 20px;
|
129 |
+
border: 1px solid var(--border-color);
|
130 |
+
}
|
131 |
+
.settings-grid {
|
132 |
+
display: grid;
|
133 |
+
grid-template-columns: 1fr 1fr;
|
134 |
+
gap: 15px;
|
135 |
+
margin-bottom: 15px;
|
136 |
+
}
|
137 |
+
.setting-item {
|
138 |
display: flex;
|
139 |
align-items: center;
|
|
|
140 |
gap: 10px;
|
|
|
141 |
}
|
142 |
+
.setting-label {
|
143 |
+
font-size: 14px;
|
144 |
+
color: #aaa;
|
145 |
+
min-width: 80px;
|
146 |
+
}
|
147 |
+
/* Toggle switch */
|
148 |
.toggle-switch {
|
149 |
position: relative;
|
150 |
width: 50px;
|
|
|
170 |
.toggle-switch.active .toggle-slider {
|
171 |
transform: translateX(24px);
|
172 |
}
|
173 |
+
/* Select dropdown */
|
174 |
+
select {
|
175 |
+
background-color: var(--card-bg);
|
176 |
+
color: var(--text-color);
|
177 |
+
border: 1px solid var(--border-color);
|
178 |
+
padding: 8px 12px;
|
179 |
+
border-radius: 6px;
|
180 |
font-size: 14px;
|
181 |
+
cursor: pointer;
|
182 |
+
min-width: 150px;
|
183 |
+
}
|
184 |
+
select:focus {
|
185 |
+
outline: none;
|
186 |
+
border-color: var(--primary-color);
|
187 |
+
}
|
188 |
+
/* Text inputs */
|
189 |
+
.text-input-section {
|
190 |
+
margin-top: 15px;
|
191 |
+
}
|
192 |
+
input[type="text"], textarea {
|
193 |
+
width: 100%;
|
194 |
+
background-color: var(--dark-bg);
|
195 |
+
color: var(--text-color);
|
196 |
+
border: 1px solid var(--border-color);
|
197 |
+
padding: 10px;
|
198 |
+
border-radius: 6px;
|
199 |
+
font-size: 14px;
|
200 |
+
box-sizing: border-box;
|
201 |
+
margin-top: 5px;
|
202 |
+
}
|
203 |
+
input[type="text"]:focus, textarea:focus {
|
204 |
+
outline: none;
|
205 |
+
border-color: var(--primary-color);
|
206 |
+
}
|
207 |
+
textarea {
|
208 |
+
resize: vertical;
|
209 |
+
min-height: 80px;
|
210 |
}
|
211 |
.chat-container {
|
212 |
border-radius: 12px;
|
|
|
276 |
margin-top: 20px;
|
277 |
display: flex;
|
278 |
justify-content: center;
|
279 |
+
gap: 10px;
|
280 |
flex-shrink: 0;
|
281 |
}
|
282 |
button {
|
|
|
305 |
button:active {
|
306 |
transform: translateY(1px);
|
307 |
}
|
308 |
+
#send-button {
|
309 |
+
background: linear-gradient(135deg, #2ecc71, #27ae60);
|
310 |
+
padding: 10px 20px;
|
311 |
+
font-size: 14px;
|
312 |
+
}
|
313 |
+
#send-button:hover {
|
314 |
+
background: linear-gradient(135deg, #27ae60, #229954);
|
315 |
+
}
|
316 |
#audio-output {
|
317 |
display: none;
|
318 |
}
|
|
|
437 |
background-color: var(--secondary-color);
|
438 |
border-radius: 50%;
|
439 |
}
|
440 |
+
.language-info {
|
441 |
+
font-size: 12px;
|
442 |
+
color: #888;
|
443 |
+
margin-left: 5px;
|
444 |
+
}
|
445 |
</style>
|
446 |
</head>
|
447 |
|
|
|
457 |
</div>
|
458 |
<h1>MOUSE 음성 챗</h1>
|
459 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
<div class="status-indicator">
|
461 |
<div id="status-dot" class="status-dot disconnected"></div>
|
462 |
<span id="status-text">연결 대기 중</span>
|
463 |
</div>
|
464 |
</div>
|
465 |
+
|
466 |
+
<div class="settings-section">
|
467 |
+
<div class="settings-grid">
|
468 |
+
<div class="setting-item">
|
469 |
+
<span class="setting-label">웹 검색</span>
|
470 |
+
<div id="search-toggle" class="toggle-switch">
|
471 |
+
<div class="toggle-slider"></div>
|
472 |
+
</div>
|
473 |
+
</div>
|
474 |
+
<div class="setting-item">
|
475 |
+
<span class="setting-label">자동 번역</span>
|
476 |
+
<select id="language-select">
|
477 |
+
<option value="">비활성화</option>
|
478 |
+
<option value="ko">한국어 (Korean)</option>
|
479 |
+
<option value="en">English</option>
|
480 |
+
<option value="es">Español (Spanish)</option>
|
481 |
+
<option value="fr">Français (French)</option>
|
482 |
+
<option value="de">Deutsch (German)</option>
|
483 |
+
<option value="it">Italiano (Italian)</option>
|
484 |
+
<option value="pt">Português (Portuguese)</option>
|
485 |
+
<option value="ru">Русский (Russian)</option>
|
486 |
+
<option value="ja">日本語 (Japanese)</option>
|
487 |
+
<option value="zh">中文 (Chinese)</option>
|
488 |
+
<option value="ar">العربية (Arabic)</option>
|
489 |
+
<option value="hi">हिन्दी (Hindi)</option>
|
490 |
+
<option value="nl">Nederlands (Dutch)</option>
|
491 |
+
<option value="pl">Polski (Polish)</option>
|
492 |
+
<option value="tr">Türkçe (Turkish)</option>
|
493 |
+
<option value="vi">Tiếng Việt (Vietnamese)</option>
|
494 |
+
<option value="th">ไทย (Thai)</option>
|
495 |
+
<option value="id">Bahasa Indonesia</option>
|
496 |
+
<option value="sv">Svenska (Swedish)</option>
|
497 |
+
<option value="da">Dansk (Danish)</option>
|
498 |
+
<option value="no">Norsk (Norwegian)</option>
|
499 |
+
<option value="fi">Suomi (Finnish)</option>
|
500 |
+
<option value="he">עברית (Hebrew)</option>
|
501 |
+
<option value="uk">Українська (Ukrainian)</option>
|
502 |
+
<option value="cs">Čeština (Czech)</option>
|
503 |
+
<option value="el">Ελληνικά (Greek)</option>
|
504 |
+
<option value="ro">Română (Romanian)</option>
|
505 |
+
<option value="hu">Magyar (Hungarian)</option>
|
506 |
+
<option value="ms">Bahasa Melayu (Malay)</option>
|
507 |
+
</select>
|
508 |
+
</div>
|
509 |
+
</div>
|
510 |
+
<div class="text-input-section">
|
511 |
+
<label for="system-prompt" class="setting-label">시스템 프롬프트:</label>
|
512 |
+
<textarea id="system-prompt" placeholder="AI 어시스턴트의 성격, 역할, 행동 방식을 정의하세요...">You are a helpful assistant. Respond in a friendly and professional manner.</textarea>
|
513 |
+
</div>
|
514 |
+
</div>
|
515 |
+
|
516 |
<div class="chat-container">
|
517 |
<div class="chat-messages" id="chat-messages"></div>
|
518 |
+
<div class="text-input-section" style="margin-top: 10px;">
|
519 |
+
<input type="text" id="text-input" placeholder="텍스트 메시지를 입력하세요..." />
|
520 |
+
</div>
|
521 |
</div>
|
522 |
<div class="controls">
|
523 |
<button id="start-button">대화 시작</button>
|
524 |
+
<button id="send-button" style="display: none;">전송</button>
|
525 |
</div>
|
526 |
</div>
|
527 |
<audio id="audio-output"></audio>
|
|
|
530 |
let peerConnection;
|
531 |
let webrtc_id;
|
532 |
let webSearchEnabled = false;
|
533 |
+
let selectedLanguage = "";
|
534 |
+
let systemPrompt = "You are a helpful assistant. Respond in a friendly and professional manner.";
|
535 |
const audioOutput = document.getElementById('audio-output');
|
536 |
const startButton = document.getElementById('start-button');
|
537 |
+
const sendButton = document.getElementById('send-button');
|
538 |
const chatMessages = document.getElementById('chat-messages');
|
539 |
const statusDot = document.getElementById('status-dot');
|
540 |
const statusText = document.getElementById('status-text');
|
541 |
const searchToggle = document.getElementById('search-toggle');
|
542 |
+
const languageSelect = document.getElementById('language-select');
|
543 |
+
const systemPromptInput = document.getElementById('system-prompt');
|
544 |
+
const textInput = document.getElementById('text-input');
|
545 |
let audioLevel = 0;
|
546 |
let animationFrame;
|
547 |
let audioContext, analyser, audioSource;
|
548 |
+
let dataChannel = null;
|
549 |
|
550 |
// Web search toggle functionality
|
551 |
searchToggle.addEventListener('click', () => {
|
|
|
554 |
console.log('Web search enabled:', webSearchEnabled);
|
555 |
});
|
556 |
|
557 |
+
// Language selection
|
558 |
+
languageSelect.addEventListener('change', () => {
|
559 |
+
selectedLanguage = languageSelect.value;
|
560 |
+
console.log('Selected language:', selectedLanguage);
|
561 |
+
});
|
562 |
+
|
563 |
+
// System prompt update
|
564 |
+
systemPromptInput.addEventListener('input', () => {
|
565 |
+
systemPrompt = systemPromptInput.value || "You are a helpful assistant. Respond in a friendly and professional manner.";
|
566 |
+
});
|
567 |
+
|
568 |
+
// Text input handling
|
569 |
+
textInput.addEventListener('keypress', (e) => {
|
570 |
+
if (e.key === 'Enter' && !e.shiftKey) {
|
571 |
+
e.preventDefault();
|
572 |
+
sendTextMessage();
|
573 |
+
}
|
574 |
+
});
|
575 |
+
|
576 |
+
sendButton.addEventListener('click', sendTextMessage);
|
577 |
+
|
578 |
+
// Send the text currently typed in the input box over the WebRTC data
// channel and echo it into the chat log. Does nothing when the box is empty
// (after trimming) or when the data channel is missing or not open.
function sendTextMessage() {
    const text = textInput.value.trim();
    if (text === '') {
        return;
    }
    if (!dataChannel || dataChannel.readyState !== 'open') {
        return;
    }

    const payload = JSON.stringify({ type: 'text_message', content: text });
    addMessage('user', text);
    dataChannel.send(payload);

    // Clear the box only after the message has been handed to the channel.
    textInput.value = '';
}
|
589 |
+
|
590 |
// Reflect the connection state in the status dot, the status label and the
// visibility of the send button. `state` is appended to the dot's class
// list; only 'connected' shows the send button.
function updateStatus(state) {
    statusDot.className = 'status-dot ' + state;

    const isConnected = state === 'connected';
    let label;
    if (isConnected) {
        label = '연결됨';
    } else if (state === 'connecting') {
        label = '연결 중...';
    } else {
        label = '연결 대기 중';
    }

    statusText.textContent = label;
    sendButton.style.display = isConnected ? 'block' : 'none';
}
|
603 |
function updateButtonState() {
|
|
|
698 |
audioOutput.play();
|
699 |
}
|
700 |
});
|
701 |
+
|
702 |
+
// Create data channel for text messages
|
703 |
+
dataChannel = peerConnection.createDataChannel('text');
|
704 |
+
dataChannel.onopen = () => {
|
705 |
+
console.log('Data channel opened');
|
706 |
+
};
|
707 |
dataChannel.onmessage = (event) => {
|
708 |
const eventJson = JSON.parse(event.data);
|
709 |
if (eventJson.type === "error") {
|
710 |
showError(eventJson.message);
|
711 |
}
|
712 |
};
|
713 |
+
|
714 |
const offer = await peerConnection.createOffer();
|
715 |
await peerConnection.setLocalDescription(offer);
|
716 |
await new Promise((resolve) => {
|
|
|
743 |
sdp: peerConnection.localDescription.sdp,
|
744 |
type: peerConnection.localDescription.type,
|
745 |
webrtc_id: webrtc_id,
|
746 |
+
web_search_enabled: webSearchEnabled,
|
747 |
+
target_language: selectedLanguage,
|
748 |
+
system_prompt: systemPrompt
|
749 |
})
|
750 |
});
|
751 |
const serverResponse = await response.json();
|
|
|
760 |
const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
|
761 |
eventSource.addEventListener("output", (event) => {
|
762 |
const eventJson = JSON.parse(event.data);
|
763 |
+
let content = eventJson.content;
|
764 |
+
if (selectedLanguage && eventJson.language) {
|
765 |
+
content += ` <span class="language-info">[${eventJson.language}]</span>`;
|
766 |
+
}
|
767 |
+
addMessage("assistant", content);
|
768 |
});
|
769 |
eventSource.addEventListener("search", (event) => {
|
770 |
const eventJson = JSON.parse(event.data);
|
|
|
782 |
// Append a chat bubble for `role` ('user' / 'assistant') to the chat log and
// scroll to the bottom.
//
// `content` is always rendered as plain text so transcript text can never be
// interpreted as markup. The one piece of markup callers append — a trailing
// ' <span class="language-info">[label]</span>' tag — is recognised and
// rebuilt with DOM nodes instead of being passed through innerHTML.
function addMessage(role, content) {
    const messageDiv = document.createElement('div');
    messageDiv.classList.add('message', role);

    // Tolerate a missing content field instead of throwing.
    const text = content == null ? '' : String(content);

    const tagOpen = ' <span class="language-info">[';
    const tagClose = ']</span>';
    const tagStart = text.lastIndexOf(tagOpen);
    const labelStart = tagStart + tagOpen.length;
    const labelEnd = text.length - tagClose.length;
    const hasLanguageTag =
        tagStart !== -1 && text.endsWith(tagClose) && labelStart <= labelEnd;

    if (hasLanguageTag) {
        // Message body as text, followed by the language tag as a real element.
        messageDiv.appendChild(
            document.createTextNode(text.slice(0, tagStart) + ' ')
        );
        const languageTag = document.createElement('span');
        languageTag.className = 'language-info';
        languageTag.textContent = '[' + text.slice(labelStart, labelEnd) + ']';
        messageDiv.appendChild(languageTag);
    } else {
        messageDiv.textContent = text;
    }

    chatMessages.appendChild(messageDiv);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
|
|
|
816 |
console.log('closing');
|
817 |
peerConnection.close();
|
818 |
}
|
819 |
+
dataChannel = null;
|
820 |
updateButtonState();
|
821 |
audioLevel = 0;
|
822 |
}
|
|
|
882 |
search_client = BraveSearchClient(brave_api_key) if brave_api_key else None
|
883 |
print(f"Search client initialized: {search_client is not None}, API key present: {bool(brave_api_key)}")
|
884 |
|
885 |
+
# Per-connection settings captured from the offer request, keyed by webrtc_id.
# Handlers read the entry with the newest 'timestamp' value.
connection_settings: dict[str, dict] = {}
|
887 |
|
888 |
def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
|
889 |
chatbot.append({"role": "assistant", "content": response.transcript})
|
|
|
891 |
|
892 |
|
893 |
class OpenAIHandler(AsyncStreamHandler):
|
894 |
+
def __init__(self, web_search_enabled: bool = False, target_language: str = "",
|
895 |
+
system_prompt: str = "", webrtc_id: str = None) -> None:
|
896 |
super().__init__(
|
897 |
expected_layout="mono",
|
898 |
output_sample_rate=SAMPLE_RATE,
|
|
|
907 |
self.current_call_id = None
|
908 |
self.webrtc_id = webrtc_id
|
909 |
self.web_search_enabled = web_search_enabled
|
910 |
+
self.target_language = target_language
|
911 |
+
self.system_prompt = system_prompt
|
912 |
+
print(f"Handler created with web_search_enabled={web_search_enabled}, "
|
913 |
+
f"target_language={target_language}, webrtc_id={webrtc_id}")
|
914 |
|
915 |
def copy(self):
    """Return a new handler configured from the newest stored settings.

    The entry of ``connection_settings`` with the highest ``'timestamp'``
    supplies the web-search flag, target language, system prompt and
    webrtc_id. When nothing has been stored, a handler with web search
    disabled is returned and a log line is printed.

    NOTE(review): the newest entry is chosen globally, not per client, so
    two clients connecting at the same time could receive each other's
    settings — confirm this is acceptable.
    """
    if not connection_settings:
        print(f"Handler.copy() called - creating new handler with default settings")
        return OpenAIHandler(web_search_enabled=False)

    # max() yields the first key with the highest timestamp, which is the
    # same element a stable descending sort would place first.
    newest_id = max(
        connection_settings,
        key=lambda cid: connection_settings[cid].get('timestamp', 0),
    )
    stored = connection_settings[newest_id]
    return OpenAIHandler(
        web_search_enabled=stored.get('web_search_enabled', False),
        target_language=stored.get('target_language', ''),
        system_prompt=stored.get('system_prompt', ''),
        webrtc_id=newest_id,
    )
|
|
|
953 |
|
954 |
return f"웹 검색 결과 '{query}':\n\n" + "\n".join(formatted_results)
|
955 |
|
956 |
+
async def process_text_message(self, message: str):
    """Add a typed user message to the conversation and request a reply.

    Does nothing when no realtime connection has been established.

    Args:
        message: Text entered by the user.
    """
    if not self.connection:
        return

    user_item = {
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": message}],
    }
    await self.connection.conversation.item.create(item=user_item)
    # Ask for a response now that the user item has been added.
    await self.connection.response.create()
|
967 |
+
|
968 |
+
def get_translation_instructions(self):
    """Return the instruction suffix that forces replies into the target language.

    Returns:
        An empty string when no target language is set; otherwise a block
        of text naming the language (display name from
        ``SUPPORTED_LANGUAGES`` when the code is known, else the raw code).
    """
    code = self.target_language
    if code:
        display_name = SUPPORTED_LANGUAGES.get(code, code)
        sentences = (
            f"\n\nIMPORTANT: You must respond in {display_name} ({code}). ",
            f"Translate all your responses to {display_name}. ",
            f"This includes both spoken and written responses.",
        )
        return "".join(sentences)
    return ""
|
979 |
+
|
980 |
async def start_up(self):
|
981 |
"""Connect to realtime API with function calling enabled"""
|
982 |
# First check if we have the most recent settings
|
983 |
+
if connection_settings:
|
984 |
+
recent_ids = sorted(connection_settings.keys(),
|
985 |
+
key=lambda k: connection_settings[k].get('timestamp', 0),
|
986 |
reverse=True)
|
987 |
if recent_ids:
|
988 |
recent_id = recent_ids[0]
|
989 |
+
settings = connection_settings[recent_id]
|
990 |
+
self.web_search_enabled = settings.get('web_search_enabled', False)
|
991 |
+
self.target_language = settings.get('target_language', '')
|
992 |
+
self.system_prompt = settings.get('system_prompt', '')
|
993 |
self.webrtc_id = recent_id
|
994 |
+
print(f"start_up: Updated settings from storage - webrtc_id={self.webrtc_id}, "
|
995 |
+
f"web_search_enabled={self.web_search_enabled}, target_language={self.target_language}")
|
996 |
|
997 |
+
print(f"Starting up handler with web_search_enabled={self.web_search_enabled}, "
|
998 |
+
f"target_language={self.target_language}")
|
999 |
self.client = openai.AsyncOpenAI()
|
1000 |
|
1001 |
# Define the web search function
|
1002 |
tools = []
|
1003 |
+
base_instructions = self.system_prompt or "You are a helpful assistant."
|
1004 |
+
|
1005 |
+
# Add translation instructions if language is selected
|
1006 |
+
translation_instructions = self.get_translation_instructions()
|
1007 |
|
1008 |
if self.web_search_enabled and self.search_client:
|
1009 |
tools = [{
|
|
|
1016 |
"properties": {
|
1017 |
"query": {
|
1018 |
"type": "string",
|
1019 |
+
"description": "The search query"
|
1020 |
}
|
1021 |
},
|
1022 |
"required": ["query"]
|
|
|
1025 |
}]
|
1026 |
print("Web search function added to tools")
|
1027 |
|
1028 |
+
search_instructions = (
|
1029 |
+
"\n\nYou have web search capabilities. "
|
1030 |
"IMPORTANT: You MUST use the web_search function for ANY of these topics:\n"
|
1031 |
"- Weather (날씨, 기온, 비, 눈)\n"
|
1032 |
"- News (뉴스, 소식)\n"
|
|
|
1036 |
"- Any question about 2024 or 2025\n"
|
1037 |
"- Any time-sensitive information\n\n"
|
1038 |
"When in doubt, USE web_search. It's better to search and provide accurate information "
|
1039 |
+
"than to guess or use outdated information."
|
1040 |
)
|
1041 |
+
instructions = base_instructions + search_instructions + translation_instructions
|
1042 |
+
else:
|
1043 |
+
instructions = base_instructions + translation_instructions
|
1044 |
|
1045 |
async with self.client.beta.realtime.connect(
|
1046 |
model="gpt-4o-mini-realtime-preview-2024-12-17"
|
|
|
1053 |
"tool_choice": "auto" if tools else "none"
|
1054 |
}
|
1055 |
|
1056 |
+
# Choose an output voice when a target language is selected.
if self.target_language:
    # Only voices accepted by the Realtime API may be sent in the session
    # update. "nova" is a text-to-speech voice that the realtime session
    # rejects, so those languages use "shimmer" instead.
    voice_map = {
        "en": "alloy",
        "es": "shimmer",
        "fr": "shimmer",
        "de": "shimmer",
        "ja": "shimmer",
        "zh": "shimmer",
    }
    # Languages without an entry fall back to "alloy".
    session_update["voice"] = voice_map.get(self.target_language, "alloy")
|
1069 |
+
|
1070 |
await conn.session.update(session=session_update)
|
1071 |
self.connection = conn
|
1072 |
+
print(f"Connected with tools: {len(tools)} functions, voice: {session_update.get('voice', 'default')}")
|
1073 |
|
1074 |
async for event in self.connection:
|
1075 |
# Debug logging for function calls
|
|
|
1077 |
print(f"Function event: {event.type}")
|
1078 |
|
1079 |
if event.type == "response.audio_transcript.done":
|
1080 |
+
output_data = {
|
1081 |
+
"event": event,
|
1082 |
+
"language": SUPPORTED_LANGUAGES.get(self.target_language, "") if self.target_language else ""
|
1083 |
+
}
|
1084 |
+
await self.output_queue.put(AdditionalOutputs(output_data))
|
1085 |
|
1086 |
elif event.type == "response.audio.delta":
|
1087 |
await self.output_queue.put(
|
|
|
1152 |
# Connection might be closed, ignore the error
|
1153 |
|
1154 |
async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
    """Return the next queued output, intercepting queued text messages.

    A dict item whose ``'type'`` is ``'text_message'`` is not emitted: its
    ``'content'`` is passed to ``process_text_message`` and ``None`` is
    returned. Every other item is returned unchanged.
    """
    queued = await wait_for_item(self.output_queue)

    is_text_request = (
        isinstance(queued, dict) and queued.get('type') == 'text_message'
    )
    if not is_text_request:
        return queued

    await self.process_text_message(queued['content'])
    return None
|
1163 |
|
1164 |
async def shutdown(self) -> None:
|
1165 |
if self.connection:
|
|
|
1194 |
# Intercept offer to capture settings
|
1195 |
@app.post("/webrtc/offer", include_in_schema=False)
|
1196 |
async def custom_offer(request: Request):
|
1197 |
+
"""Intercept offer to capture settings"""
|
1198 |
body = await request.json()
|
1199 |
|
1200 |
webrtc_id = body.get("webrtc_id")
|
1201 |
web_search_enabled = body.get("web_search_enabled", False)
|
1202 |
+
target_language = body.get("target_language", "")
|
1203 |
+
system_prompt = body.get("system_prompt", "")
|
1204 |
|
1205 |
+
print(f"Custom offer - webrtc_id: {webrtc_id}, web_search_enabled: {web_search_enabled}, "
|
1206 |
+
f"target_language: {target_language}")
|
1207 |
|
1208 |
# Store settings with timestamp
|
1209 |
if webrtc_id:
|
1210 |
+
connection_settings[webrtc_id] = {
|
1211 |
+
'web_search_enabled': web_search_enabled,
|
1212 |
+
'target_language': target_language,
|
1213 |
+
'system_prompt': system_prompt,
|
1214 |
'timestamp': asyncio.get_event_loop().time()
|
1215 |
}
|
1216 |
|
|
|
1231 |
return response
|
1232 |
|
1233 |
|
1234 |
+
@app.post("/text_message/{webrtc_id}")
async def receive_text_message(webrtc_id: str, request: Request):
    """Queue a text message from the client on the matching handler.

    Reads ``content`` from the JSON body (empty string when absent) and, if
    ``stream.handlers`` has an entry for ``webrtc_id``, puts a
    ``text_message`` item on that handler's output queue.

    NOTE(review): the response is ``{"status": "ok"}`` even when no handler
    is registered for ``webrtc_id`` and the message is dropped.
    """
    payload = await request.json()
    queued_item = {
        'type': 'text_message',
        'content': payload.get("content", ""),
    }

    # presumably stream.handlers maps webrtc_id -> handler; verify against the
    # streaming library.
    if webrtc_id in stream.handlers:
        await stream.handlers[webrtc_id].output_queue.put(queued_item)

    return {"status": "ok"}
|
1250 |
+
|
1251 |
+
|
1252 |
@app.get("/outputs")
|
1253 |
async def outputs(webrtc_id: str):
|
1254 |
"""Stream outputs including search events"""
|
|
|
1258 |
# Check if it's a search event
|
1259 |
if isinstance(output.args[0], dict) and output.args[0].get('type') == 'search':
|
1260 |
yield f"event: search\ndata: {json.dumps(output.args[0])}\n\n"
|
1261 |
+
# Regular transcript event with language info
|
1262 |
+
elif isinstance(output.args[0], dict) and 'event' in output.args[0]:
|
1263 |
+
event = output.args[0]['event']
|
1264 |
+
if hasattr(event, 'transcript'):
|
1265 |
+
data = {
|
1266 |
+
"role": "assistant",
|
1267 |
+
"content": event.transcript,
|
1268 |
+
"language": output.args[0].get('language', '')
|
1269 |
+
}
|
1270 |
+
yield f"event: output\ndata: {json.dumps(data)}\n\n"
|
1271 |
|
1272 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
1273 |
|