Update app.py

app.py
CHANGED
@@ -546,7 +546,12 @@ HTML_CONTENT = """<!DOCTYPE html>
 
             try {
                 const stream = await navigator.mediaDevices.getUserMedia({
-                    audio:
+                    audio: {
+                        echoCancellation: true,   // echo cancellation
+                        noiseSuppression: true,   // noise suppression
+                        autoGainControl: true,    // automatic gain control
+                        sampleRate: 24000
+                    }
                 });
                 setupAudioVisualization(stream);
                 stream.getTracks().forEach(track => {
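Note: the added `audio` constraints are standard MediaTrackConstraints. `sampleRate: 24000` matches the 24 kHz PCM16 format the OpenAI Realtime API streams, and `echoCancellation`/`noiseSuppression` reduce the chance the microphone re-captures the assistant's own output; browsers treat these values as hints, not guarantees.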
@@ -556,7 +561,11 @@ HTML_CONTENT = """<!DOCTYPE html>
             peerConnection.addEventListener('track', (evt) => {
                 if (audioOutput.srcObject !== evt.streams[0]) {
                     audioOutput.srcObject = evt.streams[0];
-
+                    // lower the volume on autoplay to prevent feedback
+                    audioOutput.volume = 0.8;
+                    audioOutput.play().catch(e => {
+                        console.error('Audio play error:', e);
+                    });
                 }
             });
 
@@ -677,7 +686,19 @@ HTML_CONTENT = """<!DOCTYPE html>
             }
         }
 
+        let lastMessageContent = '';
+        let lastMessageTime = 0;
+
         function addMessage(role, content) {
+            // suppress duplicate messages
+            const now = Date.now();
+            if (content === lastMessageContent && (now - lastMessageTime) < 1000) {
+                return; // ignore an identical message within 1 second
+            }
+
+            lastMessageContent = content;
+            lastMessageTime = now;
+
             const messageDiv = document.createElement('div');
             messageDiv.classList.add('message', role);
             messageDiv.textContent = content;
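The duplicate-message guard above is a plain time-window debounce. A minimal Python sketch of the same idea (a hypothetical helper, not part of app.py, using a monotonic clock):

```python
from time import monotonic

class DuplicateFilter:
    """Mirrors the addMessage() guard: drop a payload identical to the
    previous one if it arrives inside the suppression window."""

    def __init__(self, window_s: float = 1.0):
        self.window_s = window_s
        self.last_content: str | None = None
        self.last_time = float("-inf")

    def allow(self, content: str) -> bool:
        now = monotonic()
        if content == self.last_content and (now - self.last_time) < self.window_s:
            return False  # identical message within the window: ignore
        self.last_content = content
        self.last_time = now
        return True

f = DuplicateFilter()
assert f.allow("hello") is True   # first occurrence passes
assert f.allow("hello") is False  # immediate repeat is suppressed
```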
@@ -810,6 +831,8 @@ class OpenAIHandler(AsyncStreamHandler):
         self.keep_alive_task = None
         self.last_activity = datetime.now()
         self.connection_active = True
+        self.response_in_progress = False          # prevent overlapping responses
+        self.last_response_time = datetime.now()   # time of the last response
         logger.info(f"Handler created with web_search_enabled={web_search_enabled}, webrtc_id={webrtc_id}")
 
     def copy(self):
@@ -895,7 +918,15 @@ class OpenAIHandler(AsyncStreamHandler):
 
         # Define the web search function
         tools = []
-        instructions =
+        instructions = (
+            "You are a helpful assistant. Respond in Korean when the user speaks Korean. "
+            "IMPORTANT RULES:\n"
+            "1. Wait for the user to finish speaking before responding\n"
+            "2. Keep responses concise and to the point\n"
+            "3. Do not continue speaking if the user interrupts\n"
+            "4. Stop immediately when you finish answering the question\n"
+            "5. Do not add unnecessary elaboration or follow-up questions unless asked"
+        )
 
         if self.web_search_enabled and self.search_client:
             tools = [{
@@ -919,7 +950,13 @@ class OpenAIHandler(AsyncStreamHandler):
 
             instructions = (
                 "You are a helpful assistant with web search capabilities. "
-                "IMPORTANT
+                "IMPORTANT RULES:\n"
+                "1. Wait for the user to finish speaking before responding\n"
+                "2. Keep responses concise and to the point\n"
+                "3. Do not continue speaking if the user interrupts\n"
+                "4. Stop immediately when you finish answering the question\n"
+                "5. Do not add unnecessary elaboration or follow-up questions unless asked\n\n"
+                "WEB SEARCH RULES: You MUST use the web_search function for ANY of these topics:\n"
                 "- Weather (날씨, 기온, 비, 눈)\n"
                 "- News (뉴스, 소식)\n"
                 "- Current events (현재, 최근, 오늘, 지금)\n"
@@ -927,20 +964,27 @@ class OpenAIHandler(AsyncStreamHandler):
                 "- Sports scores or results\n"
                 "- Any question about 2024 or 2025\n"
                 "- Any time-sensitive information\n\n"
-                "When in doubt, USE web_search.
-                "than to guess or use outdated information. Always respond in Korean when the user speaks Korean."
+                "When in doubt, USE web_search for accuracy. Always respond in Korean when the user speaks Korean."
             )
 
         try:
             async with self.client.beta.realtime.connect(
                 model="gpt-4o-mini-realtime-preview-2024-12-17"
             ) as conn:
-                # Update session with tools
+                # Update session with tools - hardened VAD settings
                 session_update = {
-                    "turn_detection": {
+                    "turn_detection": {
+                        "type": "server_vad",
+                        "threshold": 0.5,             # sensitivity tuning
+                        "prefix_padding_ms": 300,     # padding before speech starts
+                        "silence_duration_ms": 500    # longer silence-detection window
+                    },
                     "instructions": instructions,
                     "tools": tools,
-                    "tool_choice": "auto" if tools else "none"
+                    "tool_choice": "auto" if tools else "none",
+                    "voice": "echo",                  # voice selection
+                    "temperature": 0.7,               # less response variability
+                    "max_response_output_tokens": "inf"  # prevent infinite responses
                 }
 
                 await conn.session.update(session=session_update)
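For context, the `session.update` call configures server-side VAD: the API ends the user's turn after `silence_duration_ms` of silence and includes `prefix_padding_ms` of audio from before speech was detected. A minimal standalone sketch using the same SDK calls that appear in app.py (model name copied from the diff; assumes OPENAI_API_KEY is set in the environment):

```python
import asyncio
from openai import AsyncOpenAI

async def main():
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    async with client.beta.realtime.connect(
        model="gpt-4o-mini-realtime-preview-2024-12-17"
    ) as conn:
        # Same turn-detection settings as the commit: a 0.5 activation
        # threshold, 300 ms of pre-speech padding, and a 500 ms silence
        # window before the turn is considered finished.
        await conn.session.update(session={
            "turn_detection": {
                "type": "server_vad",
                "threshold": 0.5,
                "prefix_padding_ms": 300,
                "silence_duration_ms": 500,
            },
            "voice": "echo",
            "temperature": 0.7,
        })

asyncio.run(main())
```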
@@ -954,14 +998,34 @@ class OpenAIHandler(AsyncStreamHandler):
                 async for event in self.connection:
                     self.last_activity = datetime.now()
 
-                    # Debug logging
-                    if event.type.
-                        logger.
+                    # Debug logging
+                    if event.type in ["response.audio_transcript.done", "response.done", "response.created", "input_audio_buffer.speech_started", "input_audio_buffer.speech_stopped"]:
+                        logger.info(f"Event: {event.type}")
+
+                    # Detect user speech
+                    if event.type == "input_audio_buffer.speech_started":
+                        logger.info("User started speaking")
+                        self.response_in_progress = False  # cut the response off when the user speaks
+
+                    # Track response start/end
+                    if event.type == "response.created":
+                        self.response_in_progress = True
+                        self.last_response_time = datetime.now()
+                        logger.info("Response started")
+
+                    if event.type == "response.done":
+                        self.response_in_progress = False
+                        logger.info("Response completed")
 
                     if event.type == "response.audio_transcript.done":
                         await self.output_queue.put(AdditionalOutputs(event))
 
                     elif event.type == "response.audio.delta":
+                        # Suppress duplicate responses - block bursts that arrive too quickly
+                        time_since_last = (datetime.now() - self.last_response_time).total_seconds()
+                        if time_since_last < 0.1:  # ignore deltas within 100 ms
+                            continue
+
                         await self.output_queue.put(
                             (
                                 self.output_sample_rate,
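The `response.audio.delta` guard drops any delta arriving within 100 ms of `response.created`, which also discards the genuine first ~100 ms of each reply; that is the trade-off this commit accepts to suppress doubled audio. The same gate, isolated as a testable sketch (hypothetical class name, monotonic clock instead of wall-clock datetime):

```python
from time import monotonic

class EarlyDeltaFilter:
    """Suppress audio deltas that arrive within `window_s` of a response
    starting, mirroring the guard in the event loop above."""

    def __init__(self, window_s: float = 0.1):
        self.window_s = window_s
        self.response_started = float("-inf")

    def on_response_created(self) -> None:
        self.response_started = monotonic()  # call on the response.created event

    def allow_delta(self) -> bool:
        # True once the response is older than the suppression window.
        return (monotonic() - self.response_started) >= self.window_s
```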
@@ -1008,7 +1072,7 @@ class OpenAIHandler(AsyncStreamHandler):
                                 "output": search_results
                             }
                         )
-
+                        # response.create() removed - a response is generated automatically
 
                     except Exception as e:
                         logger.error(f"Function call error: {e}")