seawolf2357 committed
Commit a412ec1 · verified · 1 Parent(s): 10391c6

Delete app.py

Files changed (1)
  1. app.py +0 -151
app.py DELETED
@@ -1,151 +0,0 @@
- import asyncio
- import base64
- import json
- from pathlib import Path
-
- import gradio as gr
- import numpy as np
- import openai
- from dotenv import load_dotenv
- from fastapi import FastAPI
- from fastapi.responses import HTMLResponse, StreamingResponse
- from fastrtc import (
-     AdditionalOutputs,
-     AsyncStreamHandler,
-     Stream,
-     get_twilio_turn_credentials,
-     wait_for_item,
- )
- from gradio.utils import get_space
- from openai.types.beta.realtime import ResponseAudioTranscriptDoneEvent
-
- load_dotenv()
-
- cur_dir = Path(__file__).parent
-
- SAMPLE_RATE = 24000
-
-
- class OpenAIHandler(AsyncStreamHandler):
-     def __init__(
-         self,
-     ) -> None:
-         super().__init__(
-             expected_layout="mono",
-             output_sample_rate=SAMPLE_RATE,
-             output_frame_size=480,
-             input_sample_rate=SAMPLE_RATE,
-         )
-         self.connection = None
-         self.output_queue = asyncio.Queue()
-
-     def copy(self):
-         return OpenAIHandler()
-
-     async def start_up(
-         self,
-     ):
-         """Connect to realtime API. Run forever in separate thread to keep connection open."""
-         self.client = openai.AsyncOpenAI()
-         async with self.client.beta.realtime.connect(
-             model="gpt-4o-mini-realtime-preview-2024-12-17"
-         ) as conn:
-             await conn.session.update(
-                 session={
-                     "turn_detection": {"type": "server_vad"},
- "system_instruction": "๋‹น์‹ ์€ MOUSE ์ฑ—์ด๋ผ๋Š” AI ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ์นœ์ ˆํ•˜๊ณ  ๋ช…ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”. ํ•œ๊ตญ์–ด๋กœ ๋Œ€ํ™”๋ฅผ ๋‚˜๋ˆ„๋ฉฐ, ํ•„์š”ํ•œ ๊ฒฝ์šฐ ์ •๋ณด๋ฅผ ์ƒ์„ธํžˆ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ๋‹น์‹ ์˜ ์ด๋ฆ„์€ 'MOUSE ์ฑ—'์ž…๋‹ˆ๋‹ค."
57
-                 }
-             )
-             self.connection = conn
-             async for event in self.connection:
-                 if event.type == "response.audio_transcript.done":
-                     await self.output_queue.put(AdditionalOutputs(event))
-                 if event.type == "response.audio.delta":
-                     await self.output_queue.put(
-                         (
-                             self.output_sample_rate,
-                             np.frombuffer(
-                                 base64.b64decode(event.delta), dtype=np.int16
-                             ).reshape(1, -1),
-                         ),
-                     )
-
-     async def receive(self, frame: tuple[int, np.ndarray]) -> None:
-         if not self.connection:
-             return
-         try:
-             _, array = frame
-             array = array.squeeze()
-             audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
-             await self.connection.input_audio_buffer.append(audio=audio_message)  # type: ignore
-         except Exception as e:
-             # The connection was already closed or another error occurred; just log it and keep going
-             print(f"Error in receive: {e}")
-             # If the connection appears to have dropped, set connection to None
- if "ConnectionClosed" in str(e):
86
- self.connection = None
87
-
88
- async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
89
- return await wait_for_item(self.output_queue)
90
-
91
- async def shutdown(self) -> None:
92
- if self.connection:
93
- await self.connection.close()
94
- self.connection = None
95
-
96
-
97
- def update_chatbot(chatbot: list[dict], response: ResponseAudioTranscriptDoneEvent):
98
- chatbot.append({"role": "assistant", "content": response.transcript})
99
- return chatbot
100
-
101
-
102
- chatbot = gr.Chatbot(type="messages")
103
- latest_message = gr.Textbox(type="text", visible=False)
104
- stream = Stream(
105
- OpenAIHandler(),
106
- mode="send-receive",
107
- modality="audio",
108
- additional_inputs=[chatbot],
109
- additional_outputs=[chatbot],
110
- additional_outputs_handler=update_chatbot,
111
- rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
112
- concurrency_limit=5 if get_space() else None,
113
- time_limit=90 if get_space() else None,
114
- )
115
-
116
- app = FastAPI()
117
-
118
- stream.mount(app)
119
-
120
-
121
- @app.get("/")
122
- async def _():
123
- rtc_config = get_twilio_turn_credentials() if get_space() else None
124
- html_content = (cur_dir / "index.html").read_text()
125
- html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
126
- return HTMLResponse(content=html_content)
127
-
128
-
129
- @app.get("/outputs")
130
- def _(webrtc_id: str):
131
- async def output_stream():
132
- import json
133
-
134
- async for output in stream.output_stream(webrtc_id):
135
- s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
136
- yield f"event: output\ndata: {s}\n\n"
137
-
138
- return StreamingResponse(output_stream(), media_type="text/event-stream")
139
-
140
-
141
- if __name__ == "__main__":
142
- import os
143
-
144
- if (mode := os.getenv("MODE")) == "UI":
145
- stream.ui.launch(server_port=7860)
146
- elif mode == "PHONE":
147
- stream.fastphone(host="0.0.0.0", port=7860)
148
- else:
149
- import uvicorn
150
-
151
- uvicorn.run(app, host="0.0.0.0", port=7860)
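
For reference, the deleted app streamed assistant transcripts from its /outputs endpoint as server-sent events, one "event: output" message per completed transcript. Below is a minimal client sketch, not part of the original repo, showing how such a stream could be consumed. It assumes the app is running locally on port 7860, that webrtc_id identifies an already-established WebRTC session, and it uses httpx purely for brevity; the function name follow_transcripts is hypothetical.

import asyncio
import json

import httpx


async def follow_transcripts(webrtc_id: str) -> None:
    # Open a long-lived GET request to the SSE endpoint exposed by the (now deleted) app.
    url = "http://localhost:7860/outputs"
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("GET", url, params={"webrtc_id": webrtc_id}) as resp:
            async for line in resp.aiter_lines():
                # Each SSE message arrives as "event: output" followed by a "data: {...}" line.
                if line.startswith("data: "):
                    message = json.loads(line[len("data: "):])
                    print(message["content"])


if __name__ == "__main__":
    asyncio.run(follow_transcripts("example-webrtc-id"))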