IAMTFRMZA committed on
Commit 2615e33 · verified · 1 Parent(s): 36cbe82

Update app.py

Files changed (1): app.py +45 -54
app.py CHANGED
@@ -1,4 +1,4 @@
-# Final version with Perplexity-style UI, voice toggle, rich formatting, full-width input
+# Final version with Perplexity-style UI, voice toggle, rich formatting, full-width input and working mic
 import gradio as gr
 import os, time, re, json, base64, asyncio, threading, uuid, io
 import numpy as np
@@ -18,7 +18,6 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
 WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
 connections = {}
 
-# WebSocket Client for Voice
 class WebSocketClient:
     def __init__(self, uri, headers, client_id):
         self.uri = uri
@@ -31,12 +30,12 @@ class WebSocketClient:
 
     async def connect(self):
         try:
-            self.websocket = await connect(self.uri, additional_headers=self.headers)
+            self.websocket = await connect(self.uri, extra_headers=self.headers)
             with open("openai_transcription_settings.json", "r") as f:
                 await self.websocket.send(f.read())
             await asyncio.gather(self.receive_messages(), self.send_audio_chunks())
         except Exception as e:
-            print(f"🔴 WebSocket Connection Failed: {e}")
+            print(f"WebSocket failed: {e}")
 
     def run(self):
         asyncio.set_event_loop(self.loop)
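Note on this hunk: the keyword change is a websockets version issue, not a cosmetic one. websockets 14.0 renamed the header argument of connect() from extra_headers to additional_headers, so the old call only works on 14+ and the new call only on older releases. A minimal compatibility sketch (hypothetical helper, assuming only the websockets package):

    import websockets

    async def connect_ws(uri, headers):
        # websockets >= 14 expects additional_headers; older releases
        # expect extra_headers. Try the new name, fall back on TypeError.
        try:
            return await websockets.connect(uri, additional_headers=headers)
        except TypeError:
            return await websockets.connect(uri, extra_headers=headers)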
@@ -71,7 +70,6 @@ class WebSocketClient:
         if data["type"] == "conversation.item.input_audio_transcription.delta":
            self.transcript += data["delta"]
 
-# Chat Handlers
 def create_ws():
     cid = str(uuid.uuid4())
     client = WebSocketClient(WS_URI, HEADERS, cid)
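The hunk above shows the receiving side accumulating transcription deltas; the sending side (send_audio_chunks, not visible in this diff) streams microphone audio up the same socket. A hedged sketch of what such a sender could look like, assuming the realtime transcription intent accepts base64 PCM16 via input_audio_buffer.append events (per OpenAI's realtime docs; the exact code in app.py is not shown here):

    import base64, json
    import numpy as np

    async def send_chunk(websocket, samples: np.ndarray):
        # Hypothetical sender: scale float audio to 16-bit PCM,
        # base64-encode it, and wrap it in a realtime append event.
        pcm16 = (np.clip(samples, -1, 1) * 32767).astype(np.int16).tobytes()
        await websocket.send(json.dumps({
            "type": "input_audio_buffer.append",
            "audio": base64.b64encode(pcm16).decode("utf-8"),
        }))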
@@ -92,17 +90,15 @@ def clear_transcript(cid):
     return ""
 
 def format_response(content, prompt):
-    content_md = f"""### ❓ {prompt}\n\n**🧠 In summary:**\n\n{content}\n"""
-    image_urls = re.findall(r'https://raw\\.githubusercontent\\.com/[^\s)]+\\.png', content)
-    if image_urls:
-        content_md += "\n\n**📎 Sources:**\n"
-        for url in image_urls:
-            content_md += f"![]({url})\n"
-    return content_md
+    md = f"""### ❓ {prompt}\n\n**🧠 In summary:**\n\n{content}\n"""
+    images = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
+    if images:
+        md += "\n\n**📎 Sources:**\n" + "\n".join([f"![]({url})" for url in images])
+    return md
 
 def handle_chat(prompt, thread_id):
     if not OPENAI_API_KEY or not ASSISTANT_ID:
-        return "❌ Missing API Key or Assistant ID", thread_id
+        return "❌ Missing credentials", thread_id
     try:
         if thread_id is None:
             thread = client.beta.threads.create()
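The regex change in format_response is a real bug fix: inside a raw string, \\. matches a literal backslash followed by any character, so the old pattern could never match a normal URL, while \. matches a literal dot. A quick illustration:

    import re

    url = "see https://raw.githubusercontent.com/user/repo/main/chart.png here"
    re.findall(r'https://raw\\.githubusercontent\\.com/[^\s)]+\\.png', url)  # [] - never matches
    re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', url)     # ['https://raw.githubusercontent.com/user/repo/main/chart.png']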
@@ -118,7 +114,7 @@ def handle_chat(prompt, thread_id):
         for msg in reversed(msgs.data):
             if msg.role == "assistant":
                 return format_response(msg.content[0].text.value, prompt), thread_id
-        return "⚠️ No assistant reply", thread_id
+        return "⚠️ No reply", thread_id
     except Exception as e:
         return f"❌ {e}", thread_id
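The context lines elide the run step between creating the thread and reading msgs. Under the Assistants beta API this is typically a create-and-poll loop; a hedged sketch of the missing middle (names follow the openai Python SDK, but the exact code in app.py is not shown in this diff):

    run = client.beta.threads.runs.create(thread_id=thread_id, assistant_id=ASSISTANT_ID)
    # Poll until the run finishes before fetching the assistant's messages.
    while run.status not in ("completed", "failed", "cancelled", "expired"):
        time.sleep(0.5)
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id)
    msgs = client.beta.threads.messages.list(thread_id=thread_id)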
 
@@ -129,54 +125,53 @@ def feed_transcript(transcript, thread_id, cid):
     connections[cid].transcript = ""
     return handle_chat(transcript, thread_id)
 
-# Gradio UI
 with gr.Blocks(css="""
 body {
-    background: #0f0f0f;
+    background-color: #0f0f0f;
     color: white;
     font-family: 'Inter', sans-serif;
 }
 .markdown-container {
-    margin-top: 20px;
-    padding: 16px;
+    margin-top: 16px;
+    padding: 18px;
     background: #1a1a1a;
-    border-radius: 14px;
-    box-shadow: 0 2px 6px #000;
+    border-radius: 12px;
     font-size: 16px;
+    box-shadow: 0 2px 6px #000;
 }
 .input-bar {
     position: fixed;
     bottom: 16px;
     left: 0;
     right: 0;
-    max-width: 800px;
+    max-width: 1000px;
     margin: auto;
-    background: #1c1c1c;
     display: flex;
-    gap: 8px;
-    padding: 14px;
-    border-radius: 14px;
+    padding: 12px;
+    gap: 10px;
+    background: #1f1f1f;
+    border-radius: 16px;
 }
 #user_input {
-    flex-grow: 1;
+    flex: 1;
     padding: 12px;
     font-size: 16px;
-    border-radius: 10px;
-    border: none;
     background: #292929;
+    border: none;
     color: white;
+    border-radius: 10px;
 }
-.mic-btn, .send-btn {
+.btn {
     background: #4f46e5;
+    color: white;
     border: none;
     border-radius: 10px;
-    color: white;
     font-size: 18px;
-    padding: 10px 16px;
+    padding: 10px 14px;
 }
-.voice-panel {
+.voice-area {
     background: #222;
-    padding: 12px;
+    padding: 14px;
     margin-top: 20px;
     border-radius: 12px;
     display: flex;
@@ -189,29 +184,25 @@ body {
     voice_visible = gr.State(False)
 
     gr.Markdown("<h1 style='text-align:center;'>How can I help you today?</h1>")
-    output = gr.Markdown(elem_classes="markdown-container")
+    response = gr.Markdown(elem_classes="markdown-container")
 
     with gr.Row(elem_classes="input-bar"):
-        user_input = gr.Textbox(placeholder="Ask a question...", elem_id="user_input", show_label=False)
-        send_btn = gr.Button("➤", elem_id="send-btn")
-        mic_toggle = gr.Button("🎙", elem_id="mic-btn")
+        prompt = gr.Textbox(placeholder="Ask a question...", elem_id="user_input", show_label=False)
+        send = gr.Button("➤", elem_id="send", elem_classes="btn")
+        mic = gr.Button("🎙", elem_id="mic", elem_classes="btn")
 
-    with gr.Column(visible=False) as voice_area:
-        mic_audio = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
-        mic_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
-        mic_send = gr.Button("Send Voice")
-        mic_clear = gr.Button("Clear Transcript")
+    with gr.Column(visible=False, elem_classes="voice-area") as voice_box:
+        voice = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
+        transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
+        send_voice = gr.Button("Send Voice")
+        clear = gr.Button("Clear Transcript")
 
-    # Bindings
-    send_btn.click(fn=handle_chat, inputs=[user_input, thread_state], outputs=[output, thread_state])
-
-    mic_toggle.click(fn=lambda v: not v, inputs=voice_visible, outputs=voice_visible)
-    voice_visible.change(fn=None, inputs=voice_visible, outputs=voice_area, show_progress=False)
-
-    mic_audio.stream(fn=send_audio, inputs=[mic_audio, client_id], outputs=mic_transcript, stream_every=0.5)
-    mic_send.click(fn=feed_transcript, inputs=[mic_transcript, thread_state, client_id], outputs=[output, thread_state])
-    mic_clear.click(fn=clear_transcript, inputs=[client_id], outputs=mic_transcript)
+    send.click(handle_chat, [prompt, thread_state], [response, thread_state])
+    mic.click(lambda x: not x, voice_visible, voice_visible)
+    voice_visible.change(fn=None, inputs=voice_visible, outputs=voice_box, show_progress=False)
+    voice.stream(send_audio, [voice, client_id], transcript, stream_every=0.5)
+    send_voice.click(feed_transcript, [transcript, thread_state, client_id], [response, thread_state])
+    clear.click(clear_transcript, [client_id], transcript)
 
     app.load(fn=create_ws, outputs=[client_id])
 
 app.launch()
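One caveat in the new bindings: voice_visible.change(fn=None, inputs=voice_visible, outputs=voice_box) registers no Python callback, so on its own it is unlikely to show or hide the column. In current Gradio the usual pattern returns a visibility update from the click handler itself; a minimal sketch (assuming gr.update, which Gradio provides):

    # Toggle the state and map it to a visibility update for the column.
    mic.click(
        lambda v: (not v, gr.update(visible=not v)),
        inputs=voice_visible,
        outputs=[voice_visible, voice_box],
    )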
 