IAMTFRMZA committed on
Commit
36cbe82
·
verified ·
1 Parent(s): f736f6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -76
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import os, time, re, json, base64, asyncio, threading, uuid, io
3
  import numpy as np
@@ -17,7 +18,7 @@ HEADERS = {"Authorization": f"Bearer {OPENAI_API_KEY}", "OpenAI-Beta": "realtime
17
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
18
  connections = {}
19
 
20
- # WebSocket Client
21
  class WebSocketClient:
22
  def __init__(self, uri, headers, client_id):
23
  self.uri = uri
@@ -70,6 +71,7 @@ class WebSocketClient:
70
  if data["type"] == "conversation.item.input_audio_transcription.delta":
71
  self.transcript += data["delta"]
72
 
 
73
  def create_ws():
74
  cid = str(uuid.uuid4())
75
  client = WebSocketClient(WS_URI, HEADERS, cid)
@@ -90,18 +92,13 @@ def clear_transcript(cid):
90
  return ""
91
 
92
  def format_response(content, prompt):
93
- summary = f"""<div class="card">
94
- <h3>❓ {prompt}</h3>
95
- <p><b>🧠 In summary:</b></p>
96
- <p>{content}</p>"""
97
- thumbnails = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
98
- if thumbnails:
99
- summary += "<h4>📎 Sources:</h4><div class='thumb-grid'>"
100
- for url in thumbnails:
101
- summary += f"<img src='{url}' class='thumb' />"
102
- summary += "</div>"
103
- summary += "</div>"
104
- return summary
105
 
106
  def handle_chat(prompt, thread_id):
107
  if not OPENAI_API_KEY or not ASSISTANT_ID:
@@ -132,108 +129,88 @@ def feed_transcript(transcript, thread_id, cid):
132
  connections[cid].transcript = ""
133
  return handle_chat(transcript, thread_id)
134
 
135
- # Gradio App
136
  with gr.Blocks(css="""
137
  body {
138
- background-color: #0f0f0f;
139
  color: white;
140
  font-family: 'Inter', sans-serif;
141
  }
142
- .card {
 
 
143
  background: #1a1a1a;
144
- padding: 20px;
145
- margin-top: 24px;
146
  border-radius: 14px;
147
- box-shadow: 0 2px 8px #000;
148
- }
149
- .thumb-grid {
150
- display: flex;
151
- gap: 10px;
152
- flex-wrap: wrap;
153
- margin-top: 12px;
154
- }
155
- .thumb {
156
- width: 120px;
157
- border-radius: 8px;
158
- border: 1px solid #333;
159
  }
160
- .input-box {
161
  position: fixed;
162
  bottom: 16px;
163
  left: 0;
164
  right: 0;
165
- max-width: 700px;
166
  margin: auto;
 
 
167
  display: flex;
168
  gap: 8px;
169
- background: #1f1f1f;
170
  padding: 14px;
171
- border-radius: 16px;
172
- justify-content: space-between;
173
  }
174
- #main-input {
175
  flex-grow: 1;
176
- background: #2a2a2a;
177
- border: none;
178
  padding: 12px;
179
- color: white;
180
  font-size: 16px;
181
- border-radius: 12px;
182
- }
183
- #send-btn, #mic-btn {
184
- background: #3f3fff;
185
  color: white;
 
 
 
186
  border: none;
187
- padding: 12px 16px;
 
 
 
 
 
 
 
 
188
  border-radius: 12px;
189
- font-size: 16px;
 
 
190
  }
191
  """) as app:
192
-
193
  thread_state = gr.State()
194
  client_id = gr.State()
195
  voice_visible = gr.State(False)
196
 
197
- gr.HTML("<h1 style='text-align:center; margin-top:40px;'>How can I help you today?</h1>")
198
- output_md = gr.HTML()
199
 
200
- with gr.Row(elem_classes="input-box"):
201
- user_input = gr.Textbox(elem_id="main-input", show_label=False, placeholder="Ask a question...")
202
  send_btn = gr.Button("➤", elem_id="send-btn")
203
  mic_toggle = gr.Button("🎙", elem_id="mic-btn")
204
 
205
  with gr.Column(visible=False) as voice_area:
206
- mic_audio = gr.Audio(label="Record", streaming=True, type="numpy")
207
  mic_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
208
  mic_send = gr.Button("Send Voice")
209
  mic_clear = gr.Button("Clear Transcript")
210
 
211
  # Bindings
212
- send_btn.click(fn=handle_chat,
213
- inputs=[user_input, thread_state],
214
- outputs=[output_md, thread_state])
215
-
216
- mic_toggle.click(fn=lambda v: not v,
217
- inputs=voice_visible,
218
- outputs=voice_visible)
219
-
220
- voice_visible.change(fn=None,
221
- inputs=voice_visible,
222
- outputs=voice_area,
223
- show_progress=False)
224
-
225
- mic_audio.stream(fn=send_audio,
226
- inputs=[mic_audio, client_id],
227
- outputs=mic_transcript,
228
- stream_every=0.5)
229
-
230
- mic_send.click(fn=feed_transcript,
231
- inputs=[mic_transcript, thread_state, client_id],
232
- outputs=[output_md, thread_state])
233
-
234
- mic_clear.click(fn=clear_transcript,
235
- inputs=[client_id],
236
- outputs=mic_transcript)
237
 
238
  app.load(fn=create_ws, outputs=[client_id])
239
 
 
1
+ # Final version with Perplexity-style UI, voice toggle, rich formatting, full-width input
2
  import gradio as gr
3
  import os, time, re, json, base64, asyncio, threading, uuid, io
4
  import numpy as np
 
18
  WS_URI = "wss://api.openai.com/v1/realtime?intent=transcription"
19
  connections = {}
20
 
21
+ # WebSocket Client for Voice
22
  class WebSocketClient:
23
  def __init__(self, uri, headers, client_id):
24
  self.uri = uri
 
71
  if data["type"] == "conversation.item.input_audio_transcription.delta":
72
  self.transcript += data["delta"]
73
 
74
+ # Chat Handlers
75
  def create_ws():
76
  cid = str(uuid.uuid4())
77
  client = WebSocketClient(WS_URI, HEADERS, cid)
 
92
  return ""
93
 
94
def format_response(content, prompt):
    """Render an assistant reply as Markdown.

    Builds a heading from the user's prompt, a summary section from the
    reply content, and — if the content contains GitHub-raw-hosted PNG
    URLs — appends them as inline Markdown images under a Sources section.

    Args:
        content: The assistant's reply text (may embed image URLs).
        prompt: The user's original question, shown as the heading.

    Returns:
        A Markdown string ready for a gr.Markdown component.
    """
    content_md = f"""### ❓ {prompt}\n\n**🧠 In summary:**\n\n{content}\n"""
    # Bug fix: the pattern previously doubled the backslashes inside a raw
    # string (r'\\.'), which matches a literal backslash followed by any
    # character — no real URL contains that, so findall always returned [].
    # In a raw string a single backslash (r'\.') is the correct escaped dot.
    image_urls = re.findall(r'https://raw\.githubusercontent\.com/[^\s)]+\.png', content)
    if image_urls:
        content_md += "\n\n**📎 Sources:**\n"
        for url in image_urls:
            content_md += f"![]({url})\n"
    return content_md
 
 
 
 
 
102
 
103
  def handle_chat(prompt, thread_id):
104
  if not OPENAI_API_KEY or not ASSISTANT_ID:
 
129
  connections[cid].transcript = ""
130
  return handle_chat(transcript, thread_id)
131
 
132
+ # Gradio UI
133
  with gr.Blocks(css="""
134
  body {
135
+ background: #0f0f0f;
136
  color: white;
137
  font-family: 'Inter', sans-serif;
138
  }
139
+ .markdown-container {
140
+ margin-top: 20px;
141
+ padding: 16px;
142
  background: #1a1a1a;
 
 
143
  border-radius: 14px;
144
+ box-shadow: 0 2px 6px #000;
145
+ font-size: 16px;
 
 
 
 
 
 
 
 
 
 
146
  }
147
+ .input-bar {
148
  position: fixed;
149
  bottom: 16px;
150
  left: 0;
151
  right: 0;
 
152
  margin: auto;
153
+ max-width: 800px;
154
+ background: #1c1c1c;
155
  display: flex;
156
  gap: 8px;
 
157
  padding: 14px;
158
+ border-radius: 14px;
 
159
  }
160
+ #user_input {
161
  flex-grow: 1;
 
 
162
  padding: 12px;
 
163
  font-size: 16px;
164
+ border-radius: 10px;
165
+ border: none;
166
+ background: #292929;
 
167
  color: white;
168
+ }
169
+ .mic-btn, .send-btn {
170
+ background: #4f46e5;
171
  border: none;
172
+ border-radius: 10px;
173
+ color: white;
174
+ font-size: 18px;
175
+ padding: 10px 16px;
176
+ }
177
+ .voice-panel {
178
+ background: #222;
179
+ padding: 12px;
180
+ margin-top: 20px;
181
  border-radius: 12px;
182
+ display: flex;
183
+ flex-direction: column;
184
+ gap: 12px;
185
  }
186
  """) as app:
 
187
  thread_state = gr.State()
188
  client_id = gr.State()
189
  voice_visible = gr.State(False)
190
 
191
+ gr.Markdown("<h1 style='text-align:center;'>How can I help you today?</h1>")
192
+ output = gr.Markdown(elem_classes="markdown-container")
193
 
194
+ with gr.Row(elem_classes="input-bar"):
195
+ user_input = gr.Textbox(placeholder="Ask a question...", elem_id="user_input", show_label=False)
196
  send_btn = gr.Button("➤", elem_id="send-btn")
197
  mic_toggle = gr.Button("🎙", elem_id="mic-btn")
198
 
199
  with gr.Column(visible=False) as voice_area:
200
+ mic_audio = gr.Audio(label="Tap to Speak", streaming=True, type="numpy")
201
  mic_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
202
  mic_send = gr.Button("Send Voice")
203
  mic_clear = gr.Button("Clear Transcript")
204
 
205
  # Bindings
206
+ send_btn.click(fn=handle_chat, inputs=[user_input, thread_state], outputs=[output, thread_state])
207
+
208
+ mic_toggle.click(fn=lambda v: not v, inputs=voice_visible, outputs=voice_visible)
209
+ voice_visible.change(fn=None, inputs=voice_visible, outputs=voice_area, show_progress=False)
210
+
211
+ mic_audio.stream(fn=send_audio, inputs=[mic_audio, client_id], outputs=mic_transcript, stream_every=0.5)
212
+ mic_send.click(fn=feed_transcript, inputs=[mic_transcript, thread_state, client_id], outputs=[output, thread_state])
213
+ mic_clear.click(fn=clear_transcript, inputs=[client_id], outputs=mic_transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  app.load(fn=create_ws, outputs=[client_id])
216