shukdevdatta123 commited on
Commit
d797632
·
verified ·
1 Parent(s): 734a16d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -549
app.py DELETED
@@ -1,549 +0,0 @@
1
- import base64
2
- import tempfile
3
- import os
4
- import requests
5
- import gradio as gr
6
- import random
7
- import time
8
- from openai import OpenAI
9
- from requests.exceptions import RequestException, Timeout, ConnectionError
10
-
11
# Voices accepted by the gpt-4o-audio-preview `audio.voice` parameter.
VOICES = ["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]

# Publicly hosted sample clips used by the "Use Random Example Audio" button.
# Note: not every voice in VOICES has a sample here (ballad and verse are absent).
EXAMPLE_AUDIO_URLS = [
    "https://cdn.openai.com/API/docs/audio/alloy.wav",
    "https://cdn.openai.com/API/docs/audio/ash.wav",
    "https://cdn.openai.com/API/docs/audio/coral.wav",
    "https://cdn.openai.com/API/docs/audio/echo.wav",
    "https://cdn.openai.com/API/docs/audio/fable.wav",
    "https://cdn.openai.com/API/docs/audio/onyx.wav",
    "https://cdn.openai.com/API/docs/audio/nova.wav",
    "https://cdn.openai.com/API/docs/audio/sage.wav",
    "https://cdn.openai.com/API/docs/audio/shimmer.wav"
]

# Languages shown in the translation tab's "Supported Languages" accordion.
# Display-only in this module: the list is joined into a Markdown string and
# never passed to the API.
SUPPORTED_LANGUAGES = [
    "Afrikaans", "Arabic", "Armenian", "Azerbaijani", "Belarusian", "Bosnian",
    "Bulgarian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch",
    "English", "Estonian", "Finnish", "French", "Galician", "German", "Greek",
    "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese",
    "Kannada", "Kazakh", "Korean", "Latvian", "Lithuanian", "Macedonian", "Malay",
    "Marathi", "Maori", "Nepali", "Norwegian", "Persian", "Polish", "Portuguese",
    "Romanian", "Russian", "Serbian", "Slovak", "Slovenian", "Spanish", "Swahili",
    "Swedish", "Tagalog", "Tamil", "Thai", "Turkish", "Ukrainian", "Urdu",
    "Vietnamese", "Welsh"
]

# Retry policy for the hand-rolled retry loops in this module (these are in
# addition to the SDK-level max_retries configured in create_openai_client).
MAX_RETRIES = 3
RETRY_DELAY = 2  # seconds
43
-
44
def create_openai_client(api_key):
    """Build an OpenAI client with a 60-second request timeout and up to
    3 SDK-level automatic retries."""
    return OpenAI(api_key=api_key, timeout=60.0, max_retries=3)
51
-
52
def process_text_input(api_key, text_prompt, selected_voice):
    """Generate a spoken + text response to a text prompt via GPT-4o audio.

    Args:
        api_key: OpenAI API key supplied by the user.
        text_prompt: The user's question or prompt.
        selected_voice: One of VOICES, used for the audio reply.

    Returns:
        (text_response, audio_path) on success; on failure the first element
        is a human-readable error message and the second is None.
    """
    try:
        client = create_openai_client(api_key)

        completion = client.chat.completions.create(
            model="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            audio={"voice": selected_voice, "format": "wav"},
            messages=[
                {
                    "role": "user",
                    "content": text_prompt
                }
            ]
        )

        # Decode the base64 audio payload and persist it to a temp file.
        # NamedTemporaryFile(delete=False) replaces the deprecated, race-prone
        # tempfile.mktemp(): the file is created atomically, not just named.
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(wav_bytes)
            temp_path = f.name

        # The text half of the multimodal response.
        text_response = completion.choices[0].message.content

        return text_response, temp_path
    except ConnectionError as e:
        return f"Connection error: {str(e)}. Please check your internet connection and try again.", None
    except Timeout as e:
        return f"Timeout error: {str(e)}. The request took too long to complete. Please try again.", None
    except Exception as e:
        return f"Error: {str(e)}", None
86
-
87
def process_audio_input(api_key, audio_path, text_prompt, selected_voice):
    """Send user audio (plus optional text context) to GPT-4o and return its reply.

    Args:
        api_key: OpenAI API key supplied by the user.
        audio_path: Path to the WAV file recorded or uploaded by the user.
        text_prompt: Optional text accompanying the audio; skipped if empty.
        selected_voice: One of VOICES, used for the audio reply.

    Returns:
        (text_response, audio_path) on success; on failure the first element
        is a human-readable error message and the second is None.
    """
    try:
        if not audio_path:
            return "Please upload or record audio first.", None

        client = create_openai_client(api_key)

        # Read the audio file and base64-encode it for the API payload.
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
        encoded_audio = base64.b64encode(audio_data).decode('utf-8')

        # Build multimodal message content: optional text part first, then audio.
        message_content = []

        if text_prompt:
            message_content.append({
                "type": "text",
                "text": text_prompt
            })

        message_content.append({
            "type": "input_audio",
            "input_audio": {
                "data": encoded_audio,
                "format": "wav"
            }
        })

        completion = client.chat.completions.create(
            model="gpt-4o-audio-preview",
            modalities=["text", "audio"],
            audio={"voice": selected_voice, "format": "wav"},
            messages=[
                {
                    "role": "user",
                    "content": message_content
                }
            ]
        )

        # Persist the audio reply. NamedTemporaryFile(delete=False) replaces
        # the deprecated, race-prone tempfile.mktemp().
        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(wav_bytes)
            temp_path = f.name

        text_response = completion.choices[0].message.content

        return text_response, temp_path
    except ConnectionError as e:
        return f"Connection error: {str(e)}. Please check your internet connection and try again.", None
    except Timeout as e:
        return f"Timeout error: {str(e)}. The request took too long to complete. Please try again.", None
    except Exception as e:
        return f"Error: {str(e)}", None
147
-
148
def transcribe_audio(api_key, audio_path):
    """Transcribe *audio_path* with the gpt-4o-transcribe model.

    Returns the transcript text on success, or a human-readable error
    string on any failure (the caller displays either directly).
    """
    try:
        # Guard clauses: bail out early on missing or unusable input.
        if not audio_path:
            return "No audio file provided for transcription."

        client = create_openai_client(api_key)

        if not os.path.exists(audio_path):
            return "Audio file not found or inaccessible."
        if os.path.getsize(audio_path) == 0:
            return "Audio file is empty."

        with open(audio_path, "rb") as audio_file:
            for retry in range(MAX_RETRIES):
                is_last_attempt = retry == MAX_RETRIES - 1
                try:
                    result = client.audio.transcriptions.create(
                        model="gpt-4o-transcribe",
                        file=audio_file
                    )
                    return result.text
                except (ConnectionError, Timeout) as e:
                    if is_last_attempt:
                        return f"Transcription failed after {MAX_RETRIES} attempts: {str(e)}"
                    time.sleep(RETRY_DELAY)
                    # Rewind so the next attempt re-uploads from the start.
                    audio_file.seek(0)
                except Exception as e:
                    return f"Transcription error: {str(e)}"

    except Exception as e:
        return f"Transcription error: {str(e)}"
186
-
187
def translate_audio(api_key, audio_path):
    """Translate the speech in *audio_path* to English using whisper-1.

    Returns the translated text, or a human-readable error string when the
    input is unusable or the API call ultimately fails.
    """
    try:
        # Early exits for missing, inaccessible, or empty input.
        if not audio_path:
            return "No audio file provided for translation."
        if not os.path.exists(audio_path):
            return "Audio file not found or inaccessible."
        if os.path.getsize(audio_path) == 0:
            return "Audio file is empty."

        client = create_openai_client(api_key)

        # Retry transient network failures, backing off longer on each pass.
        for attempt in range(1, MAX_RETRIES + 1):
            try:
                with open(audio_path, "rb") as audio_file:
                    result = client.audio.translations.create(
                        model="whisper-1",
                        file=audio_file,
                        timeout=90.0  # extended per-request timeout for translation
                    )
                return result.text
            except (ConnectionError, Timeout):
                if attempt == MAX_RETRIES:
                    return f"Translation failed after {MAX_RETRIES} attempts: Connection error. Please check your internet connection and try again."
                time.sleep(RETRY_DELAY * attempt)
            except Exception as e:
                message = str(e)
                if "connection" in message.lower():
                    return f"Connection error: {message}. Please check your internet connection and try again."
                return f"Translation error: {message}"

    except Exception as e:
        return f"Translation error: {str(e)}"
231
-
232
def download_example_audio():
    """Download a random OpenAI example voice clip for testing.

    Returns:
        (path, message): path to the saved WAV file (None on failure) and a
        human-readable status message for the UI.
    """
    try:
        url = random.choice(EXAMPLE_AUDIO_URLS)

        # Voice name is the URL's file stem, e.g. ".../alloy.wav" -> "alloy".
        voice_name = url.split('/')[-1].split('.')[0]

        for attempt in range(MAX_RETRIES):
            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()

                # NamedTemporaryFile(delete=False) replaces the deprecated,
                # race-prone tempfile.mktemp(): the file is created atomically.
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                    f.write(response.content)
                    temp_path = f.name

                return temp_path, f"Loaded example voice: {voice_name}"
            except (ConnectionError, Timeout) as e:
                if attempt < MAX_RETRIES - 1:
                    time.sleep(RETRY_DELAY)
                    continue
                else:
                    return None, f"Failed to download example after {MAX_RETRIES} attempts: {str(e)}"
            except Exception as e:
                # Covers HTTPError from raise_for_status() and anything else.
                return None, f"Error loading example: {str(e)}"

    except Exception as e:
        return None, f"Error loading example: {str(e)}"
264
-
265
def use_example_audio():
    """UI callback: fetch a random example clip and a status message."""
    # download_example_audio already returns the (path, message) pair that
    # the Gradio outputs expect, so pass it straight through.
    return download_example_audio()
269
-
270
def check_api_key(api_key):
    """Return True when a non-blank API key string was supplied."""
    return bool(api_key) and api_key.strip() != ""
275
-
276
# ---------------------------------------------------------------------------
# Gradio user interface: four tabs sharing one API-key textbox, wired to the
# helper functions above. Built at import time; launched under __main__.
# ---------------------------------------------------------------------------
with gr.Blocks(title="OpenAI Audio Chat App", theme=gr.themes.Ocean()) as app:
    gr.Markdown("# OpenAI Audio Chat App")
    gr.Markdown("Interact with GPT-4o audio model through text and audio inputs")

    # API key input shared by the callbacks of every tab.
    api_key = gr.Textbox(
        label="OpenAI API Key",
        placeholder="Enter your OpenAI API key here",
        type="password"
    )

    # --- Tab 1: text prompt -> spoken + text reply ------------------------
    with gr.Tab("Text to Audio"):
        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(
                    label="Text Prompt",
                    placeholder="Enter your question or prompt here...",
                    lines=3
                )
                text_voice = gr.Dropdown(
                    choices=VOICES,
                    value="alloy",
                    label="Voice"
                )
                text_submit = gr.Button("Generate Response")

            with gr.Column():
                text_output = gr.Textbox(label="AI Response (Checks Error)", lines=5)
                audio_output = gr.Audio(label="AI Response (Audio)")
                transcribed_output = gr.Textbox(label="Transcription of Audio Response", lines=3)

        def text_input_with_transcription(api_key, text_prompt, voice):
            """Generate a reply from text, then transcribe the reply audio
            so the user can verify what was spoken."""
            if not check_api_key(api_key):
                return "Please enter your OpenAI API key first.", None, "No API key provided."

            text_response, audio_path = process_text_input(api_key, text_prompt, voice)

            # Transcribe the generated audio (skipped when generation failed).
            if audio_path:
                transcription = transcribe_audio(api_key, audio_path)
            else:
                transcription = "No audio generated to transcribe."

            return text_response, audio_path, transcription

        text_submit.click(
            fn=text_input_with_transcription,
            inputs=[api_key, text_input, text_voice],
            outputs=[text_output, audio_output, transcribed_output]
        )

    # --- Tab 2: audio (+ optional text) -> spoken + text reply ------------
    with gr.Tab("Audio Input to Audio Response"):
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="Audio Input",
                    type="filepath",
                    sources=["microphone", "upload"]
                )
                example_btn = gr.Button("Use Random Example Audio")
                example_message = gr.Textbox(label="Example Status", interactive=False)

                accompanying_text = gr.Textbox(
                    label="Accompanying Text (Optional)",
                    placeholder="Add any text context or question about the audio...",
                    lines=2
                )
                audio_voice = gr.Dropdown(
                    choices=VOICES,
                    value="alloy",
                    label="Response Voice"
                )
                audio_submit = gr.Button("Process Audio & Generate Response")

            with gr.Column():
                audio_text_output = gr.Textbox(label="AI Response (Checks Error)", lines=5)
                audio_audio_output = gr.Audio(label="AI Response (Audio)")
                audio_transcribed_output = gr.Textbox(label="Transcription of Audio Response", lines=3)
                input_transcription = gr.Textbox(label="Transcription of Input Audio", lines=3)

        def audio_input_with_transcription(api_key, audio_path, text_prompt, voice):
            """Transcribe the input audio, generate a reply, then transcribe
            the reply audio. Returns four values matching the outputs list."""
            if not check_api_key(api_key):
                return "Please enter your OpenAI API key first.", None, "No API key provided.", "No API key provided."

            # Transcribe the user's input audio first.
            input_transcription = "N/A"
            if audio_path:
                input_transcription = transcribe_audio(api_key, audio_path)
            else:
                return "Please upload or record audio first.", None, "No audio to transcribe.", "No audio provided."

            # Generate the model's reply to the audio (+ optional text).
            text_response, response_audio_path = process_audio_input(api_key, audio_path, text_prompt, voice)

            # Transcribe the reply audio for verification.
            response_transcription = "No audio generated to transcribe."
            if response_audio_path:
                response_transcription = transcribe_audio(api_key, response_audio_path)

            return text_response, response_audio_path, response_transcription, input_transcription

        audio_submit.click(
            fn=audio_input_with_transcription,
            inputs=[api_key, audio_input, accompanying_text, audio_voice],
            outputs=[audio_text_output, audio_audio_output, audio_transcribed_output, input_transcription]
        )

        example_btn.click(
            fn=use_example_audio,
            inputs=[],
            outputs=[audio_input, example_message]
        )

    # --- Tab 3: generate a short sample of any voice ----------------------
    with gr.Tab("Voice Samples"):
        gr.Markdown("## Listen to samples of each voice")

        def generate_voice_sample(api_key, voice_type):
            """Ask the model to speak a fixed sentence in *voice_type* and
            return (status, audio_path, transcription)."""
            if not check_api_key(api_key):
                return "Please enter your OpenAI API key first.", None, "No API key provided."

            try:
                client = create_openai_client(api_key)

                # Manual retry loop for transient network failures.
                for attempt in range(MAX_RETRIES):
                    try:
                        completion = client.chat.completions.create(
                            model="gpt-4o-audio-preview",
                            modalities=["text", "audio"],
                            audio={"voice": voice_type, "format": "wav"},
                            messages=[
                                {
                                    "role": "user",
                                    "content": f"This is a sample of the {voice_type} voice. It has its own unique tone and character."
                                }
                            ]
                        )

                        # NOTE(review): tempfile.mktemp() is deprecated and
                        # race-prone; consider NamedTemporaryFile(delete=False)
                        # as used elsewhere for temp WAV files.
                        wav_bytes = base64.b64decode(completion.choices[0].message.audio.data)
                        temp_path = tempfile.mktemp(suffix=".wav")
                        with open(temp_path, "wb") as f:
                            f.write(wav_bytes)

                        # Transcribe the sample for display alongside the audio.
                        transcription = transcribe_audio(api_key, temp_path)

                        return f"Sample generated with voice: {voice_type}", temp_path, transcription
                    except (ConnectionError, Timeout) as e:
                        if attempt < MAX_RETRIES - 1:
                            time.sleep(RETRY_DELAY)
                            continue
                        else:
                            return f"Connection error after {MAX_RETRIES} attempts: {str(e)}. Please check your internet connection.", None, "No sample generated."
            except Exception as e:
                return f"Error: {str(e)}", None, "No transcription available."

        with gr.Row():
            sample_voice = gr.Dropdown(
                choices=VOICES,
                value="alloy",
                label="Select Voice Sample"
            )
            sample_btn = gr.Button("Generate Sample")

        with gr.Row():
            sample_text = gr.Textbox(label="Status")
            sample_audio = gr.Audio(label="Voice Sample")
            sample_transcription = gr.Textbox(label="Transcription", lines=3)

        sample_btn.click(
            fn=generate_voice_sample,
            inputs=[api_key, sample_voice],
            outputs=[sample_text, sample_audio, sample_transcription]
        )

    # --- Tab 4: translate foreign-language audio to English ---------------
    with gr.Tab("Audio Translation"):
        gr.Markdown("## Translate audio from other languages to English")
        gr.Markdown("Supports 50+ languages including: Arabic, Chinese, French, German, Japanese, Spanish, and many more.")

        with gr.Row():
            with gr.Column():
                translation_audio_input = gr.Audio(
                    label="Audio to Translate",
                    type="filepath",
                    sources=["microphone", "upload"]
                )

                translate_btn = gr.Button("Translate to English")
                connection_status = gr.Textbox(label="Connection Status", value="Ready", interactive=False)

            with gr.Column():
                translation_output = gr.Textbox(label="English Translation", lines=5)
                original_transcription = gr.Textbox(label="Original Transcription (if available)", lines=5)

        def translate_audio_input(api_key, audio_path):
            """Generator callback: streams intermediate status updates to the
            UI while translating, then yields the final result.

            NOTE(review): because this function contains `yield`, it is a
            generator; the plain `return <tuple>` statements below end the
            generator without emitting those tuples as outputs, so those
            messages likely never reach the UI — verify against Gradio's
            generator-callback semantics.
            """
            if not check_api_key(api_key):
                return "Please enter your OpenAI API key first.", "No API key provided.", "No API key provided."

            try:
                if not audio_path:
                    return "Please upload or record audio first.", "No audio to translate.", "Connection ready"

                # Intermediate status update shown while the API call runs.
                yield "Processing...", "Preparing audio for translation...", "Connecting to OpenAI API..."

                translation = translate_audio(api_key, audio_path)

                # translate_audio reports failures as strings; detect them.
                if "connection error" in translation.lower():
                    yield translation, "Translation failed due to connection issues.", "Connection failed"
                    return

                # Best effort: also transcribe the original (source-language) audio.
                try:
                    original = transcribe_audio(api_key, audio_path)
                    if "error" in original.lower():
                        original = "Could not transcribe original audio due to connection issues."
                except Exception:
                    original = "Could not transcribe original audio."

                yield translation, original, "Connection successful"
            except ConnectionError as e:
                yield f"Connection error: {str(e)}. Please check your internet connection and try again.", "Translation failed.", "Connection failed"
            except Timeout as e:
                yield f"Timeout error: {str(e)}. The request took too long to complete. Please try again.", "Translation timed out.", "Connection timed out"
            except Exception as e:
                yield f"Translation error: {str(e)}", "Error occurred during processing.", "Error occurred"

        translate_btn.click(
            fn=translate_audio_input,
            inputs=[api_key, translation_audio_input],
            outputs=[translation_output, original_transcription, connection_status]
        )

        # Informational accordions (display only).
        with gr.Accordion("Supported Languages", open=False):
            gr.Markdown(", ".join(SUPPORTED_LANGUAGES))

        with gr.Accordion("Connection Troubleshooting", open=False):
            gr.Markdown("""
            ### If you experience connection errors:

            1. **Check your internet connection** - Ensure you have a stable internet connection
            2. **Verify your API key** - Make sure your OpenAI API key is valid and has sufficient credits
            3. **Try a smaller audio file** - Large audio files may time out during upload
            4. **Wait and retry** - OpenAI servers might be experiencing high traffic
            5. **Check file format** - Make sure your audio file is in a supported format (MP3, WAV, etc.)
            6. **Try on a different network** - Some networks might block API calls to OpenAI

            The app will automatically retry failed connections up to 3 times.
            """)

    gr.Markdown("""
    ## Notes:
    - You must provide your OpenAI API key in the field above
    - The model used is `gpt-4o-audio-preview` for conversation, `gpt-4o-transcribe` for transcriptions, and `whisper-1` for translations
    - Audio inputs should be in WAV format for chat and any supported format for translation
    - Available voices: alloy, ash, ballad, coral, echo, fable, onyx, nova, sage, shimmer, and verse
    - Each audio response is automatically transcribed for verification
    - The "Use Random Example Audio" button will load a random sample from OpenAI's demo voices
    - The translation feature supports 50+ languages, translating them to English
    - If you experience connection errors, the app will automatically retry up to 3 times
    """)

if __name__ == "__main__":
    app.launch()