muhtasham committed on
Commit
2e5323f
·
1 Parent(s): 020bd94
Files changed (1) hide show
  1. app.py +154 -51
app.py CHANGED
@@ -127,70 +127,153 @@ def download_youtube_audio(url):
127
  Raises:
128
  gr.Error: If download fails or API key is not set
129
  """
 
 
130
  if not SIEVE_API_KEY:
 
131
  raise gr.Error("SIEVE_API_KEY environment variable is not set")
132
-
133
  try:
134
  # Create a temporary file for the audio
135
  temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
136
  temp_file.close()
137
  output_path = temp_file.name
 
138
 
139
- # Prepare the request to Sieve API
140
  payload = {
141
  "function": "sieve/youtube-downloader",
142
  "inputs": {
143
  "url": url,
144
- "download_type": "audio",
145
- "audio_format": "mp3",
 
 
 
146
  "include_metadata": False,
147
- "include_subtitles": False
 
 
 
 
148
  }
149
  }
 
150
 
151
- # Send request to Sieve API
152
- response = requests.post(
153
- f"{SIEVE_API_URL}/push",
154
- headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
155
- json=payload
156
- )
157
- response.raise_for_status()
158
- job_id = response.json().get("id")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- if not job_id:
161
- raise gr.Error("Failed to get job ID from Sieve API")
162
-
163
- # Poll for job completion
164
  while True:
165
- job_response = requests.get(
166
- f"{SIEVE_API_URL}/jobs/{job_id}",
167
- headers={"X-API-Key": SIEVE_API_KEY}
168
- )
169
- job_response.raise_for_status()
170
- job_data = job_response.json()
171
 
172
- if job_data.get("status") == "completed":
173
- # Download the audio file
174
- audio_url = job_data.get("output_0", {}).get("url")
175
- if not audio_url:
176
- raise gr.Error("No audio URL in job response")
177
-
178
- audio_response = requests.get(audio_url)
179
- audio_response.raise_for_status()
 
 
 
 
180
 
181
- with open(output_path, "wb") as f:
182
- f.write(audio_response.content)
 
 
 
 
 
183
 
184
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- elif job_data.get("status") == "failed":
187
- raise gr.Error(f"Job failed: {job_data.get('error', 'Unknown error')}")
188
 
189
- # Wait before polling again
190
- time.sleep(2)
 
 
 
191
 
 
 
 
192
  except Exception as e:
193
- logger.exception(f"Error downloading YouTube audio: {str(e)}")
194
  raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
195
 
196
  def transcribe_youtube(url, return_timestamps, generate_subs):
@@ -204,16 +287,25 @@ def transcribe_youtube(url, return_timestamps, generate_subs):
204
  Returns:
205
  tuple: (formatted_result, srt_file, correction_text)
206
  """
 
 
 
207
  try:
208
  # Download audio from YouTube
 
209
  audio_path = download_youtube_audio(url)
 
210
 
211
  # Transcribe the downloaded audio
 
212
  result = transcribe(audio_path, return_timestamps, generate_subs)
 
213
 
214
  # Clean up the temporary file
 
215
  try:
216
  os.unlink(audio_path)
 
217
  except Exception as e:
218
  logger.warning(f"Failed to delete temporary file: {str(e)}")
219
 
@@ -240,32 +332,39 @@ def transcribe(inputs, return_timestamps, generate_subs):
240
  Raises:
241
  gr.Error: If no audio file is provided or transcription fails.
242
  """
 
 
 
243
  if inputs is None:
244
  logger.warning("No audio file submitted")
245
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
246
 
247
  try:
248
- logger.info(f"Processing audio file: {inputs}")
249
-
250
  # Read the audio file
 
251
  with open(inputs, "rb") as f:
252
  data = f.read()
 
 
253
 
254
  # Send request to API
 
255
  response = requests.post(API_URL, headers=headers, data=data)
256
- response.raise_for_status() # Raise an exception for bad status codes
257
  result = response.json()
258
-
259
  logger.debug(f"API response: {result}")
 
260
 
261
  # Format response as JSON
 
262
  formatted_result = {
263
  "text": result.get("text", "")
264
  }
 
265
 
266
  chunks = []
267
  if return_timestamps and "chunks" in result:
268
- logger.info(f"Processing {len(result['chunks'])} chunks")
269
  for i, chunk in enumerate(result["chunks"]):
270
  logger.debug(f"Processing chunk {i}: {chunk}")
271
  try:
@@ -286,17 +385,19 @@ def transcribe(inputs, return_timestamps, generate_subs):
286
  continue
287
 
288
  formatted_result["chunks"] = chunks
289
- logger.info(f"Successfully processed transcription with {len(chunks)} chunks")
290
 
291
  # Generate subtitles if requested
292
  srt_file = None
293
  if generate_subs and chunks:
294
- logger.info("Generating SRT subtitles")
295
  srt_content = generate_srt(chunks)
296
  srt_file = save_srt_to_file(srt_content)
297
- logger.info("SRT subtitles generated successfully")
298
 
 
299
  return formatted_result, srt_file, "" # Return empty string for correction textbox
 
300
  except requests.exceptions.RequestException as e:
301
  logger.exception(f"API request failed: {str(e)}")
302
  raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
@@ -323,7 +424,10 @@ youtube_transcribe = gr.Interface(
323
  description=(
324
  "Transcribe Tajik language audio from YouTube videos. "
325
  "Paste a YouTube URL and get accurate transcription with optional timestamps "
326
- "and subtitles."
 
 
 
327
  )
328
  )
329
 
@@ -365,11 +469,10 @@ file_transcribe = gr.Interface(
365
  )
366
  )
367
 
368
- # Then set up the demo with the interfaces
369
  with demo:
370
  gr.TabbedInterface(
371
- [youtube_transcribe, file_transcribe, mf_transcribe],
372
- ["YouTube", "Audio file", "Microphone"]
373
  )
374
 
375
  logger.info("Starting Gradio interface")
 
127
  Raises:
128
  gr.Error: If download fails or API key is not set
129
  """
130
+ logger.info(f"Starting YouTube audio download process for URL: {url}")
131
+
132
  if not SIEVE_API_KEY:
133
+ logger.error("SIEVE_API_KEY environment variable is not set")
134
  raise gr.Error("SIEVE_API_KEY environment variable is not set")
135
+
136
  try:
137
  # Create a temporary file for the audio
138
  temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
139
  temp_file.close()
140
  output_path = temp_file.name
141
+ logger.info(f"Created temporary file at: {output_path}")
142
 
143
+ # Prepare the request to Sieve API with exact parameters
144
  payload = {
145
  "function": "sieve/youtube-downloader",
146
  "inputs": {
147
  "url": url,
148
+ "download_type": "audio", # Ensure we're only downloading audio
149
+ "resolution": "highest-available",
150
+ "include_audio": True,
151
+ "start_time": 0,
152
+ "end_time": -1,
153
  "include_metadata": False,
154
+ "metadata_fields": ["title", "thumbnail", "description", "tags", "duration"],
155
+ "include_subtitles": False,
156
+ "subtitle_languages": ["en"],
157
+ "video_format": "mp4",
158
+ "audio_format": "mp3"
159
  }
160
  }
161
+ logger.debug(f"Prepared Sieve API payload: {payload}")
162
 
163
+ # Send request to Sieve API with retries
164
+ max_retries = 3
165
+ retry_delay = 5 # seconds
166
+
167
+ for attempt in range(max_retries):
168
+ try:
169
+ logger.info(f"Sending request to Sieve API (attempt {attempt + 1}/{max_retries})...")
170
+ response = requests.post(
171
+ f"{SIEVE_API_URL}/push",
172
+ headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
173
+ json=payload,
174
+ timeout=30 # Add timeout
175
+ )
176
+ response.raise_for_status()
177
+ response_data = response.json()
178
+ logger.debug(f"Sieve API response: {response_data}")
179
+
180
+ job_id = response_data.get("id")
181
+ if not job_id:
182
+ logger.error("No job ID received from Sieve API")
183
+ if attempt < max_retries - 1:
184
+ logger.warning(f"Retrying in {retry_delay} seconds...")
185
+ time.sleep(retry_delay)
186
+ continue
187
+ raise gr.Error("Failed to get job ID from Sieve API")
188
+ break
189
+
190
+ except requests.exceptions.RequestException as e:
191
+ logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
192
+ if attempt < max_retries - 1:
193
+ logger.info(f"Retrying in {retry_delay} seconds...")
194
+ time.sleep(retry_delay)
195
+ continue
196
+ raise
197
+
198
+ logger.info(f"Received job ID: {job_id}")
199
+
200
+ # Poll for job completion with timeout
201
+ poll_count = 0
202
+ max_polls = 60 # Maximum number of polls (2 minutes with 2-second delay)
203
+ start_time = time.time()
204
 
 
 
 
 
205
  while True:
206
+ poll_count += 1
207
+ logger.info(f"Polling job status (attempt {poll_count}/{max_polls})...")
 
 
 
 
208
 
209
+ try:
210
+ job_response = requests.get(
211
+ f"{SIEVE_API_URL}/jobs/{job_id}",
212
+ headers={"X-API-Key": SIEVE_API_KEY},
213
+ timeout=10
214
+ )
215
+ job_response.raise_for_status()
216
+ job_data = job_response.json()
217
+ logger.debug(f"Job status response: {job_data}")
218
+
219
+ status = job_data.get("status")
220
+ logger.info(f"Current job status: {status}")
221
 
222
+ if status == "completed":
223
+ logger.info("Job completed successfully")
224
+ # Get the output data
225
+ output_data = job_data.get("output_0", {})
226
+ if not output_data:
227
+ logger.error("No output data found in completed job response")
228
+ raise gr.Error("No output data in job response")
229
 
230
+ # Get the audio URL from the output
231
+ audio_url = output_data.get("url")
232
+ if not audio_url:
233
+ logger.error("No audio URL found in output data")
234
+ raise gr.Error("No audio URL in output data")
235
+
236
+ logger.info(f"Received audio URL from Sieve: {audio_url}")
237
+
238
+ # Download the audio file
239
+ logger.info("Downloading audio file from Sieve storage...")
240
+ audio_response = requests.get(audio_url, timeout=30)
241
+ audio_response.raise_for_status()
242
+
243
+ file_size = len(audio_response.content)
244
+ logger.info(f"Downloaded audio file size: {file_size/1024/1024:.2f} MB")
245
+
246
+ # Save the file
247
+ with open(output_path, "wb") as f:
248
+ f.write(audio_response.content)
249
+ logger.info(f"Successfully saved audio to: {output_path}")
250
+
251
+ return output_path
252
+
253
+ elif status == "failed":
254
+ error_msg = job_data.get("error", "Unknown error")
255
+ logger.error(f"Job failed with error: {error_msg}")
256
+ raise gr.Error(f"Job failed: {error_msg}")
257
+
258
+ # Check for timeout
259
+ if time.time() - start_time > 120: # 2 minutes timeout
260
+ logger.error("Job polling timed out after 2 minutes")
261
+ raise gr.Error("Download timed out. Please try again.")
262
 
263
+ logger.info("Job still processing, waiting 2 seconds before next poll...")
264
+ time.sleep(2)
265
 
266
+ except requests.exceptions.RequestException as e:
267
+ logger.warning(f"Poll request failed: {str(e)}")
268
+ if poll_count >= max_polls:
269
+ raise gr.Error("Failed to check job status. Please try again.")
270
+ time.sleep(2)
271
 
272
+ except requests.exceptions.RequestException as e:
273
+ logger.exception(f"Network error during YouTube download: {str(e)}")
274
+ raise gr.Error(f"Failed to download YouTube audio: Network error - {str(e)}")
275
  except Exception as e:
276
+ logger.exception(f"Unexpected error during YouTube download: {str(e)}")
277
  raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
278
 
279
  def transcribe_youtube(url, return_timestamps, generate_subs):
 
287
  Returns:
288
  tuple: (formatted_result, srt_file, correction_text)
289
  """
290
+ logger.info(f"Starting YouTube transcription process for URL: {url}")
291
+ logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
292
+
293
  try:
294
  # Download audio from YouTube
295
+ logger.info("Step 1: Downloading audio from YouTube...")
296
  audio_path = download_youtube_audio(url)
297
+ logger.info(f"Successfully downloaded audio to: {audio_path}")
298
 
299
  # Transcribe the downloaded audio
300
+ logger.info("Step 2: Transcribing downloaded audio...")
301
  result = transcribe(audio_path, return_timestamps, generate_subs)
302
+ logger.info("Successfully completed transcription")
303
 
304
  # Clean up the temporary file
305
+ logger.info("Step 3: Cleaning up temporary files...")
306
  try:
307
  os.unlink(audio_path)
308
+ logger.info(f"Successfully deleted temporary file: {audio_path}")
309
  except Exception as e:
310
  logger.warning(f"Failed to delete temporary file: {str(e)}")
311
 
 
332
  Raises:
333
  gr.Error: If no audio file is provided or transcription fails.
334
  """
335
+ logger.info(f"Starting transcription process for file: {inputs}")
336
+ logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
337
+
338
  if inputs is None:
339
  logger.warning("No audio file submitted")
340
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
341
 
342
  try:
 
 
343
  # Read the audio file
344
+ logger.info("Step 1: Reading audio file...")
345
  with open(inputs, "rb") as f:
346
  data = f.read()
347
+ file_size = len(data)
348
+ logger.info(f"Successfully read audio file, size: {file_size/1024/1024:.2f} MB")
349
 
350
  # Send request to API
351
+ logger.info("Step 2: Sending request to Whisper API...")
352
  response = requests.post(API_URL, headers=headers, data=data)
353
+ response.raise_for_status()
354
  result = response.json()
 
355
  logger.debug(f"API response: {result}")
356
+ logger.info("Successfully received response from API")
357
 
358
  # Format response as JSON
359
+ logger.info("Step 3: Processing API response...")
360
  formatted_result = {
361
  "text": result.get("text", "")
362
  }
363
+ logger.info(f"Transcribed text length: {len(formatted_result['text'])} characters")
364
 
365
  chunks = []
366
  if return_timestamps and "chunks" in result:
367
+ logger.info(f"Processing {len(result['chunks'])} chunks for timestamps")
368
  for i, chunk in enumerate(result["chunks"]):
369
  logger.debug(f"Processing chunk {i}: {chunk}")
370
  try:
 
385
  continue
386
 
387
  formatted_result["chunks"] = chunks
388
+ logger.info(f"Successfully processed {len(chunks)} chunks with timestamps")
389
 
390
  # Generate subtitles if requested
391
  srt_file = None
392
  if generate_subs and chunks:
393
+ logger.info("Step 4: Generating SRT subtitles...")
394
  srt_content = generate_srt(chunks)
395
  srt_file = save_srt_to_file(srt_content)
396
+ logger.info(f"Successfully generated SRT file: {srt_file}")
397
 
398
+ logger.info("Transcription process completed successfully")
399
  return formatted_result, srt_file, "" # Return empty string for correction textbox
400
+
401
  except requests.exceptions.RequestException as e:
402
  logger.exception(f"API request failed: {str(e)}")
403
  raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
 
424
  description=(
425
  "Transcribe Tajik language audio from YouTube videos. "
426
  "Paste a YouTube URL and get accurate transcription with optional timestamps "
427
+ "and subtitles.\n\n"
428
+ "⚠️ Note: YouTube downloads may occasionally fail due to YouTube's restrictions "
429
+ "or temporary service issues. If this happens, please try again in a few minutes "
430
+ "or use the audio file upload option instead."
431
  )
432
  )
433
 
 
469
  )
470
  )
471
 
 
472
  with demo:
473
  gr.TabbedInterface(
474
+ [file_transcribe, mf_transcribe, youtube_transcribe],
475
+ ["Audio file", "Microphone", "YouTube"]
476
  )
477
 
478
  logger.info("Starting Gradio interface")