muhtasham committed on
Commit
06fa661
·
1 Parent(s): 78d9435
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -226,20 +226,26 @@ def check_api_health():
226
  def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
227
  """Transcribe audio from YouTube video using URL endpoint."""
228
  logger.info(f"Starting YouTube transcription process for URL: {url}")
 
229
 
230
  try:
231
  # Check API health first
 
232
  check_api_health()
233
 
234
  # Validate URL scheme
235
  if not url.startswith(('http://', 'https://')):
 
236
  raise gr.Error("URL must start with http:// or https://")
237
 
238
  # Get audio URL from Sieve
 
239
  audio_url = download_youtube_audio(url)
 
240
 
241
  # Validate audio URL scheme
242
  if not audio_url.startswith(('http://', 'https://')):
 
243
  raise gr.Error("Invalid audio URL scheme received from Sieve")
244
 
245
  # Prepare request parameters
@@ -250,8 +256,10 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
250
  "chunk_length_s": chunk_length_s,
251
  "batch_size": batch_size
252
  }
 
253
 
254
  # Send request to API
 
255
  response = requests.post(
256
  f"{API_URL}/transcribe/url",
257
  json={"url": audio_url},
@@ -259,12 +267,15 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
259
  )
260
  response.raise_for_status()
261
  result = response.json()
 
262
 
263
  # Log metadata
264
  metadata = result.get("metadata", {})
265
  logger.info(f"Transcription metadata: {metadata}")
 
266
 
267
  # Format response with segments (without id)
 
268
  formatted_result = {
269
  "text": result["transcription"]["text"],
270
  "segments": [
@@ -276,13 +287,17 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
276
  for segment in result["transcription"]["segments"]
277
  ] if return_timestamps else None
278
  }
 
279
 
280
  # Generate subtitles if requested
281
  srt_file = None
282
  if generate_subs and return_timestamps and "segments" in result["transcription"]:
 
283
  srt_content = generate_srt(result["transcription"]["segments"])
284
  srt_file = save_srt_to_file(srt_content)
 
285
 
 
286
  return formatted_result, srt_file, ""
287
 
288
  except Exception as e:
@@ -292,15 +307,19 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
292
  def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
293
  """Transcribe audio input using Whisper API."""
294
  logger.info(f"Starting transcription process for file: {inputs}")
 
295
 
296
  if inputs is None:
 
297
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
298
 
299
  try:
300
  # Check API health first
 
301
  check_api_health()
302
 
303
  # Read the audio file
 
304
  with open(inputs, "rb") as f:
305
  files = {"file": f}
306
 
@@ -312,10 +331,10 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
312
  "chunk_length_s": chunk_length_s,
313
  "batch_size": batch_size
314
  }
315
-
316
- logger.info(f"Sending request to API with parameters: {params}")
317
 
318
  # Send request to API
 
319
  response = requests.post(
320
  f"{API_URL}/transcribe",
321
  files=files,
@@ -323,12 +342,15 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
323
  )
324
  response.raise_for_status()
325
  result = response.json()
 
326
 
327
  # Log metadata
328
  metadata = result.get("metadata", {})
329
  logger.info(f"Transcription metadata: {metadata}")
 
330
 
331
  # Format response with segments (without id)
 
332
  formatted_result = {
333
  "text": result["transcription"]["text"],
334
  "segments": [
@@ -340,13 +362,17 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
340
  for segment in result["transcription"]["segments"]
341
  ] if return_timestamps else None
342
  }
 
343
 
344
  # Generate subtitles if requested
345
  srt_file = None
346
  if generate_subs and return_timestamps and "segments" in result["transcription"]:
 
347
  srt_content = generate_srt(result["transcription"]["segments"])
348
  srt_file = save_srt_to_file(srt_content)
 
349
 
 
350
  return formatted_result, srt_file, ""
351
 
352
  except requests.exceptions.RequestException as e:
 
226
  def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
227
  """Transcribe audio from YouTube video using URL endpoint."""
228
  logger.info(f"Starting YouTube transcription process for URL: {url}")
229
+ logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
230
 
231
  try:
232
  # Check API health first
233
+ logger.info("Performing API health check...")
234
  check_api_health()
235
 
236
  # Validate URL scheme
237
  if not url.startswith(('http://', 'https://')):
238
+ logger.error(f"Invalid URL scheme: {url}")
239
  raise gr.Error("URL must start with http:// or https://")
240
 
241
  # Get audio URL from Sieve
242
+ logger.info("Starting YouTube audio download via Sieve API...")
243
  audio_url = download_youtube_audio(url)
244
+ logger.info(f"Successfully obtained audio URL from Sieve: {audio_url}")
245
 
246
  # Validate audio URL scheme
247
  if not audio_url.startswith(('http://', 'https://')):
248
+ logger.error(f"Invalid audio URL scheme from Sieve: {audio_url}")
249
  raise gr.Error("Invalid audio URL scheme received from Sieve")
250
 
251
  # Prepare request parameters
 
256
  "chunk_length_s": chunk_length_s,
257
  "batch_size": batch_size
258
  }
259
+ logger.info(f"Prepared API request parameters: {params}")
260
 
261
  # Send request to API
262
+ logger.info("Sending transcription request to API...")
263
  response = requests.post(
264
  f"{API_URL}/transcribe/url",
265
  json={"url": audio_url},
 
267
  )
268
  response.raise_for_status()
269
  result = response.json()
270
+ logger.info("Successfully received response from API")
271
 
272
  # Log metadata
273
  metadata = result.get("metadata", {})
274
  logger.info(f"Transcription metadata: {metadata}")
275
+ logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
276
 
277
  # Format response with segments (without id)
278
+ logger.info("Formatting response...")
279
  formatted_result = {
280
  "text": result["transcription"]["text"],
281
  "segments": [
 
287
  for segment in result["transcription"]["segments"]
288
  ] if return_timestamps else None
289
  }
290
+ logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
291
 
292
  # Generate subtitles if requested
293
  srt_file = None
294
  if generate_subs and return_timestamps and "segments" in result["transcription"]:
295
+ logger.info("Generating SRT subtitles...")
296
  srt_content = generate_srt(result["transcription"]["segments"])
297
  srt_file = save_srt_to_file(srt_content)
298
+ logger.info(f"Generated SRT file: {srt_file}")
299
 
300
+ logger.info("YouTube transcription process completed successfully")
301
  return formatted_result, srt_file, ""
302
 
303
  except Exception as e:
 
307
  def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
308
  """Transcribe audio input using Whisper API."""
309
  logger.info(f"Starting transcription process for file: {inputs}")
310
+ logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
311
 
312
  if inputs is None:
313
+ logger.error("No audio file submitted")
314
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
315
 
316
  try:
317
  # Check API health first
318
+ logger.info("Performing API health check...")
319
  check_api_health()
320
 
321
  # Read the audio file
322
+ logger.info(f"Reading audio file: {inputs}")
323
  with open(inputs, "rb") as f:
324
  files = {"file": f}
325
 
 
331
  "chunk_length_s": chunk_length_s,
332
  "batch_size": batch_size
333
  }
334
+ logger.info(f"Prepared API request parameters: {params}")
 
335
 
336
  # Send request to API
337
+ logger.info("Sending transcription request to API...")
338
  response = requests.post(
339
  f"{API_URL}/transcribe",
340
  files=files,
 
342
  )
343
  response.raise_for_status()
344
  result = response.json()
345
+ logger.info("Successfully received response from API")
346
 
347
  # Log metadata
348
  metadata = result.get("metadata", {})
349
  logger.info(f"Transcription metadata: {metadata}")
350
+ logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
351
 
352
  # Format response with segments (without id)
353
+ logger.info("Formatting response...")
354
  formatted_result = {
355
  "text": result["transcription"]["text"],
356
  "segments": [
 
362
  for segment in result["transcription"]["segments"]
363
  ] if return_timestamps else None
364
  }
365
+ logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
366
 
367
  # Generate subtitles if requested
368
  srt_file = None
369
  if generate_subs and return_timestamps and "segments" in result["transcription"]:
370
+ logger.info("Generating SRT subtitles...")
371
  srt_content = generate_srt(result["transcription"]["segments"])
372
  srt_file = save_srt_to_file(srt_content)
373
+ logger.info(f"Generated SRT file: {srt_file}")
374
 
375
+ logger.info("Transcription process completed successfully")
376
  return formatted_result, srt_file, ""
377
 
378
  except requests.exceptions.RequestException as e: