muhtasham committed on
Commit
90b9a04
·
1 Parent(s): 822fd58
Files changed (1) hide show
  1. app.py +36 -39
app.py CHANGED
@@ -265,49 +265,46 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=15,
265
  logger.error(f"Invalid URL scheme: {url}")
266
  raise gr.Error("URL must start with http:// or https://")
267
 
268
- # Get audio URL from Sieve
269
  logger.info("Starting YouTube audio download via Sieve API...")
270
- audio_url = download_youtube_audio(url)
271
- logger.info(f"Successfully obtained audio URL from Sieve: {audio_url}")
272
-
273
- # Validate audio URL scheme
274
- if not audio_url.startswith(('http://', 'https://')):
275
- logger.error(f"Invalid audio URL scheme from Sieve: {audio_url}")
276
- raise gr.Error("Invalid audio URL scheme received from Sieve")
277
 
278
  # Send request to API
279
  logger.info("Sending transcription request to API...")
280
- response = requests.post(
281
- f"{API_URL}/transcribe/url",
282
- json={
283
- "url": audio_url,
284
- "timestamp_level": "sentence" if return_timestamps else None,
285
- "task": "transcribe",
286
- "chunk_length_s": chunk_length_s,
287
- "batch_size": batch_size
288
- },
289
- timeout=1800
290
- )
291
- response.raise_for_status()
292
- result = response.json()
293
- logger.info("Successfully received response from API")
294
-
295
- # Log metadata
296
- metadata = result.get("metadata", {})
297
- logger.info(f"Transcription metadata: {metadata}")
298
- logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
299
-
300
- # Generate subtitles if requested
301
- srt_file = None
302
- if generate_subs and return_timestamps and "segments" in result["transcription"]:
303
- logger.info("Generating SRT subtitles...")
304
- srt_content = generate_srt(result["transcription"]["segments"])
305
- srt_file = save_srt_to_file(srt_content)
306
- logger.info(f"Generated SRT file: {srt_file}")
307
-
308
- logger.info("YouTube transcription process completed successfully")
309
- return result, srt_file, ""
310
-
 
 
311
  except Exception as e:
312
  logger.exception(f"Error in YouTube transcription: {str(e)}")
313
  raise gr.Error(f"Failed to transcribe YouTube video: {str(e)}")
 
265
  logger.error(f"Invalid URL scheme: {url}")
266
  raise gr.Error("URL must start with http:// or https://")
267
 
268
+ # Get audio file from Sieve
269
  logger.info("Starting YouTube audio download via Sieve API...")
270
+ audio_file = download_youtube_audio(url)
271
+ logger.info(f"Successfully obtained audio file from Sieve: {audio_file}")
 
 
 
 
 
272
 
273
  # Send request to API
274
  logger.info("Sending transcription request to API...")
275
+ with open(audio_file, "rb") as f:
276
+ files = {"file": f}
277
+ response = requests.post(
278
+ f"{API_URL}/transcribe",
279
+ files=files,
280
+ data={
281
+ "timestamp_level": "sentence" if return_timestamps else None,
282
+ "task": "transcribe",
283
+ "chunk_length_s": chunk_length_s,
284
+ "batch_size": batch_size
285
+ },
286
+ timeout=1800
287
+ )
288
+ response.raise_for_status()
289
+ result = response.json()
290
+ logger.info("Successfully received response from API")
291
+
292
+ # Log metadata
293
+ metadata = result.get("metadata", {})
294
+ logger.info(f"Transcription metadata: {metadata}")
295
+ logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
296
+
297
+ # Generate subtitles if requested
298
+ srt_file = None
299
+ if generate_subs and return_timestamps and "segments" in result["transcription"]:
300
+ logger.info("Generating SRT subtitles...")
301
+ srt_content = generate_srt(result["transcription"]["segments"])
302
+ srt_file = save_srt_to_file(srt_content)
303
+ logger.info(f"Generated SRT file: {srt_file}")
304
+
305
+ logger.info("YouTube transcription process completed successfully")
306
+ return result, srt_file, ""
307
+
308
  except Exception as e:
309
  logger.exception(f"Error in YouTube transcription: {str(e)}")
310
  raise gr.Error(f"Failed to transcribe YouTube video: {str(e)}")