whisper-tg

Paused

App Files Files Community

muhtasham committed on Mar 23

Commit

06fa661

1 Parent(s): 78d9435

WIP

Browse files

Files changed (1) hide show

app.py +28 -2

app.py CHANGED Viewed

@@ -226,20 +226,26 @@ def check_api_health():
 def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
     """Transcribe audio from YouTube video using URL endpoint."""
     logger.info(f"Starting YouTube transcription process for URL: {url}")
     try:
         # Check API health first
         check_api_health()
         # Validate URL scheme
         if not url.startswith(('http://', 'https://')):
             raise gr.Error("URL must start with http:// or https://")
         # Get audio URL from Sieve
         audio_url = download_youtube_audio(url)
         # Validate audio URL scheme
         if not audio_url.startswith(('http://', 'https://')):
             raise gr.Error("Invalid audio URL scheme received from Sieve")
         # Prepare request parameters
@@ -250,8 +256,10 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
             "chunk_length_s": chunk_length_s,
             "batch_size": batch_size
         }
         # Send request to API
         response = requests.post(
             f"{API_URL}/transcribe/url",
             json={"url": audio_url},
@@ -259,12 +267,15 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
         )
         response.raise_for_status()
         result = response.json()
         # Log metadata
         metadata = result.get("metadata", {})
         logger.info(f"Transcription metadata: {metadata}")
         # Format response with segments (without id)
         formatted_result = {
             "text": result["transcription"]["text"],
             "segments": [
@@ -276,13 +287,17 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
                 for segment in result["transcription"]["segments"]
             ] if return_timestamps else None
         }
         # Generate subtitles if requested
         srt_file = None
         if generate_subs and return_timestamps and "segments" in result["transcription"]:
             srt_content = generate_srt(result["transcription"]["segments"])
             srt_file = save_srt_to_file(srt_content)
         return formatted_result, srt_file, ""
     except Exception as e:
@@ -292,15 +307,19 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
 def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
     """Transcribe audio input using Whisper API."""
     logger.info(f"Starting transcription process for file: {inputs}")
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     try:
         # Check API health first
         check_api_health()
         # Read the audio file
         with open(inputs, "rb") as f:
             files = {"file": f}
@@ -312,10 +331,10 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
                 "chunk_length_s": chunk_length_s,
                 "batch_size": batch_size
             }
-            logger.info(f"Sending request to API with parameters: {params}")
             # Send request to API
             response = requests.post(
                 f"{API_URL}/transcribe",
                 files=files,
@@ -323,12 +342,15 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
             )
             response.raise_for_status()
             result = response.json()
             # Log metadata
             metadata = result.get("metadata", {})
             logger.info(f"Transcription metadata: {metadata}")
             # Format response with segments (without id)
             formatted_result = {
                 "text": result["transcription"]["text"],
                 "segments": [
@@ -340,13 +362,17 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
                     for segment in result["transcription"]["segments"]
                 ] if return_timestamps else None
             }
             # Generate subtitles if requested
             srt_file = None
             if generate_subs and return_timestamps and "segments" in result["transcription"]:
                 srt_content = generate_srt(result["transcription"]["segments"])
                 srt_file = save_srt_to_file(srt_content)
             return formatted_result, srt_file, ""
     except requests.exceptions.RequestException as e:

 def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
     """Transcribe audio from YouTube video using URL endpoint."""
     logger.info(f"Starting YouTube transcription process for URL: {url}")
+    logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
     try:
         # Check API health first
+        logger.info("Performing API health check...")
         check_api_health()
         # Validate URL scheme
         if not url.startswith(('http://', 'https://')):
+            logger.error(f"Invalid URL scheme: {url}")
             raise gr.Error("URL must start with http:// or https://")
         # Get audio URL from Sieve
+        logger.info("Starting YouTube audio download via Sieve API...")
         audio_url = download_youtube_audio(url)
+        logger.info(f"Successfully obtained audio URL from Sieve: {audio_url}")
         # Validate audio URL scheme
         if not audio_url.startswith(('http://', 'https://')):
+            logger.error(f"Invalid audio URL scheme from Sieve: {audio_url}")
             raise gr.Error("Invalid audio URL scheme received from Sieve")
         # Prepare request parameters
             "chunk_length_s": chunk_length_s,
             "batch_size": batch_size
         }
+        logger.info(f"Prepared API request parameters: {params}")
         # Send request to API
+        logger.info("Sending transcription request to API...")
         response = requests.post(
             f"{API_URL}/transcribe/url",
             json={"url": audio_url},
         )
         response.raise_for_status()
         result = response.json()
+        logger.info("Successfully received response from API")
         # Log metadata
         metadata = result.get("metadata", {})
         logger.info(f"Transcription metadata: {metadata}")
+        logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
         # Format response with segments (without id)
+        logger.info("Formatting response...")
         formatted_result = {
             "text": result["transcription"]["text"],
             "segments": [
                 for segment in result["transcription"]["segments"]
             ] if return_timestamps else None
         }
+        logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
         # Generate subtitles if requested
         srt_file = None
         if generate_subs and return_timestamps and "segments" in result["transcription"]:
+            logger.info("Generating SRT subtitles...")
             srt_content = generate_srt(result["transcription"]["segments"])
             srt_file = save_srt_to_file(srt_content)
+            logger.info(f"Generated SRT file: {srt_file}")
+        logger.info("YouTube transcription process completed successfully")
         return formatted_result, srt_file, ""
     except Exception as e:
 def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
     """Transcribe audio input using Whisper API."""
     logger.info(f"Starting transcription process for file: {inputs}")
+    logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
     if inputs is None:
+        logger.error("No audio file submitted")
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     try:
         # Check API health first
+        logger.info("Performing API health check...")
         check_api_health()
         # Read the audio file
+        logger.info(f"Reading audio file: {inputs}")
         with open(inputs, "rb") as f:
             files = {"file": f}
                 "chunk_length_s": chunk_length_s,
                 "batch_size": batch_size
             }
+            logger.info(f"Prepared API request parameters: {params}")
             # Send request to API
+            logger.info("Sending transcription request to API...")
             response = requests.post(
                 f"{API_URL}/transcribe",
                 files=files,
             )
             response.raise_for_status()
             result = response.json()
+            logger.info("Successfully received response from API")
             # Log metadata
             metadata = result.get("metadata", {})
             logger.info(f"Transcription metadata: {metadata}")
+            logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
             # Format response with segments (without id)
+            logger.info("Formatting response...")
             formatted_result = {
                 "text": result["transcription"]["text"],
                 "segments": [
                     for segment in result["transcription"]["segments"]
                 ] if return_timestamps else None
             }
+            logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
             # Generate subtitles if requested
             srt_file = None
             if generate_subs and return_timestamps and "segments" in result["transcription"]:
+                logger.info("Generating SRT subtitles...")
                 srt_content = generate_srt(result["transcription"]["segments"])
                 srt_file = save_srt_to_file(srt_content)
+                logger.info(f"Generated SRT file: {srt_file}")
+            logger.info("Transcription process completed successfully")
             return formatted_result, srt_file, ""
     except requests.exceptions.RequestException as e: