whisper-tg

Paused

App Files Community

muhtasham committed on Mar 23

Commit

2e5323f

1 Parent(s): 020bd94

WIP

Browse files

Files changed (1) hide show

app.py +154 -51

app.py CHANGED Viewed

@@ -127,70 +127,153 @@ def download_youtube_audio(url):
     Raises:
         gr.Error: If download fails or API key is not set
     """
     if not SIEVE_API_KEY:
         raise gr.Error("SIEVE_API_KEY environment variable is not set")
     try:
         # Create a temporary file for the audio
         temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
         temp_file.close()
         output_path = temp_file.name
-        # Prepare the request to Sieve API
         payload = {
             "function": "sieve/youtube-downloader",
             "inputs": {
                 "url": url,
-                "download_type": "audio",
-                "audio_format": "mp3",
                 "include_metadata": False,
-                "include_subtitles": False
             }
         }
-        # Send request to Sieve API
-        response = requests.post(
-            f"{SIEVE_API_URL}/push",
-            headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
-            json=payload
-        )
-        response.raise_for_status()
-        job_id = response.json().get("id")
-        if not job_id:
-            raise gr.Error("Failed to get job ID from Sieve API")
-        # Poll for job completion
         while True:
-            job_response = requests.get(
-                f"{SIEVE_API_URL}/jobs/{job_id}",
-                headers={"X-API-Key": SIEVE_API_KEY}
-            )
-            job_response.raise_for_status()
-            job_data = job_response.json()
-            if job_data.get("status") == "completed":
-                # Download the audio file
-                audio_url = job_data.get("output_0", {}).get("url")
-                if not audio_url:
-                    raise gr.Error("No audio URL in job response")
-                audio_response = requests.get(audio_url)
-                audio_response.raise_for_status()
-                with open(output_path, "wb") as f:
-                    f.write(audio_response.content)
-                return output_path
-            elif job_data.get("status") == "failed":
-                raise gr.Error(f"Job failed: {job_data.get('error', 'Unknown error')}")
-            # Wait before polling again
-            time.sleep(2)
     except Exception as e:
-        logger.exception(f"Error downloading YouTube audio: {str(e)}")
         raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
 def transcribe_youtube(url, return_timestamps, generate_subs):
@@ -204,16 +287,25 @@ def transcribe_youtube(url, return_timestamps, generate_subs):
     Returns:
         tuple: (formatted_result, srt_file, correction_text)
     """
     try:
         # Download audio from YouTube
         audio_path = download_youtube_audio(url)
         # Transcribe the downloaded audio
         result = transcribe(audio_path, return_timestamps, generate_subs)
         # Clean up the temporary file
         try:
             os.unlink(audio_path)
         except Exception as e:
             logger.warning(f"Failed to delete temporary file: {str(e)}")
@@ -240,32 +332,39 @@ def transcribe(inputs, return_timestamps, generate_subs):
     Raises:
         gr.Error: If no audio file is provided or transcription fails.
     """
     if inputs is None:
         logger.warning("No audio file submitted")
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     try:
-        logger.info(f"Processing audio file: {inputs}")
         # Read the audio file
         with open(inputs, "rb") as f:
             data = f.read()
         # Send request to API
         response = requests.post(API_URL, headers=headers, data=data)
-        response.raise_for_status()  # Raise an exception for bad status codes
         result = response.json()
         logger.debug(f"API response: {result}")
         # Format response as JSON
         formatted_result = {
             "text": result.get("text", "")
         }
         chunks = []
         if return_timestamps and "chunks" in result:
-            logger.info(f"Processing {len(result['chunks'])} chunks")
             for i, chunk in enumerate(result["chunks"]):
                 logger.debug(f"Processing chunk {i}: {chunk}")
                 try:
@@ -286,17 +385,19 @@ def transcribe(inputs, return_timestamps, generate_subs):
                     continue
             formatted_result["chunks"] = chunks
-            logger.info(f"Successfully processed transcription with {len(chunks)} chunks")
         # Generate subtitles if requested
         srt_file = None
         if generate_subs and chunks:
-            logger.info("Generating SRT subtitles")
             srt_content = generate_srt(chunks)
             srt_file = save_srt_to_file(srt_content)
-            logger.info("SRT subtitles generated successfully")
         return formatted_result, srt_file, ""  # Return empty string for correction textbox
     except requests.exceptions.RequestException as e:
         logger.exception(f"API request failed: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
@@ -323,7 +424,10 @@ youtube_transcribe = gr.Interface(
     description=(
         "Transcribe Tajik language audio from YouTube videos. "
         "Paste a YouTube URL and get accurate transcription with optional timestamps "
-        "and subtitles."
     )
 )
@@ -365,11 +469,10 @@ file_transcribe = gr.Interface(
     )
 )
-# Then set up the demo with the interfaces
 with demo:
     gr.TabbedInterface(
-        [youtube_transcribe, file_transcribe, mf_transcribe],
-        ["YouTube", "Audio file", "Microphone"]
     )
 logger.info("Starting Gradio interface")

     Raises:
         gr.Error: If download fails or API key is not set
     """
+    logger.info(f"Starting YouTube audio download process for URL: {url}")
     if not SIEVE_API_KEY:
+        logger.error("SIEVE_API_KEY environment variable is not set")
         raise gr.Error("SIEVE_API_KEY environment variable is not set")
     try:
         # Create a temporary file for the audio
         temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
         temp_file.close()
         output_path = temp_file.name
+        logger.info(f"Created temporary file at: {output_path}")
+        # Prepare the request to Sieve API with exact parameters
         payload = {
             "function": "sieve/youtube-downloader",
             "inputs": {
                 "url": url,
+                "download_type": "audio",  # Ensure we're only downloading audio
+                "resolution": "highest-available",
+                "include_audio": True,
+                "start_time": 0,
+                "end_time": -1,
                 "include_metadata": False,
+                "metadata_fields": ["title", "thumbnail", "description", "tags", "duration"],
+                "include_subtitles": False,
+                "subtitle_languages": ["en"],
+                "video_format": "mp4",
+                "audio_format": "mp3"
             }
         }
+        logger.debug(f"Prepared Sieve API payload: {payload}")
+        # Send request to Sieve API with retries
+        max_retries = 3
+        retry_delay = 5  # seconds
+        for attempt in range(max_retries):
+            try:
+                logger.info(f"Sending request to Sieve API (attempt {attempt + 1}/{max_retries})...")
+                response = requests.post(
+                    f"{SIEVE_API_URL}/push",
+                    headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
+                    json=payload,
+                    timeout=30  # Add timeout
+                )
+                response.raise_for_status()
+                response_data = response.json()
+                logger.debug(f"Sieve API response: {response_data}")
+                job_id = response_data.get("id")
+                if not job_id:
+                    logger.error("No job ID received from Sieve API")
+                    if attempt < max_retries - 1:
+                        logger.warning(f"Retrying in {retry_delay} seconds...")
+                        time.sleep(retry_delay)
+                        continue
+                    raise gr.Error("Failed to get job ID from Sieve API")
+                break
+            except requests.exceptions.RequestException as e:
+                logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
+                if attempt < max_retries - 1:
+                    logger.info(f"Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                    continue
+                raise
+        logger.info(f"Received job ID: {job_id}")
+        # Poll for job completion with timeout
+        poll_count = 0
+        max_polls = 60  # Maximum number of polls (2 minutes with 2-second delay)
+        start_time = time.time()
         while True:
+            poll_count += 1
+            logger.info(f"Polling job status (attempt {poll_count}/{max_polls})...")
+            try:
+                job_response = requests.get(
+                    f"{SIEVE_API_URL}/jobs/{job_id}",
+                    headers={"X-API-Key": SIEVE_API_KEY},
+                    timeout=10
+                )
+                job_response.raise_for_status()
+                job_data = job_response.json()
+                logger.debug(f"Job status response: {job_data}")
+                status = job_data.get("status")
+                logger.info(f"Current job status: {status}")
+                if status == "completed":
+                    logger.info("Job completed successfully")
+                    # Get the output data
+                    output_data = job_data.get("output_0", {})
+                    if not output_data:
+                        logger.error("No output data found in completed job response")
+                        raise gr.Error("No output data in job response")
+                    # Get the audio URL from the output
+                    audio_url = output_data.get("url")
+                    if not audio_url:
+                        logger.error("No audio URL found in output data")
+                        raise gr.Error("No audio URL in output data")
+                    logger.info(f"Received audio URL from Sieve: {audio_url}")
+                    # Download the audio file
+                    logger.info("Downloading audio file from Sieve storage...")
+                    audio_response = requests.get(audio_url, timeout=30)
+                    audio_response.raise_for_status()
+                    file_size = len(audio_response.content)
+                    logger.info(f"Downloaded audio file size: {file_size/1024/1024:.2f} MB")
+                    # Save the file
+                    with open(output_path, "wb") as f:
+                        f.write(audio_response.content)
+                    logger.info(f"Successfully saved audio to: {output_path}")
+                    return output_path
+                elif status == "failed":
+                    error_msg = job_data.get("error", "Unknown error")
+                    logger.error(f"Job failed with error: {error_msg}")
+                    raise gr.Error(f"Job failed: {error_msg}")
+                # Check for timeout
+                if time.time() - start_time > 120:  # 2 minutes timeout
+                    logger.error("Job polling timed out after 2 minutes")
+                    raise gr.Error("Download timed out. Please try again.")
+                logger.info("Job still processing, waiting 2 seconds before next poll...")
+                time.sleep(2)
+            except requests.exceptions.RequestException as e:
+                logger.warning(f"Poll request failed: {str(e)}")
+                if poll_count >= max_polls:
+                    raise gr.Error("Failed to check job status. Please try again.")
+                time.sleep(2)
+    except requests.exceptions.RequestException as e:
+        logger.exception(f"Network error during YouTube download: {str(e)}")
+        raise gr.Error(f"Failed to download YouTube audio: Network error - {str(e)}")
     except Exception as e:
+        logger.exception(f"Unexpected error during YouTube download: {str(e)}")
         raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
 def transcribe_youtube(url, return_timestamps, generate_subs):
     Returns:
         tuple: (formatted_result, srt_file, correction_text)
     """
+    logger.info(f"Starting YouTube transcription process for URL: {url}")
+    logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
     try:
         # Download audio from YouTube
+        logger.info("Step 1: Downloading audio from YouTube...")
         audio_path = download_youtube_audio(url)
+        logger.info(f"Successfully downloaded audio to: {audio_path}")
         # Transcribe the downloaded audio
+        logger.info("Step 2: Transcribing downloaded audio...")
         result = transcribe(audio_path, return_timestamps, generate_subs)
+        logger.info("Successfully completed transcription")
         # Clean up the temporary file
+        logger.info("Step 3: Cleaning up temporary files...")
         try:
             os.unlink(audio_path)
+            logger.info(f"Successfully deleted temporary file: {audio_path}")
         except Exception as e:
             logger.warning(f"Failed to delete temporary file: {str(e)}")
     Raises:
         gr.Error: If no audio file is provided or transcription fails.
     """
+    logger.info(f"Starting transcription process for file: {inputs}")
+    logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
     if inputs is None:
         logger.warning("No audio file submitted")
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
     try:
         # Read the audio file
+        logger.info("Step 1: Reading audio file...")
         with open(inputs, "rb") as f:
             data = f.read()
+        file_size = len(data)
+        logger.info(f"Successfully read audio file, size: {file_size/1024/1024:.2f} MB")
         # Send request to API
+        logger.info("Step 2: Sending request to Whisper API...")
         response = requests.post(API_URL, headers=headers, data=data)
+        response.raise_for_status()
         result = response.json()
         logger.debug(f"API response: {result}")
+        logger.info("Successfully received response from API")
         # Format response as JSON
+        logger.info("Step 3: Processing API response...")
         formatted_result = {
             "text": result.get("text", "")
         }
+        logger.info(f"Transcribed text length: {len(formatted_result['text'])} characters")
         chunks = []
         if return_timestamps and "chunks" in result:
+            logger.info(f"Processing {len(result['chunks'])} chunks for timestamps")
             for i, chunk in enumerate(result["chunks"]):
                 logger.debug(f"Processing chunk {i}: {chunk}")
                 try:
                     continue
             formatted_result["chunks"] = chunks
+            logger.info(f"Successfully processed {len(chunks)} chunks with timestamps")
         # Generate subtitles if requested
         srt_file = None
         if generate_subs and chunks:
+            logger.info("Step 4: Generating SRT subtitles...")
             srt_content = generate_srt(chunks)
             srt_file = save_srt_to_file(srt_content)
+            logger.info(f"Successfully generated SRT file: {srt_file}")
+        logger.info("Transcription process completed successfully")
         return formatted_result, srt_file, ""  # Return empty string for correction textbox
     except requests.exceptions.RequestException as e:
         logger.exception(f"API request failed: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
     description=(
         "Transcribe Tajik language audio from YouTube videos. "
         "Paste a YouTube URL and get accurate transcription with optional timestamps "
+        "and subtitles.\n\n"
+        "⚠️ Note: YouTube downloads may occasionally fail due to YouTube's restrictions "
+        "or temporary service issues. If this happens, please try again in a few minutes "
+        "or use the audio file upload option instead."
     )
 )
     )
 )
 with demo:
     gr.TabbedInterface(
+        [file_transcribe, mf_transcribe, youtube_transcribe],
+        ["Audio file", "Microphone", "YouTube"]
     )
 logger.info("Starting Gradio interface")