import gradio as gr
import whisper
import torch
from transformers import pipeline
import tempfile
import os
import subprocess
import logging
from typing import Optional, Tuple
import re
import warnings

warnings.filterwarnings("ignore")

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class SubtitleTranslator:
    def __init__(self):
        # Models are loaded lazily in load_models(); the small "base" Whisper model keeps processing fast
        self.whisper_model = None
        self.translator = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")

    def load_models(self):
        """Load models lazily to save memory"""
        if self.whisper_model is None:
            logger.info("Loading Whisper model...")
            self.whisper_model = whisper.load_model("base", device=self.device)

        if self.translator is None:
            logger.info("Loading translation model...")
            # Use a lightweight translation model
            try:
                self.translator = pipeline(
                    "translation",
                    model="Helsinki-NLP/opus-mt-mul-en",
                    device=0 if self.device == "cuda" else -1
                )
            except Exception as e:
                logger.warning(f"Failed to load Helsinki model, using Facebook model: {e}")
                self.translator = pipeline(
                    "translation",
                    model="facebook/m2m100_418M",
                    device=0 if self.device == "cuda" else -1
                )

    def extract_audio(self, video_path: str) -> str:
        """Extract audio from video file"""
        audio_path = tempfile.mktemp(suffix=".wav")
        try:
            # Use ffmpeg to extract audio - works with any video format/size
            cmd = [
                "ffmpeg", "-i", video_path,
                "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1",
                audio_path, "-y"
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            logger.info(f"Audio extracted to: {audio_path}")
            return audio_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Audio extraction failed: {e}")
            raise Exception("Failed to extract audio from video")

    def transcribe_audio(self, audio_path: str) -> dict:
        """Transcribe audio using Whisper"""
        try:
            logger.info("Starting transcription...")
            result = self.whisper_model.transcribe(
                audio_path,
                task="transcribe",
                fp16=(self.device == "cuda")
            )
            logger.info("Transcription completed")
            return result
        except Exception as e:
            logger.error(f"Transcription failed: {e}")
            raise Exception("Failed to transcribe audio")

    def translate_text(self, text: str, source_lang: str = None) -> str:
        """Translate text to English"""
        if not text.strip():
            return ""
        try:
            # If already in English, return as is
            if source_lang == "en":
                return text

            # M2M100 needs the target language forced via its tokenizer's get_lang_id();
            # the Helsinki (Marian) model translates to English directly.
            if hasattr(self.translator.tokenizer, "get_lang_id"):
                result = self.translator(
                    text,
                    forced_bos_token_id=self.translator.tokenizer.get_lang_id("en")
                )
            else:
                result = self.translator(text)
            return result[0]['translation_text'] if result else text
        except Exception as e:
            logger.error(f"Translation failed: {e}")
            return text  # Return original if translation fails

    def format_time(self, seconds: float) -> str:
        """Format time for SRT subtitle format"""
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        secs = seconds % 60
        return f"{hours:02d}:{minutes:02d}:{secs:06.3f}".replace('.', ',')
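
    # Quick worked example of the SRT timestamp format produced by format_time
    # (values are illustrative, not part of the original script):
    #   self.format_time(83.5)   -> "00:01:23,500"
    #   self.format_time(3725.0) -> "01:02:05,000"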

    def create_srt(self, segments: list, translated: bool = False) -> str:
        """Create SRT subtitle format"""
        srt_content = ""
        for i, segment in enumerate(segments, 1):
            start_time = self.format_time(segment['start'])
            end_time = self.format_time(segment['end'])
            text = segment.get('translated_text', segment['text']) if translated else segment['text']
            srt_content += f"{i}\n{start_time} --> {end_time}\n{text}\n\n"
        return srt_content

    def process_video(self, video_path: str, translate: bool = True) -> Tuple[str, Optional[str], str]:
        """Main processing function"""
        try:
            # Load models
            self.load_models()

            # Extract audio
            audio_path = self.extract_audio(video_path)

            try:
                # Transcribe
                result = self.transcribe_audio(audio_path)
                detected_language = result.get('language', 'unknown')

                # Process segments
                segments = result['segments']

                if translate and detected_language != 'en':
                    logger.info(f"Translating from {detected_language} to English...")
                    for segment in segments:
                        segment['translated_text'] = self.translate_text(
                            segment['text'], detected_language
                        )

                # Create subtitle files
                original_srt = self.create_srt(segments, translated=False)
                translated_srt = self.create_srt(segments, translated=True) if translate else ""

                # Save to temporary files
                original_file = tempfile.mktemp(suffix=".srt")
                with open(original_file, 'w', encoding='utf-8') as f:
                    f.write(original_srt)

                translated_file = None
                if translate and detected_language != 'en':
                    translated_file = tempfile.mktemp(suffix=".srt")
                    with open(translated_file, 'w', encoding='utf-8') as f:
                        f.write(translated_srt)

                return original_file, translated_file, f"Detected language: {detected_language}"
            finally:
                # Clean up audio file
                if os.path.exists(audio_path):
                    os.unlink(audio_path)
        except Exception as e:
            logger.error(f"Processing failed: {e}")
            raise gr.Error(f"Processing failed: {str(e)}")


# Initialize the translator
translator = SubtitleTranslator()


def process_video_interface(video_file, translate_option):
    """Gradio interface function"""
    if video_file is None:
        raise gr.Error("Please upload a video file")

    translate = translate_option == "Yes"

    try:
        original_srt, translated_srt, info = translator.process_video(video_file, translate)
        # Values are returned in the order wired to [original_output, info_output, translated_output]
        return original_srt, info, translated_srt if translated_srt else None
    except Exception as e:
        raise gr.Error(f"Error processing video: {str(e)}")
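
# A minimal sketch of driving the pipeline without the Gradio UI, assuming this file
# is saved as app.py (the module name is an assumption) and that ffmpeg plus the
# Whisper and translation models above are available. Kept as comments so the
# script's behaviour is unchanged:
#
#   from app import SubtitleTranslator
#
#   subtitle_translator = SubtitleTranslator()
#   original_srt, translated_srt, info = subtitle_translator.process_video("sample.mp4", translate=True)
#   print(info)           # e.g. "Detected language: fr"
#   print(original_srt)   # path to the generated .srt file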

# Create Gradio interface
def create_interface():
    with gr.Blocks(
        title="Video Subtitle Translator",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {max-width: 1000px; margin: auto;}
        .subtitle-info {background: #f0f8ff; padding: 15px; border-radius: 10px; margin: 10px 0;}
        """
    ) as demo:
        gr.HTML("""
        <div style="text-align: center;">
            <h1>🎬 Video Subtitle Translator</h1>
            <p>Generate and translate subtitles for any video - No size or duration limits!</p>
            <p>Supports all video formats • Automatic language detection • Fast processing</p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                video_input = gr.File(
                    label="Upload Video File",
                    file_types=["video"],
                    type="filepath"
                )

                translate_option = gr.Radio(
                    choices=["Yes", "No"],
                    value="Yes",
                    label="Translate to English?",
                    info="Choose 'No' if you only want transcription in original language"
                )

                process_btn = gr.Button(
                    "🚀 Generate Subtitles",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=3):
                info_output = gr.Textbox(
                    label="Processing Info",
                    interactive=False,
                    elem_classes=["subtitle-info"]
                )

                original_output = gr.File(
                    label="📝 Original Subtitles (.srt)",
                    interactive=False
                )

                translated_output = gr.File(
                    label="🌍 English Translated Subtitles (.srt)",
                    interactive=False,
                    visible=True
                )

        gr.HTML("""

        <div class="subtitle-info">
            <h3>📋 Instructions:</h3>
            <ol>
                <li>Upload any video file - MP4, AVI, MOV, MKV, etc.</li>
                <li>Choose translation option - Yes for English translation, No for original language only</li>
                <li>Click "Generate Subtitles" - Processing time depends on video length</li>
                <li>Download your subtitle files - Use them with any video player</li>
            </ol>
            <h3>✨ Features:</h3>
        </div>
        """)

        # Set up the processing
        process_btn.click(
            fn=process_video_interface,
            inputs=[video_input, translate_option],
            outputs=[original_output, info_output, translated_output]
        )

    return demo


# Launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.launch(share=True)