Spaces:

Nick021402
/

SubGen

Sleeping

App Files Files Community

Nick021402 committed on Jun 12

Commit

8a5a458

verified ·

1 Parent(s): 6e5f830

Create App.py

Browse files

Files changed (1) hide show

App.py +289 -0

App.py ADDED Viewed

	@@ -0,0 +1,289 @@

+# app.py - Main Gradio application
+import logging
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+
+import gradio as gr
+import torch
+import whisper
+import yt_dlp
+from transformers import MarianMTModel, MarianTokenizer
+class SubtitleTranslator:
+    """Generate English SRT subtitles from a video file or a YouTube URL.
+
+    Pipeline: obtain an audio file (yt-dlp download or ffmpeg extraction),
+    transcribe it with Whisper, translate every segment to English with a
+    MarianMT model, and write the result to an SRT file.
+
+    All scratch files are created with ``tempfile`` so that concurrent
+    requests never share (or clobber) each other's files.
+    """
+    _logger = logging.getLogger(__name__)
+    # Prefix of the per-download scratch directories; used at cleanup time
+    # to recognise directories that this class created and may delete.
+    _YT_DIR_PREFIX = "subgen_yt_"
+    # Multilingual model used when no dedicated language-pair model loads.
+    _FALLBACK_MODEL = "Helsinki-NLP/opus-mt-mul-en"
+    # Full language names -> codes used in the translation model names.
+    # Values that are not keys here are passed through unchanged.
+    _LANG_CODES = {
+        'spanish': 'es', 'french': 'fr', 'german': 'de', 'italian': 'it',
+        'portuguese': 'pt', 'russian': 'ru', 'chinese': 'zh', 'japanese': 'ja',
+        'korean': 'ko', 'arabic': 'ar', 'hindi': 'hi', 'dutch': 'nl',
+        'swedish': 'sv', 'norwegian': 'no', 'danish': 'da', 'finnish': 'fi',
+    }
+    def __init__(self):
+        """Load the Whisper model and create the empty translation caches."""
+        # Use the smallest Whisper model for speed
+        self.whisper_model = whisper.load_model("tiny")
+        # Translation model / tokenizer caches, keyed by model name
+        self.translation_models = {}
+        self.tokenizers = {}
+    def download_youtube_audio(self, url):
+        """Download the audio track of a YouTube video as mp3.
+
+        The file is written into a fresh temporary directory so that stale
+        files or parallel downloads cannot be mistaken for this one.
+
+        Args:
+            url: Video URL handed to yt-dlp.
+
+        Returns:
+            Path of the downloaded mp3, or ``None`` if anything failed.
+        """
+        work_dir = None
+        try:
+            work_dir = tempfile.mkdtemp(prefix=self._YT_DIR_PREFIX)
+            ydl_opts = {
+                'format': 'bestaudio/best',
+                'outtmpl': os.path.join(work_dir, 'temp_audio.%(ext)s'),
+                'postprocessors': [{
+                    'key': 'FFmpegExtractAudio',
+                    'preferredcodec': 'mp3',
+                    'preferredquality': '192',
+                }],
+            }
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                ydl.download([url])
+            # Find the downloaded file
+            for name in sorted(os.listdir(work_dir)):
+                if name.startswith('temp_audio') and name.endswith('.mp3'):
+                    return os.path.join(work_dir, name)
+            self._logger.error("yt-dlp produced no mp3 file for %s", url)
+        except Exception:
+            # Boundary of the "None on failure" contract: record the cause
+            # instead of swallowing it silently.
+            self._logger.exception("YouTube download failed for %s", url)
+        if work_dir is not None:
+            shutil.rmtree(work_dir, ignore_errors=True)
+        return None
+    def extract_audio_from_video(self, video_path):
+        """Extract a mono 16 kHz PCM WAV track from a video with ffmpeg.
+
+        Args:
+            video_path: Path of the input video file.
+
+        Returns:
+            Path of the extracted WAV file, or ``None`` if extraction failed.
+        """
+        audio_path = None
+        try:
+            fd, audio_path = tempfile.mkstemp(prefix="subgen_", suffix=".wav")
+            os.close(fd)
+            cmd = [
+                # -y goes first: mkstemp already created the output file, so
+                # ffmpeg must be told to overwrite it.
+                'ffmpeg', '-y', '-i', video_path,
+                '-acodec', 'pcm_s16le',
+                '-ac', '1',
+                '-ar', '16000',
+                audio_path,
+            ]
+            subprocess.run(cmd, check=True, capture_output=True)
+            return audio_path
+        except subprocess.CalledProcessError as err:
+            stderr_tail = (err.stderr or b"").decode("utf-8", errors="replace")[-500:]
+            self._logger.error("ffmpeg failed (exit %s): %s", err.returncode, stderr_tail)
+        except Exception:
+            self._logger.exception("Could not extract audio from %s", video_path)
+        self._discard_audio(audio_path)
+        return None
+    def transcribe_audio(self, audio_path):
+        """Transcribe an audio file with Whisper and return its raw result."""
+        return self.whisper_model.transcribe(audio_path)
+    def _load_translation_model(self, model_name):
+        """Load tokenizer and model for *model_name* and cache both."""
+        tokenizer = MarianTokenizer.from_pretrained(model_name)
+        model = MarianMTModel.from_pretrained(model_name)
+        # Store only after both loaded, so the caches never disagree.
+        self.tokenizers[model_name] = tokenizer
+        self.translation_models[model_name] = model
+    def get_translation_model(self, source_lang, target_lang="en"):
+        """Return the ``(model, tokenizer)`` pair for a language pair.
+
+        Falls back to the multilingual-to-English model when the dedicated
+        pair cannot be loaded. The fallback is then cached under the
+        requested name as well, so the failing load is not retried for
+        every subtitle segment.
+
+        NOTE: the fallback always translates to English, whatever
+        *target_lang* is.
+
+        Raises:
+            Exception: whatever ``from_pretrained`` raises if the fallback
+                model cannot be loaded either.
+        """
+        model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
+        if model_name not in self.translation_models:
+            try:
+                self._load_translation_model(model_name)
+            except Exception:
+                self._logger.warning(
+                    "Could not load %s; falling back to %s",
+                    model_name, self._FALLBACK_MODEL,
+                )
+                if self._FALLBACK_MODEL not in self.translation_models:
+                    self._load_translation_model(self._FALLBACK_MODEL)
+                self.translation_models[model_name] = self.translation_models[self._FALLBACK_MODEL]
+                self.tokenizers[model_name] = self.tokenizers[self._FALLBACK_MODEL]
+        return self.translation_models[model_name], self.tokenizers[model_name]
+    def translate_text(self, text, source_lang, target_lang="en"):
+        """Translate *text* with MarianMT.
+
+        Returns *text* unchanged when source and target language are equal,
+        when the text is empty or whitespace-only, or when translation
+        fails (best effort: a subtitle in the original language is better
+        than none).
+        """
+        if source_lang == target_lang or not text or not text.strip():
+            return text
+        try:
+            model, tokenizer = self.get_translation_model(source_lang, target_lang)
+            inputs = tokenizer.encode(text, return_tensors="pt", truncation=True, max_length=512)
+            translated = model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
+            return tokenizer.decode(translated[0], skip_special_tokens=True)
+        except Exception:
+            self._logger.exception("Translation %s->%s failed", source_lang, target_lang)
+            return text  # Return original if translation fails
+    def format_timestamp(self, seconds):
+        """Convert seconds to the SRT timestamp format ``HH:MM:SS,mmm``.
+
+        The value is rounded to whole milliseconds first; splitting the
+        float directly loses a millisecond to representation error
+        (1.001 s used to render as ``00:00:01,000``). Negative values are
+        clamped to zero.
+        """
+        total_ms = max(0, int(round(float(seconds) * 1000)))
+        hours, remainder = divmod(total_ms, 3_600_000)
+        minutes, remainder = divmod(remainder, 60_000)
+        secs, millisecs = divmod(remainder, 1000)
+        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"
+    def create_srt(self, segments, source_lang):
+        """Build SRT content from Whisper segments, translated to English.
+
+        Args:
+            segments: Iterable of mappings with ``start``, ``end`` (seconds)
+                and ``text`` keys.
+            source_lang: Language code of the segment text.
+
+        Returns:
+            The SRT document as one string (empty for no segments).
+        """
+        cues = []
+        for index, segment in enumerate(segments, 1):
+            start_time = self.format_timestamp(segment['start'])
+            end_time = self.format_timestamp(segment['end'])
+            translated_text = self.translate_text(segment['text'].strip(), source_lang, "en")
+            cues.append(f"{index}\n{start_time} --> {end_time}\n{translated_text}\n\n")
+        return "".join(cues)
+    def _discard_audio(self, audio_path):
+        """Best-effort removal of a temporary audio file created by this class."""
+        if not audio_path:
+            return
+        try:
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
+        except OSError:
+            self._logger.warning("Could not remove temporary file %s", audio_path)
+        # Downloads live in their own scratch directory; remove it as well.
+        parent = os.path.dirname(audio_path)
+        if os.path.basename(parent).startswith(self._YT_DIR_PREFIX):
+            shutil.rmtree(parent, ignore_errors=True)
+    def process_video(self, video_input, youtube_url):
+        """Run the whole pipeline for one request.
+
+        A non-blank *youtube_url* takes precedence over *video_input*.
+
+        Args:
+            video_input: Path of an uploaded video file, or a falsy value.
+            youtube_url: YouTube URL, or ``None`` / blank.
+
+        Returns:
+            ``(status_message, srt_path)``; ``srt_path`` is ``None`` whenever
+            processing did not complete. Never raises: failures are reported
+            through the status message.
+        """
+        audio_path = None
+        try:
+            # Determine input source
+            url = (youtube_url or "").strip()
+            if url:
+                audio_path = self.download_youtube_audio(url)
+                if not audio_path:
+                    return "Error: Could not download YouTube video", None
+            elif video_input:
+                audio_path = self.extract_audio_from_video(video_input)
+                if not audio_path:
+                    return "Error: Could not extract audio from video", None
+            else:
+                return "Please provide either a video file or YouTube URL", None
+            # Transcribe audio
+            result = self.transcribe_audio(audio_path)
+            segments = result['segments']
+            # Detect language and map it to a translation-model code
+            detected_lang = result.get('language', 'unknown')
+            source_lang_code = self._LANG_CODES.get(str(detected_lang).lower(), detected_lang)
+            # Create SRT content
+            srt_content = self.create_srt(segments, source_lang_code)
+            # Save to a unique file so parallel requests cannot overwrite
+            # each other's subtitles.
+            with tempfile.NamedTemporaryFile(
+                'w', encoding='utf-8', prefix='translated_subtitles_',
+                suffix='.srt', delete=False,
+            ) as f:
+                f.write(srt_content)
+                srt_filename = f.name
+            status_msg = "\n".join([
+                "✅ Processing complete!",
+                f"🔍 Detected language: {detected_lang}",
+                f"📝 Generated {len(segments)} subtitle segments",
+                "🌍 Translated to English",
+            ])
+            return status_msg, srt_filename
+        except Exception as e:
+            self._logger.exception("Subtitle generation failed")
+            return f"Error during processing: {str(e)}", None
+        finally:
+            # Clean up temporary files on every path, not only on success
+            self._discard_audio(audio_path)
+# Module-level translator shared by every request. Creating it runs
+# SubtitleTranslator.__init__, which loads the Whisper "tiny" model, so this
+# happens once at import time rather than per click.
+translator = SubtitleTranslator()
+# Click handler wired to the "Generate Subtitles" button
+def process_video_interface(video_file, youtube_url, progress=gr.Progress()):
+    """Run the shared translator for one request and report coarse progress.
+
+    The progress values are fixed markers, not measurements: two are sent
+    before ``translator.process_video`` is called and two after it returns.
+
+    Returns:
+        Whatever ``translator.process_video`` returns for the two inputs.
+    """
+    before_steps = ((0.1, "Starting processing..."), (0.3, "Extracting audio..."))
+    after_steps = ((0.7, "Transcribing and translating..."), (1.0, "Complete!"))
+    for fraction, label in before_steps:
+        progress(fraction, desc=label)
+    outcome = translator.process_video(video_file, youtube_url)
+    for fraction, label in after_steps:
+        progress(fraction, desc=label)
+    return outcome
+# Page-level stylesheet handed to gr.Blocks(css=...). It caps the container
+# width and styles the .title, .subtitle and .feature-box elements that the
+# gr.HTML snippets in the layout below emit.
+css = """
+.gradio-container {
+    max-width: 900px !important;
+}
+.title {
+    text-align: center;
+    color: #2563eb;
+    font-size: 2.5rem;
+    font-weight: bold;
+    margin-bottom: 1rem;
+}
+.subtitle {
+    text-align: center;
+    color: #64748b;
+    font-size: 1.2rem;
+    margin-bottom: 2rem;
+}
+.feature-box {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    color: white;
+    padding: 1rem;
+    border-radius: 10px;
+    margin: 1rem 0;
+}
+"""
+# Build the Gradio UI: header, feature list, an input column (file upload or
+# YouTube URL plus the submit button), an output column (status text and the
+# SRT download), and a footer.
+with gr.Blocks(css=css, title="Video Subtitle Translator") as app:
+    # Page header; styled by the .title / .subtitle rules in `css`
+    gr.HTML("""
+    <div class="title">🎬 Video Subtitle Translator</div>
+    <div class="subtitle">Generate English subtitles from any language video using AI</div>
+    """)
+    # Static feature list; styled by the .feature-box rule in `css`
+    with gr.Row():
+        with gr.Column():
+            gr.HTML("""
+            <div class="feature-box">
+                <h3>🚀 Features:</h3>
+                <ul>
+                    <li>📹 Upload video files or paste YouTube links</li>
+                    <li>🎯 Automatic speech recognition with Whisper AI</li>
+                    <li>🌍 Auto-detect source language</li>
+                    <li>📝 Generate accurate English subtitles</li>
+                    <li>⏱️ Perfect timing synchronization</li>
+                    <li>💾 Download ready-to-use SRT files</li>
+                </ul>
+            </div>
+            """)
+    with gr.Row():
+        # Left column: the two alternative inputs and the submit button
+        with gr.Column(scale=1):
+            # The upload value is forwarded to ffmpeg as its input path, so
+            # the component is configured with type="filepath".
+            video_input = gr.File(
+                label="📁 Upload Video File",
+                file_types=[".mp4", ".avi", ".mov", ".mkv", ".webm", ".m4v"],
+                type="filepath"
+            )
+            # A non-blank URL takes precedence over the uploaded file
+            # (see SubtitleTranslator.process_video).
+            youtube_input = gr.Textbox(
+                label="🔗 Or paste YouTube URL",
+                placeholder="https://www.youtube.com/watch?v=...",
+                lines=1
+            )
+            process_btn = gr.Button(
+                "🚀 Generate Subtitles",
+                variant="primary",
+                size="lg"
+            )
+        # Right column: read-only outputs filled by the click handler
+        with gr.Column(scale=1):
+            status_output = gr.Textbox(
+                label="📊 Processing Status",
+                lines=6,
+                interactive=False
+            )
+            srt_output = gr.File(
+                label="💾 Download SRT File",
+                interactive=False
+            )
+    # Footer with attribution and a usage tip
+    gr.HTML("""
+    <div style="text-align: center; margin-top: 2rem; color: #64748b;">
+        <p>⚡ Powered by Whisper AI & MarianMT | 🤗 Running on Hugging Face Spaces</p>
+        <p>💡 Tip: For best results, use videos with clear audio and minimal background noise</p>
+    </div>
+    """)
+    # Wire the button: both inputs go to process_video_interface, whose
+    # (status, srt path) return value fills the two output components in order.
+    process_btn.click(
+        fn=process_video_interface,
+        inputs=[video_input, youtube_input],
+        outputs=[status_output, srt_output],
+        show_progress=True
+    )
+# Start the web server only when the file is run as a script
+if __name__ == "__main__":
+    app.launch()