Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Mar 11

Commit

e4d42f1

verified ·

1 Parent(s): 9fafdf1

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -194

app.py CHANGED Viewed

@@ -1,231 +1,191 @@
 import os
-import re
 import google.generativeai as genai
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import tempfile
 import logging
 import gradio as gr
-from datetime import timedelta
-from pydub import AudioSegment
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
-# Shared Gemini model, used for both transcription and translation requests
-model = genai.GenerativeModel("gemini-2.0-flash")
-# Supported languages. The first entry ("Auto Detect") is sliced off when
-# the "Translate To" dropdown choices are built.
 SUPPORTED_LANGUAGES = [
-    "Auto Detect", "English", "Spanish", "French", "German", "Italian",
-    "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
-    "Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
 ]
-# Magic Prompts
-# Instruction sent together with each uploaded audio chunk. It asks for a
-# "[start -> end]" timestamp line followed by the subtitle text.
-TRANSCRIPTION_PROMPT = """Generate precise subtitles with accurate timestamps:
-1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
-2. Each subtitle 3-7 words
-3. Include speaker changes
-4. Preserve emotional tone
-5. Example:
-[00:00:05.250 -> 00:00:08.100]
-Example subtitle text
-Return ONLY subtitles with timestamps."""
-# Template for translation requests; filled in with str.format using the
-# `target_language` and `subtitles` placeholders.
-TRANSLATION_PROMPT = """Translate these subtitles to {target_language}:
-1. Keep timestamps identical
-2. Match text length to timing
-3. Preserve technical terms
-4. Use natural speech patterns
-ORIGINAL:
-{subtitles}
-TRANSLATED:"""
-def extract_audio(video_path):
-    """Extract the audio track of a video as a 44.1 kHz, 16-bit PCM WAV file.
-    Args:
-        video_path: Path of the video file to read.
-    Returns:
-        Path of the WAV file written to the system temp directory.
-    Raises:
-        ValueError: If the video has no audio track.
-    """
-    audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
-    video = VideoFileClip(video_path)
-    try:
-        if video.audio is None:
-            raise ValueError(f"No audio track found in {video_path}")
-        video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
-    finally:
-        # Release the ffmpeg readers and file handles held by the clip.
-        video.close()
-    return audio_path
-def split_audio(audio_path, chunk_duration=60):
-    """Cut a WAV file into consecutive chunk files of `chunk_duration` seconds.
-    Each chunk is exported to the temp directory as chunk_<start-second>.wav.
-    Returns the chunk paths in playback order.
-    """
-    recording = AudioSegment.from_wav(audio_path)
-    step_ms = chunk_duration * 1000  # the slice bounds below are in milliseconds
-    chunk_paths = []
-    for offset_ms in range(0, len(recording), step_ms):
-        target = os.path.join(tempfile.gettempdir(), f"chunk_{offset_ms//1000}.wav")
-        recording[offset_ms:offset_ms + step_ms].export(target, format="wav")
-        chunk_paths.append(target)
-    return chunk_paths
-def process_audio_chunk(chunk_path, start_time):
-    """Transcribe one audio chunk and shift its timestamps by `start_time`.
-    Args:
-        chunk_path: Path of the chunk WAV file; it is deleted before returning.
-        start_time: Offset of the chunk within the full recording, in seconds.
-    Returns:
-        The model's transcription with every "start -> end" timestamp line
-        rewritten relative to the full recording. Other lines are unchanged.
-    """
-    try:
-        # Upload file using Gemini's File API
-        uploaded_file = genai.upload_file(path=chunk_path)
-        # Get transcription
-        response = model.generate_content(
-            [TRANSCRIPTION_PROMPT, uploaded_file]
-        )
-        # Adjust timestamps relative to chunk start
-        adjusted_transcription = []
-        for line in response.text.splitlines():
-            start, arrow, end = line.partition('->')
-            if arrow:
-                try:
-                    adjusted_start = parse_timestamp(start) + start_time
-                    adjusted_end = parse_timestamp(end) + start_time
-                except ValueError:
-                    # Subtitle text that merely contains "->" is not a
-                    # timestamp line: keep it verbatim instead of aborting.
-                    adjusted_transcription.append(line)
-                    continue
-                line = f"[{format_timestamp(adjusted_start)} -> {format_timestamp(adjusted_end)}]"
-            adjusted_transcription.append(line)
-        return "\n".join(adjusted_transcription)
-    finally:
-        os.remove(chunk_path)
-def parse_timestamp(timestamp_str):
-    """Convert "[HH:]MM:SS.fff" to seconds.
-    Surrounding brackets/spaces are ignored and "," is accepted as the
-    decimal separator. Raises ValueError for any other number of fields.
-    """
-    fields = timestamp_str.strip("[] ").replace(',', '.').split(':')
-    if len(fields) not in (2, 3):
-        raise ValueError(f"Invalid timestamp: {timestamp_str}")
-    *leading, minutes, seconds_part = fields
-    total = 0.0
-    if leading:  # an hours field is present
-        total += float(leading[0]) * 3600
-    total += float(minutes) * 60
-    total += float(seconds_part)
-    return total
-def format_timestamp(seconds):
-    """Convert seconds to an SRT timestamp of the form HH:MM:SS,mmm.
-    Built by hand because str(timedelta) omits the fraction for whole
-    seconds, prints six fractional digits otherwise and does not zero-pad
-    the hours, none of which is valid SRT.
-    Args:
-        seconds: Offset in seconds (int or float); negative values clamp to 0.
-    Returns:
-        The timestamp string, e.g. "00:01:05,250".
-    """
-    total_ms = max(0, round(seconds * 1000))
-    whole_seconds, millis = divmod(total_ms, 1000)
-    minutes, secs = divmod(whole_seconds, 60)
-    hours, minutes = divmod(minutes, 60)
-    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
-def create_srt(subtitles_text):
-    """Convert timestamped transcription text to SRT.
-    Entries are separated by blank lines. Each must contain a time range
-    written as "start -> end" or SRT-style "start --> end", optionally in
-    square brackets; the text after the range becomes the cue text. Entries
-    without a time range are skipped, and cues are numbered consecutively.
-    """
-    time_range = re.compile(
-        r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*-{1,2}>\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?'
-    )
-    entries = re.split(r'\n{2,}', subtitles_text.strip())
-    srt_output = []
-    for position, entry in enumerate(entries, 1):
-        time_match = time_range.search(entry)
-        if not time_match:
-            continue
-        try:
-            start_time = parse_timestamp(time_match.group(1))
-            end_time = parse_timestamp(time_match.group(2))
-        except ValueError as e:
-            print(f"Skipping invalid entry {position}: {str(e)}")
-            continue
-        # Everything after the time range is the cue text; this also drops an
-        # SRT index line that precedes the range in already-formatted input.
-        text = entry[time_match.end():].strip()
-        # Number by emitted cues so skipped entries leave no gaps.
-        srt_output.append(
-            f"{len(srt_output) + 1}\n"
-            f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
-            f"{text}\n"
-        )
-    return "\n".join(srt_output)
-def translate_subtitles(subtitles, target_lang):
-    """Ask the Gemini model to translate subtitle text into `target_lang`.
-    The prompt template instructs the model to keep timestamps identical;
-    the model's reply text is returned as-is.
-    """
-    filled_prompt = TRANSLATION_PROMPT.format(subtitles=subtitles, target_language=target_lang)
-    return model.generate_content(filled_prompt).text
-def process_video(video_path, source_lang, target_lang):
-    """Run the full pipeline: extract audio, transcribe in chunks, translate.
-    Args:
-        video_path: Path of the uploaded video.
-        source_lang: Selected source language (not used by the pipeline).
-        target_lang: Language to translate into, or "None" to skip translation.
-    Returns:
-        (original_srt_path, translated_srt_path). The translated path is None
-        when no translation was requested; both are None if any step fails.
-    """
-    chunk_seconds = 60  # single source of truth for chunk length and offsets
-    audio_path = None
-    chunks = []
-    try:
-        # Extract audio
-        audio_path = extract_audio(video_path)
-        # Split into chunks
-        chunks = split_audio(audio_path, chunk_seconds)
-        # Process each chunk, offsetting its timestamps by its start position
-        full_transcription = [
-            process_audio_chunk(chunk_path, i * chunk_seconds)
-            for i, chunk_path in enumerate(chunks)
-        ]
-        # Combine results
-        srt_original = create_srt("\n\n".join(full_transcription))
-        # Save original subtitles. UTF-8 is explicit because the platform
-        # default encoding may not be able to represent non-Latin subtitles.
-        original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
-        with open(original_srt, "w", encoding="utf-8") as f:
-            f.write(srt_original)
-        # Translate if needed
-        translated_srt = None
-        if target_lang != "None":
-            translated_text = translate_subtitles(srt_original, target_lang)
-            translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
-            with open(translated_srt, "w", encoding="utf-8") as f:
-                f.write(create_srt(translated_text))
-        return original_srt, translated_srt
-    except Exception as e:
-        print(f"Processing error: {str(e)}")
-        return None, None
-    finally:
-        # process_audio_chunk deletes the chunk it handled; after a failure
-        # the remaining chunk files are removed here along with the audio.
-        for leftover in [audio_path, *chunks]:
-            if leftover and os.path.exists(leftover):
-                os.remove(leftover)
-# Gradio Interface
-# Layout: a video upload next to the language controls and the button,
-# followed by a row with the two downloadable subtitle files.
-with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
-    gr.Markdown("# 🎬 Professional Subtitle Generator")
-    with gr.Row():
-        video_input = gr.Video(label="Upload Video", sources=["upload"])
-        with gr.Column():
-            # NOTE(review): this value is passed to process_video as
-            # source_lang, which does not use it.
-            source_lang = gr.Dropdown(
-                label="Source Language",
                 choices=SUPPORTED_LANGUAGES,
-                value="Auto Detect"
             )
-            # "None" means "do not translate"; "Auto Detect" is not offered
-            # as a translation target.
-            target_lang = gr.Dropdown(
                 label="Translate To",
-                choices=["None"] + SUPPORTED_LANGUAGES[1:],
-                value="None"
             )
-            process_btn = gr.Button("Generate", variant="primary")
-    with gr.Row():
-        original_sub = gr.File(label="Original Subtitles")
-        translated_sub = gr.File(label="Translated Subtitles")
-    # process_video returns (original_path, translated_path), matching the
-    # order of the two outputs below.
-    process_btn.click(
         process_video,
-        inputs=[video_input, source_lang, target_lang],
-        outputs=[original_sub, translated_sub]
     )
-# Start the server only when run as a script, not when imported.
-if __name__ == "__main__":
-    app.launch(server_port=7860, share=True)

 import os
 import google.generativeai as genai
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import tempfile
 import logging
 import gradio as gr
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
+# os.environ[...] raises KeyError at import time if GEMINI_API_KEY is unset.
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+# Create the Gemini model
+# Generation settings applied to every request made through `model`
+# (both transcription and translation use it).
+generation_config = {
+    "temperature": 0.7,  # Lower temperature for more accurate results
+    "top_p": 0.9,
+    "top_k": 40,
+    "max_output_tokens": 8192,
+    "response_mime_type": "text/plain",
+}
+model = genai.GenerativeModel(
+    model_name="gemini-2.0-flash-exp",
+    generation_config=generation_config,
+)
+# List of all supported languages, shown in the UI dropdowns.
+# The first entry ("Auto Detect") must stay at index 0: the "Translate To"
+# dropdown is built from SUPPORTED_LANGUAGES[1:] to exclude it.
 SUPPORTED_LANGUAGES = [
+    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
+    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
+    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
+    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
+    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
+    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
+    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
+    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
+    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
+    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
+    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
+    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
+    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
+    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
+    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
+    "Sundanese"
 ]
+def extract_audio_from_video(video_file):
+    """Extract the audio track of a video into a 16 kHz WAV file.
+    Args:
+        video_file: Path of the video file to read.
+    Returns:
+        Path of a newly created WAV file in the system temp directory. The
+        caller is responsible for deleting it.
+    Raises:
+        ValueError: If the video has no audio track.
+    """
+    video = VideoFileClip(video_file)
+    try:
+        if video.audio is None:
+            raise ValueError("The video has no audio track to transcribe.")
+        # Unique name per call so simultaneous requests cannot overwrite
+        # each other's audio.
+        handle, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
+        os.close(handle)
+        try:
+            video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
+        except Exception:
+            # Do not leave a partial file behind when the export fails.
+            os.remove(audio_file)
+            raise
+    finally:
+        # Release the ffmpeg readers and file handles held by the clip.
+        video.close()
+    return audio_file
+def transcribe_audio_with_gemini(audio_file):
+    """Transcribe a WAV file with Gemini, asking for "[HH:MM:SS] sentence" lines.
+    Args:
+        audio_file: Path of the WAV file to transcribe.
+    Returns:
+        The model's reply text with surrounding whitespace stripped.
+    """
+    with open(audio_file, "rb") as f:
+        audio_data = f.read()
+    # Inline audio part for the request.
+    # NOTE(review): the whole file is sent inline; long recordings may exceed
+    # the API's inline request size limit. TODO confirm and switch to
+    # genai.upload_file if needed.
+    audio_blob = {
+        'mime_type': 'audio/wav',
+        'data': audio_data
+    }
+    # Magic prompt for transcription with timestamps
+    prompt = """
+    You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
+    Include timestamps for each sentence in the following format:
+    [HH:MM:SS] Sentence 1
+    [HH:MM:SS] Sentence 2
+    ...
+    Respond only with the transcription and timestamps. Do not add explanations or extra text.
+    """
+    # Send the instructions and the audio together in one request. Sending
+    # them as two chat turns made the model answer the prompt before it had
+    # any audio, costing an extra API call per video.
+    response = model.generate_content([prompt, audio_blob])
+    return response.text.strip()
+def _timestamp_to_ms(stamp):
+    """Parse "HH:MM:SS" or "MM:SS" (optional .fraction) to milliseconds; None if malformed."""
+    whole, _, fraction = stamp.strip().replace(",", ".").partition(".")
+    fields = whole.split(":")
+    if len(fields) not in (2, 3) or not all(field.isdecimal() for field in fields):
+        return None
+    if fraction and not fraction.isdecimal():
+        return None
+    seconds = 0
+    for field in fields:
+        seconds = seconds * 60 + int(field)
+    # Right-pad so ".5" means 500 ms; digits beyond milliseconds are dropped.
+    millis = int((fraction + "00")[:3]) if fraction else 0
+    return seconds * 1000 + millis
+def _ms_to_srt_time(total_ms):
+    """Format milliseconds as an SRT timestamp (HH:MM:SS,mmm)."""
+    whole_seconds, millis = divmod(total_ms, 1000)
+    minutes, seconds = divmod(whole_seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
+def _parse_cue(line):
+    """Split "[timestamp] text" into (start_ms, text); start_ms is None without a valid timestamp."""
+    stripped = line.strip()
+    if stripped.startswith("["):
+        # Split on the first "]" only, so brackets inside the text survive.
+        stamp, closing, rest = stripped[1:].partition("]")
+        start_ms = _timestamp_to_ms(stamp) if closing else None
+        if start_ms is not None:
+            return start_ms, rest.strip()
+    return None, stripped
+def generate_subtitles(transcription):
+    """Generate SRT subtitles from a transcription with "[HH:MM:SS] text" lines.
+    Args:
+        transcription: One cue per line. Blank lines are ignored. A line
+            without a valid leading timestamp (including bracketed notes
+            such as "[music]") reuses the previous cue's start time, or
+            00:00:00 if it comes first.
+    Returns:
+        The SRT document as a string ("" for empty input). Cues are numbered
+        consecutively. Each cue ends where the next later cue starts; the
+        last cue, or one not followed by a later start, lasts 5 seconds.
+    """
+    default_duration_ms = 5000
+    cues = []
+    for line in transcription.splitlines():
+        if not line.strip():
+            continue
+        start_ms, text = _parse_cue(line)
+        if start_ms is None:
+            start_ms = cues[-1][0] if cues else 0
+        cues.append((start_ms, text))
+    blocks = []
+    for number, (start_ms, text) in enumerate(cues, start=1):
+        # `number` is 1-based, so cues[number] is the following cue.
+        next_start = cues[number][0] if number < len(cues) else None
+        if next_start is not None and next_start > start_ms:
+            end_ms = next_start
+        else:
+            end_ms = start_ms + default_duration_ms
+        blocks.append(
+            f"{number}\n{_ms_to_srt_time(start_ms)} --> {_ms_to_srt_time(end_ms)}\n{text}\n\n"
+        )
+    return "".join(blocks)
+def translate_srt(srt_text, target_language):
+    """Have Gemini translate the cue text of an SRT document into `target_language`.
+    The prompt asks the model to keep the timestamps and SRT structure; the
+    model's reply text is returned unmodified.
+    """
+    # Magic prompt for translation
+    translation_request = f"""
+    Translate the following SRT subtitles into {target_language}.
+    Preserve the SRT format (timestamps and structure).
+    Translate only the text after the timestamp.
+    Do not add explanations or extra text.
+    Ensure the translation is accurate and culturally appropriate.
+    Here is the SRT file:
+    {srt_text}
+    """
+    return model.generate_content(translation_request).text
+def process_video(video_file, language="Auto Detect", translate_to=None):
+    """Generate SRT subtitles for a video and optionally translate them.
+    Args:
+        video_file: Path of the uploaded video.
+        language: Source-language selection from the UI. Currently unused.
+        translate_to: Target language name, or None / "None" to skip translation.
+    Returns:
+        (original_srt_path, translated_srt_path, status_text). The translated
+        path is None when no translation was requested.
+    Raises:
+        ValueError: If no video file was provided.
+    """
+    if not video_file:
+        raise ValueError("No video file was provided.")
+    # Extract audio from the video
+    audio_file = extract_audio_from_video(video_file)
+    try:
+        # Transcribe audio using Gemini
+        transcription = transcribe_audio_with_gemini(audio_file)
+    finally:
+        # The audio is only needed for transcription; remove it even when
+        # the request fails so temp files do not pile up.
+        if os.path.exists(audio_file):
+            os.remove(audio_file)
+    # Generate subtitles
+    subtitles = generate_subtitles(transcription)
+    # Private directory per request: fixed names in the shared temp directory
+    # would let concurrent users overwrite each other's downloads.
+    output_dir = tempfile.mkdtemp(prefix="autosubgen_")
+    # Save original subtitles to an SRT file
+    original_srt_file = os.path.join(output_dir, "original_subtitles.srt")
+    with open(original_srt_file, "w", encoding="utf-8") as f:
+        f.write(subtitles)
+    # Translate subtitles if a target language is provided
+    translated_srt_file = None
+    if translate_to and translate_to != "None":
+        translated_subtitles = translate_srt(subtitles, translate_to)
+        translated_srt_file = os.path.join(output_dir, "translated_subtitles.srt")
+        with open(translated_srt_file, "w", encoding="utf-8") as f:
+            f.write(translated_subtitles)
+    return original_srt_file, translated_srt_file, "Detected Language: Auto"
+# Define the Gradio interface
+# Layout: a header column, then one tab holding the inputs row, the button,
+# the two file downloads and a status textbox.
+with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
+    # Header
+    with gr.Column():
+        gr.Markdown("# 🎥 AutoSubGen")
+        gr.Markdown("### AI-Powered Video Subtitle Generator")
+        gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
+    # Main content
+    with gr.Tab("Generate Subtitles"):
+        gr.Markdown("### Upload a video file to generate subtitles.")
+        with gr.Row():
+            video_input = gr.Video(label="Upload Video File", scale=2)
+            # NOTE(review): this value is passed to process_video as
+            # `language`, which does not use it.
+            language_dropdown = gr.Dropdown(
                 choices=SUPPORTED_LANGUAGES,
+                label="Select Language",
+                value="Auto Detect",
+                scale=1
             )
+            # "None" means "do not translate".
+            translate_to_dropdown = gr.Dropdown(
+                choices=["None"] + SUPPORTED_LANGUAGES[1:],  # Exclude "Auto Detect"
                 label="Translate To",
+                value="None",
+                scale=1
             )
+        generate_button = gr.Button("Generate Subtitles", variant="primary")
+        with gr.Row():
+            original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
+            translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
+        # Receives the third value returned by process_video, which is
+        # always the fixed text "Detected Language: Auto".
+        detected_language_output = gr.Textbox(label="Detected Language")
+    # Link button to function
+    # The three outputs are filled, in order, from process_video's 3-tuple.
+    generate_button.click(
         process_video,
+        inputs=[video_input, language_dropdown, translate_to_dropdown],
+        outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
     )
+# Launch the Gradio interface with a public link
+# NOTE(review): this runs at import time; there is no
+# `if __name__ == "__main__":` guard, so importing the module starts the server.
+demo.launch(share=True)