Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed 5 days ago

Commit

d24a2f3

verified ·

1 Parent(s): c605b09

Create app.py

Browse files

Files changed (1) hide show

app.py +221 -0

app.py ADDED Viewed

	@@ -0,0 +1,221 @@

+# Step 1: Install required libraries
+# Run this command in your terminal or notebook before running the script:
+# pip install gradio requests google-generativeai moviepy pydub
+# Step 2: Import libraries
+import os
+import time
+import re
+import google.generativeai as genai
+import requests
+import gradio as gr
+import tempfile
+from pydub import AudioSegment
+from moviepy.editor import VideoFileClip, AudioFileClip
+from google.generativeai.types import HarmCategory, HarmBlockThreshold
+# --- SECRET MANAGEMENT ---
+# Step 3: Load secrets from the environment
+# This securely loads the API key and URL you set in the Colab/Hugging Face secrets manager.
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+TTS_API_URL = os.getenv("TTS_API_URL", "https://athspi-aitools-aittsg.hf.space/api/generate-tts/") # Default fallback
+# Check if the secrets were loaded correctly
+if not GEMINI_API_KEY:
+    raise ValueError("GEMINI_API_KEY secret not found! Please set it in your environment or Colab/Hugging Face secrets.")
+if not TTS_API_URL:
+    raise ValueError("TTS_API_URL secret not found or empty! Please set it.")
+# Configure the Gemini API with the loaded key
+genai.configure(api_key=GEMINI_API_KEY)
+# Step 4: Define Voice Choices
+VOICE_CHOICES = {
+    "Male (Charon)": "Charon",
+    "Female (Zephyr)": "Zephyr"
+}
+# Step 5: "Single Narrator" prompt for Gemini.
+# Sent together with the uploaded video in generate_tamil_script(). It asks for one
+# continuous Tamil script with no timestamps or speaker labels, and with English
+# style prompts / performance tags embedded directly in the text.
+GEMINI_PROMPT = """
+You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
+**CRITICAL INSTRUCTIONS:**
+1.  **Single Script:** Combine all dialogue from all speakers into one continuous script. The final output should be a single paragraph of text.
+2.  **NO Timestamps:** Do NOT include any timestamps (e.g., [00:01 - 00:03]).
+3.  **NO Speaker Labels:** Do NOT include any speaker labels or identifiers (e.g., ஆண்_1, பெண்_2).
+4.  **Incorporate Performance:** To make the script expressive for a single narrator, add English style prompts and performance tags directly into the text.
+    -   Use style prompts like `Say happily:`, `Whisper mysteriously:`, `Shout angrily:`.
+    -   Use performance tags like `[laugh]`, `[singing]`, `[sigh]`.
+**EXAMPLE OUTPUT:**
+Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும். Shout angrily: உடனே இங்கிருந்து போ!
+"""
+# Step 6: Function to upload files to Gemini
+def upload_to_gemini(path, mime_type=None):
+    """Uploads the given file to Gemini."""
+    print(f"Uploading file: {path}")
+    file = genai.upload_file(path, mime_type=mime_type)
+    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
+    return file
+# Step 7: Function to wait for files to be active
+def wait_for_files_active(files):
+    """Waits for the given files to be active."""
+    print("Waiting for file processing...")
+    for name in (file.name for file in files):
+        file = genai.get_file(name)
+        while file.state.name == "PROCESSING":
+            print(".", end="", flush=True)
+            time.sleep(10)
+            file = genai.get_file(name)
+        if file.state.name != "ACTIVE":
+            raise Exception(f"File {file.name} failed to process")
+    print("...all files ready")
+    print()
+# Step 8: Function to generate the single Tamil script
+def generate_tamil_script(video_file_path):
+    """Generates a single, continuous Tamil script from the video."""
+    try:
+        video_file = upload_to_gemini(video_file_path, mime_type="video/mp4")
+        wait_for_files_active([video_file])
+        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
+        print("Generating single narrator script...")
+        response = model.generate_content(
+            [GEMINI_PROMPT, video_file],
+            request_options={"timeout": 1000},
+             safety_settings={
+                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+            }
+        )
+        print(f"Deleting uploaded file from Gemini: {video_file.name}")
+        genai.delete_file(video_file.name)
+        if response.text:
+            return " ".join(response.text.strip().splitlines())
+        else:
+            return "Error: No valid script was generated by Gemini."
+    except Exception as e:
+        return f"Error in Gemini generation: {str(e)}"
+# Step 9: Simplified function to generate a single audio file
+def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path):
+    """Generates one continuous audio track for the entire script."""
+    try:
+        print(f"Generating single audio track with voice '{voice_name}' | Cheerful: {is_cheerful}")
+        payload = {
+            "text": dialogue_text,
+            "voice_name": voice_name,
+            "cheerful": is_cheerful
+        }
+        response = requests.post(TTS_API_URL, json=payload)
+        if response.status_code == 200:
+            with open(output_path, "wb") as f:
+                f.write(response.content)
+            print(f"Audio track saved successfully to {output_path}")
+            return True
+        else:
+            print(f"Error from TTS API: {response.status_code} - {response.text}")
+            return False
+    except Exception as e:
+        print(f"An error occurred in generate_single_audio_track: {str(e)}")
+        return False
+# Step 10: Function to replace video audio
+def replace_video_audio(video_path, new_audio_path, output_path):
+    """Replaces the audio of a video with a new audio file."""
+    try:
+        video_clip = VideoFileClip(video_path)
+        audio_clip = AudioFileClip(new_audio_path)
+        final_clip = video_clip.set_audio(audio_clip)
+        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
+        video_clip.close()
+        audio_clip.close()
+        final_clip.close()
+        return output_path
+    except Exception as e:
+        return f"Error replacing video audio: {str(e)}"
+# Step 11: Main processing function
+def process_video_single_speaker(video_path, voice_choice, is_cheerful):
+    """Processes the video for single-speaker dubbing."""
+    if not video_path:
+        return None, "Please upload a video file first."
+    try:
+        print("-" * 50)
+        print(f"Starting single-speaker processing for: {video_path}")
+        script = generate_tamil_script(video_path)
+        if script.startswith("Error:"):
+            return None, script
+        print("\n--- Generated Script ---\n", script, "\n------------------------\n")
+        voice_name = VOICE_CHOICES[voice_choice]
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            temp_audio_path = temp_audio.name
+        success = generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path)
+        if not success:
+            return None, "Failed to generate the audio track."
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
+            output_video_path = temp_video.name
+        result = replace_video_audio(video_path, temp_audio_path, output_video_path)
+        if isinstance(result, str) and result.startswith("Error:"):
+            return None, result
+        os.remove(temp_audio_path)
+        print("Processing complete!")
+        print("-" * 50)
+        return result, script
+    except Exception as e:
+        return None, f"An unexpected error occurred: {str(e)}"
+# Step 12: Gradio Interface
+def create_gradio_interface():
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# AI Single-Speaker Video Dubbing")
+        gr.Markdown("Upload a video and choose a voice. The AI will transcribe all speech into a single script and re-voice the entire video with the selected narrator.")
+        with gr.Row():
+            with gr.Column(scale=1):
+                video_input = gr.Video(label="Upload Video File")
+                voice_selector = gr.Radio(
+                    list(VOICE_CHOICES.keys()),
+                    label="Select Narrator Voice",
+                    value="Male (Charon)"
+                )
+                cheerful_checkbox = gr.Checkbox(label="Enable Cheerful Tone", value=False)
+                process_button = gr.Button("Generate Dubbed Video", variant="primary")
+            with gr.Column(scale=1):
+                video_output = gr.Video(label="Dubbed Video")
+                dialogue_output = gr.Textbox(label="Generated Full Script", lines=15, interactive=False)
+        process_button.click(
+            fn=process_video_single_speaker,
+            inputs=[video_input, voice_selector, cheerful_checkbox],
+            outputs=[video_output, dialogue_output]
+        )
+    return demo
+# Step 13: Launch the Gradio app
+if __name__ == "__main__":
+    gradio_app = create_gradio_interface()
+    gradio_app.launch(debug=True)