Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed 5 days ago

Commit

8a409a5

verified ·

1 Parent(s): 9fd5ca4

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -183

app.py CHANGED Viewed

@@ -1,221 +1,159 @@
-# Step 1: Install required libraries
-# Run this command in your terminal or notebook before running the script:
-# pip install gradio requests google-generativeai moviepy pydub
-# Step 2: Import libraries
 import os
-import time
-import re
 import google.generativeai as genai
 import requests
-import gradio as gr
-import tempfile
-from pydub import AudioSegment
 from moviepy.editor import VideoFileClip, AudioFileClip
 from google.generativeai.types import HarmCategory, HarmBlockThreshold
-# --- SECRET MANAGEMENT ---
-# Step 3: Load secrets from the environment
-# This securely loads the API key and URL you set in the Colab/Hugging Face secrets manager.
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-TTS_API_URL = os.getenv("TTS_API_URL", "https://athspi-aitools-aittsg.hf.space/api/generate-tts/") # Default fallback
-# Check if the secrets were loaded correctly
 if not GEMINI_API_KEY:
-    raise ValueError("GEMINI_API_KEY secret not found! Please set it in your environment or Colab/Hugging Face secrets.")
 if not TTS_API_URL:
-    raise ValueError("TTS_API_URL secret not found or empty! Please set it.")
 # Configure the Gemini API with the loaded key
 genai.configure(api_key=GEMINI_API_KEY)
-# Step 4: Define Voice Choices
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 }
-# Step 5: "Single Narrator" Magic Prompt for Gemini
 GEMINI_PROMPT = """
 You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 **CRITICAL INSTRUCTIONS:**
-1.  **Single Script:** Combine all dialogue from all speakers into one continuous script. The final output should be a single paragraph of text.
-2.  **NO Timestamps:** Do NOT include any timestamps (e.g., [00:01 - 00:03]).
-3.  **NO Speaker Labels:** Do NOT include any speaker labels or identifiers (e.g., ஆண்_1, பெண்_2).
-4.  **Incorporate Performance:** To make the script expressive for a single narrator, add English style prompts and performance tags directly into the text.
-    -   Use style prompts like `Say happily:`, `Whisper mysteriously:`, `Shout angrily:`.
-    -   Use performance tags like `[laugh]`, `[singing]`, `[sigh]`.
 **EXAMPLE OUTPUT:**
-Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும். Shout angrily: உடனே இங்கிருந்து போ!
 """
-# Step 6: Function to upload files to Gemini
-def upload_to_gemini(path, mime_type=None):
-    """Uploads the given file to Gemini."""
-    print(f"Uploading file: {path}")
-    file = genai.upload_file(path, mime_type=mime_type)
-    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
-    return file
-# Step 7: Function to wait for files to be active
-def wait_for_files_active(files):
-    """Waits for the given files to be active."""
-    print("Waiting for file processing...")
-    for name in (file.name for file in files):
-        file = genai.get_file(name)
-        while file.state.name == "PROCESSING":
-            print(".", end="", flush=True)
-            time.sleep(10)
-            file = genai.get_file(name)
-        if file.state.name != "ACTIVE":
-            raise Exception(f"File {file.name} failed to process")
-    print("...all files ready")
-    print()
-# Step 8: Function to generate the single Tamil script
 def generate_tamil_script(video_file_path):
     """Generates a single, continuous Tamil script from the video."""
-    try:
-        video_file = upload_to_gemini(video_file_path, mime_type="video/mp4")
-        wait_for_files_active([video_file])
-        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
-        print("Generating single narrator script...")
-        response = model.generate_content(
-            [GEMINI_PROMPT, video_file],
-            request_options={"timeout": 1000},
-             safety_settings={
-                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-            }
-        )
-        print(f"Deleting uploaded file from Gemini: {video_file.name}")
-        genai.delete_file(video_file.name)
-        if response.text:
-            return " ".join(response.text.strip().splitlines())
-        else:
-            return "Error: No valid script was generated by Gemini."
-    except Exception as e:
-        return f"Error in Gemini generation: {str(e)}"
-# Step 9: Simplified function to generate a single audio file
 def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path):
     """Generates one continuous audio track for the entire script."""
-    try:
-        print(f"Generating single audio track with voice '{voice_name}' | Cheerful: {is_cheerful}")
-        payload = {
-            "text": dialogue_text,
-            "voice_name": voice_name,
-            "cheerful": is_cheerful
-        }
-        response = requests.post(TTS_API_URL, json=payload)
-        if response.status_code == 200:
-            with open(output_path, "wb") as f:
-                f.write(response.content)
-            print(f"Audio track saved successfully to {output_path}")
-            return True
-        else:
-            print(f"Error from TTS API: {response.status_code} - {response.text}")
-            return False
-    except Exception as e:
-        print(f"An error occurred in generate_single_audio_track: {str(e)}")
-        return False
-# Step 10: Function to replace video audio
 def replace_video_audio(video_path, new_audio_path, output_path):
     """Replaces the audio of a video with a new audio file."""
-    try:
-        video_clip = VideoFileClip(video_path)
-        audio_clip = AudioFileClip(new_audio_path)
-        final_clip = video_clip.set_audio(audio_clip)
-        final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
-        video_clip.close()
-        audio_clip.close()
-        final_clip.close()
-        return output_path
-    except Exception as e:
-        return f"Error replacing video audio: {str(e)}"
-# Step 11: Main processing function
-def process_video_single_speaker(video_path, voice_choice, is_cheerful):
-    """Processes the video for single-speaker dubbing."""
-    if not video_path:
-        return None, "Please upload a video file first."
-    try:
-        print("-" * 50)
-        print(f"Starting single-speaker processing for: {video_path}")
-        script = generate_tamil_script(video_path)
-        if script.startswith("Error:"):
-            return None, script
-        print("\n--- Generated Script ---\n", script, "\n------------------------\n")
         voice_name = VOICE_CHOICES[voice_choice]
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-            temp_audio_path = temp_audio.name
-        success = generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path)
-        if not success:
-            return None, "Failed to generate the audio track."
-        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_video:
-            output_video_path = temp_video.name
-        result = replace_video_audio(video_path, temp_audio_path, output_video_path)
-        if isinstance(result, str) and result.startswith("Error:"):
-            return None, result
-        os.remove(temp_audio_path)
-        print("Processing complete!")
-        print("-" * 50)
-        return result, script
-    except Exception as e:
-        return None, f"An unexpected error occurred: {str(e)}"
-# Step 12: Gradio Interface
-def create_gradio_interface():
-    with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# AI Single-Speaker Video Dubbing")
-        gr.Markdown("Upload a video and choose a voice. The AI will transcribe all speech into a single script and re-voice the entire video with the selected narrator.")
-        with gr.Row():
-            with gr.Column(scale=1):
-                video_input = gr.Video(label="Upload Video File")
-                voice_selector = gr.Radio(
-                    list(VOICE_CHOICES.keys()),
-                    label="Select Narrator Voice",
-                    value="Male (Charon)"
-                )
-                cheerful_checkbox = gr.Checkbox(label="Enable Cheerful Tone", value=False)
-                process_button = gr.Button("Generate Dubbed Video", variant="primary")
-            with gr.Column(scale=1):
-                video_output = gr.Video(label="Dubbed Video")
-                dialogue_output = gr.Textbox(label="Generated Full Script", lines=15, interactive=False)
-        process_button.click(
-            fn=process_video_single_speaker,
-            inputs=[video_input, voice_selector, cheerful_checkbox],
-            outputs=[video_output, dialogue_output]
-        )
-    return demo
-# Step 13: Launch the Gradio app
-if __name__ == "__main__":
-    gradio_app = create_gradio_interface()
-    gradio_app.launch(debug=True)

import os
import tempfile
import time
import uuid

import google.generativeai as genai
import requests
from flask import Flask, request, render_template, send_from_directory, url_for
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from moviepy.editor import VideoFileClip, AudioFileClip
from pydub import AudioSegment
from werkzeug.utils import secure_filename
# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
app = Flask(__name__)

# Both secrets are read from the process environment; nothing is hard-coded.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
TTS_API_URL = os.environ.get("TTS_API_URL")

# Fail at import time when a secret is missing or empty, Gemini key first.
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY secret not found! Please set it as an environment variable.")
if not TTS_API_URL:
    raise ValueError("TTS_API_URL secret not found! Please set it as an environment variable.")

# Configure the Gemini API with the loaded key
genai.configure(api_key=GEMINI_API_KEY)

# Working directories: uploads receive the posted videos, downloads hold the
# dubbed results. Both are registered on the app and created if absent.
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER, DOWNLOAD_FOLDER=DOWNLOAD_FOLDER)
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

# --- 2. DEFINE VOICE CHOICES AND GEMINI PROMPT ---
# Form label -> voice name sent to the TTS service.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr"
}

# Instruction text sent to Gemini together with the uploaded video.
GEMINI_PROMPT = """
You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
**CRITICAL INSTRUCTIONS:**
1.  **Single Script:** Combine all dialogue from all speakers into one continuous script.
2.  **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
3.  **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text.
**EXAMPLE OUTPUT:**
Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
"""
+# --- 3. HELPER FUNCTIONS (CORE LOGIC) ---
 def generate_tamil_script(video_file_path):
     """Generates a single, continuous Tamil script from the video."""
+    print("Uploading file to Gemini for transcription...")
+    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")
+    print("Waiting for file processing...")
+    while video_file.state.name == "PROCESSING":
+        time.sleep(10)
+        video_file = genai.get_file(video_file.name)
+    if video_file.state.name != "ACTIVE":
+        raise Exception(f"File {video_file.name} failed to process")
+    print("Generating single narrator script...")
+    model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
+    response = model.generate_content([GEMINI_PROMPT, video_file])
+    genai.delete_file(video_file.name)
+    print("Deleted file from Gemini.")
+    if response.text:
+        return " ".join(response.text.strip().splitlines())
+    raise Exception("No valid script was generated by Gemini.")
 def generate_single_audio_track(dialogue_text, voice_name, is_cheerful, output_path):
     """Generates one continuous audio track for the entire script."""
+    print(f"Generating audio with voice '{voice_name}' | Cheerful: {is_cheerful}")
+    payload = {"text": dialogue_text, "voice_name": voice_name, "cheerful": is_cheerful}
+    response = requests.post(TTS_API_URL, json=payload)
+    if response.status_code == 200:
+        with open(output_path, "wb") as f:
+            f.write(response.content)
+        print(f"Audio track saved successfully to {output_path}")
+        return True
+    raise Exception(f"Error from TTS API: {response.status_code} - {response.text}")
 def replace_video_audio(video_path, new_audio_path, output_path):
     """Replaces the audio of a video with a new audio file."""
+    print("Replacing video audio...")
+    video_clip = VideoFileClip(video_path)
+    audio_clip = AudioFileClip(new_audio_path)
+    final_clip = video_clip.set_audio(audio_clip)
+    final_clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
+    video_clip.close()
+    audio_clip.close()
+    final_clip.close()
+    print(f"Final video saved to {output_path}")
+# --- 4. FLASK ROUTES ---
@app.route('/', methods=['GET'])
def index():
    """Serve the upload form: the ``index.html`` template with no result data."""
    upload_page = render_template('index.html')
    return upload_page
def _discard_file(path):
    """Delete *path* if it is set and exists; log, but do not raise, on failure."""
    if path is None:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        print(f"Could not remove {path}: {cleanup_error}")


@app.route('/process', methods=['POST'])
def process_video():
    """Handle the video upload and dubbing process.

    Expects a multipart form with a ``video`` file, a ``voice_choice`` field
    holding one of the ``VOICE_CHOICES`` labels and an optional ``cheerful``
    field. The video is saved, a script is generated from it, the script is
    turned into speech, and the speech replaces the video's audio track.

    Returns:
        The rendered ``index.html`` with ``result_video`` and ``script`` on
        success; a ``400`` message for a missing file or unknown voice; a
        ``500`` message if any processing step fails.
    """
    if 'video' not in request.files:
        return "No video file part", 400
    file = request.files['video']
    if not file.filename:
        return "No selected file", 400

    # Reject a missing or unknown voice up front instead of failing with a
    # KeyError (HTTP 500) after the upload has already been stored.
    voice_choice = request.form.get('voice_choice')
    if voice_choice not in VOICE_CHOICES:
        return "Invalid voice choice", 400
    voice_name = VOICE_CHOICES[voice_choice]
    is_cheerful = 'cheerful' in request.form

    # secure_filename may strip a name down to nothing or drop its extension
    # (e.g. for non-ASCII names), so fall back to safe defaults. The random
    # prefix keeps simultaneous uploads of the same name from overwriting
    # each other.
    stem, extension = os.path.splitext(secure_filename(file.filename))
    unique_name = f"{uuid.uuid4().hex}_{stem or 'video'}{extension or '.mp4'}"
    upload_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_name)
    final_video_name = f"dubbed_{unique_name}"
    final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)

    temp_audio_path = None
    try:
        file.save(upload_path)

        # Generate the script
        script = generate_tamil_script(upload_path)

        # Generate the audio track. mkstemp creates the file atomically,
        # unlike the deprecated, race-prone mktemp.
        audio_fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(audio_fd)
        generate_single_audio_track(script, voice_name, is_cheerful, temp_audio_path)

        # Create the final video
        replace_video_audio(upload_path, temp_audio_path, final_video_path)

        # Render the same page but now with the results
        return render_template('index.html',
                               result_video=url_for('serve_video', filename=final_video_name),
                               script=script)
    except Exception as e:
        print(f"An error occurred: {e}")
        return f"An error occurred during processing: {e}", 500
    finally:
        # The intermediate audio and the original upload are not needed once
        # the request is finished, whether it succeeded or failed.
        _discard_file(temp_audio_path)
        _discard_file(upload_path)
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Serve the final dubbed video from the downloads directory.

    Args:
        filename: Name of the file inside the downloads directory, taken from
            the URL.

    Returns:
        The file response produced by ``send_from_directory``.
    """
    # send_from_directory resolves a relative directory against the
    # application's root path, whereas process_video writes the dubbed file
    # relative to the current working directory. Making the path absolute
    # here keeps both sides pointing at the same folder.
    download_dir = os.path.abspath(app.config['DOWNLOAD_FOLDER'])
    return send_from_directory(download_dir, filename)
if __name__ == '__main__':
    # host='0.0.0.0' makes the server reachable from the local network, so the
    # interactive debugger (which allows arbitrary code execution) must not be
    # on by default. Opt in explicitly with FLASK_DEBUG=1 during development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled, host='0.0.0.0', port=5001)