File size: 7,144 Bytes
d24a2f3 74d8a08 8a409a5 21e6f34 d24a2f3 d6208ae 202112a 8a409a5 08c3547 d24a2f3 21e6f34 08c3547 8a409a5 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 d24a2f3 8a814dc ab46005 8a814dc 893e301 ab46005 8a814dc d24a2f3 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 fe92f5a 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 21e6f34 893e301 352553f 21e6f34 893e301 74d8a08 893e301 21e6f34 893e301 8a409a5 21e6f34 893e301 8a409a5 21e6f34 893e301 d6208ae 893e301 21e6f34 893e301 21e6f34 9814147 21e6f34 893e301 21e6f34 a30e87b 893e301 21e6f34 d6208ae 893e301 21e6f34 d6208ae 21e6f34 893e301 d6208ae 893e301 08c3547 893e301 21e6f34 8a409a5 21e6f34 893e301 8a409a5 21e6f34 8a409a5 c599c00 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import mimetypes
import os
import tempfile
import time
import uuid

import google.generativeai as genai
import requests
from dotenv import load_dotenv
from flask import Flask, request, render_template, send_from_directory, url_for, flash
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.io.VideoFileClip import VideoFileClip
from werkzeug.utils import secure_filename
# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
# Pull variables from a local .env file (if any) into the process environment
# before anything below reads them.
load_dotenv()
app = Flask(__name__)

# Load secrets from environment variables
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")

# Validate required configurations: fail at import time rather than on the
# first request, since neither external service can be called without them.
if not GEMINI_API_KEY:
    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
if not TTS_API_URL:
    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")

# Configure Gemini AI
genai.configure(api_key=GEMINI_API_KEY)

# Configure directories (relative paths, created in the working directory)
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit

# Key used to sign the session cookie that carries flash messages. A key
# generated with os.urandom differs in every process and after every restart,
# so cookies signed by one worker are rejected by another. Prefer a stable key
# from the environment; keep the random key as the fallback when none is set.
app.secret_key = os.getenv("FLASK_SECRET_KEY") or os.urandom(24)
# --- 2. APPLICATION CONFIGURATION ---
# Voice options handed to the index.html template as `voices`.
# NOTE(review): keys look like display labels and values like the voice names
# sent to the TTS API (process_video defaults the form field to 'Charon') —
# confirm against the template.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr"
}
# Instruction text passed to Gemini together with the uploaded video in
# generate_tamil_script. It asks for one continuous Tamil script without
# timestamps or speaker labels, with English style prompts and performance
# tags embedded. The literal is sent as-is, so any edit changes model input.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
**CRITICAL INSTRUCTIONS:**
1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
**EXAMPLE OUTPUT:**
Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
"""
# --- 3. CORE APPLICATION FUNCTIONS ---
def generate_tamil_script(video_file_path):
    """Generate a single-line Tamil script for a video using Gemini.

    Uploads the video to Gemini, polls every 5 seconds while the file is in
    the ``PROCESSING`` state, then sends ``GEMINI_PROMPT`` together with the
    file to the ``models/gemini-2.5-flash`` model. The uploaded remote file is
    deleted afterwards, including when processing or generation fails.

    Args:
        video_file_path: Path of the local video file to upload.

    Returns:
        The generated text, stripped, with its lines joined by single spaces.

    Raises:
        Exception: If the uploaded file does not end up ``ACTIVE``, or if the
            response contains no usable text.
    """
    print("Uploading video to Gemini for transcription...")

    # Declare the upload's actual video type instead of always claiming MP4;
    # fall back to MP4 when the extension is unknown or not a video type.
    mime_type, _ = mimetypes.guess_type(video_file_path)
    if not mime_type or not mime_type.startswith("video/"):
        mime_type = "video/mp4"

    video_file = genai.upload_file(video_file_path, mime_type=mime_type)
    try:
        # Wait for file processing
        while video_file.state.name == "PROCESSING":
            time.sleep(5)
            video_file = genai.get_file(video_file.name)
        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini file processing failed: {video_file.state.name}")

        print("Generating script...")
        model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Always release the remote copy. Cleanup is best-effort so that a
        # failed delete neither hides the real error nor discards a script
        # that was generated successfully.
        try:
            genai.delete_file(video_file.name)
        except Exception as cleanup_error:
            print(f"Warning: could not delete Gemini file {video_file.name}: {cleanup_error}")

    # Reading `.text` can raise ValueError when the response holds no usable
    # text part; treat that the same as an empty answer.
    try:
        script_text = response.text
    except (AttributeError, ValueError):
        script_text = None

    if script_text:
        return " ".join(script_text.strip().splitlines())
    raise Exception("No valid script was generated by Gemini.")
def generate_audio_track(script_text, voice_name, is_cheerful, output_path):
    """Synthesize speech for a script through the TTS API and save it.

    Posts the script, voice name and cheerful flag as JSON to ``TTS_API_URL``
    (300 second timeout) and writes the raw response body to ``output_path``.

    Args:
        script_text: Text to be spoken.
        voice_name: Voice identifier forwarded to the TTS API.
        is_cheerful: Flag forwarded to the TTS API as ``cheerful``.
        output_path: File that receives the returned audio bytes.

    Returns:
        True once the audio has been written.

    Raises:
        Exception: If the TTS API answers with a status other than 200.
    """
    print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")
    tts_response = requests.post(
        TTS_API_URL,
        json={
            "text": script_text,
            "voice_name": voice_name,
            "cheerful": is_cheerful,
        },
        timeout=300,
    )

    # Bail out first so the success path below stays flat.
    if tts_response.status_code != 200:
        raise Exception(f"TTS API Error: {tts_response.status_code} - {tts_response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(tts_response.content)
    return True
def replace_video_audio(video_path, new_audio_path, output_path):
    """Write a copy of a video whose audio track is replaced.

    Opens the video and the new audio, attaches the audio to the video clip
    and encodes the result with libx264 video and AAC audio. Both clips are
    closed afterwards, audio first, whether or not encoding succeeds.

    Args:
        video_path: Source video file.
        new_audio_path: Audio file to use as the new track.
        output_path: Destination of the encoded video.
    """
    print("Replacing video audio...")
    source_video = VideoFileClip(video_path)
    try:
        dubbed_audio = AudioFileClip(new_audio_path)
        try:
            source_video.audio = dubbed_audio
            source_video.write_videofile(
                output_path,
                codec="libx264",
                audio_codec="aac",
                logger='bar',
            )
        finally:
            dubbed_audio.close()
    finally:
        source_video.close()
# --- 4. FLASK ROUTES ---
@app.route('/', methods=['GET'])
def index():
    """Serve the upload page with the configured voice options."""
    template_context = {"voices": VOICE_CHOICES}
    return render_template('index.html', **template_context)
@app.route('/process', methods=['POST'])
def process_video():
    """Handle a video upload and render the page with the dubbed result.

    Steps: save the upload, generate a Tamil script from it, synthesize the
    script to a temporary WAV file, then write a copy of the video carrying
    that audio into the download folder. Any failure is flashed to the user
    and the upload page is rendered again. The saved upload and the temporary
    audio file are removed in every case.

    Form fields read:
        video: The uploaded file (required).
        voice: Voice name passed to the TTS step; defaults to 'Charon'.
        tone: Treated as cheerful when its value is 'on'.

    Returns:
        The rendered ``index.html`` page; on success it also receives the URL
        of the dubbed video and the generated script.
    """
    input_video_path = None
    temp_audio_path = None
    try:
        # Validate file upload
        upload = request.files.get('video')
        if upload is None or not upload.filename:
            flash("Please upload a video file.", "error")
            return render_template('index.html', voices=VOICE_CHOICES)

        # A per-request id keeps simultaneous uploads that share a filename
        # from overwriting each other's input and output files.
        job_id = uuid.uuid4().hex

        # secure_filename can return an empty string (for example when the
        # name has no ASCII characters); fall back to the id alone so the
        # save path never points at the folder itself.
        filename = secure_filename(upload.filename)
        stored_name = f"{job_id}_{filename}" if filename else job_id
        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)
        upload.save(input_video_path)

        # Get processing options
        voice_choice = request.form.get('voice', 'Charon')
        is_cheerful = request.form.get('tone') == 'on'

        # Generate script and audio
        script = generate_tamil_script(input_video_path)

        # Reserve a temporary file name, then close the handle before the
        # audio is written to that path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name
        generate_audio_track(script, voice_choice, is_cheerful, temp_audio_path)

        # The output is encoded as H.264/AAC, so always give it an .mp4 name
        # instead of inheriting whatever extension the upload had.
        stem = os.path.splitext(filename)[0]
        if stem:
            final_video_name = f"dubbed_{job_id}_{stem}.mp4"
        else:
            final_video_name = f"dubbed_{job_id}.mp4"
        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
        replace_video_audio(input_video_path, temp_audio_path, final_video_path)

        flash("Video processing complete!", "success")
        return render_template(
            'index.html',
            voices=VOICE_CHOICES,
            result_video=url_for('serve_video', filename=final_video_name),
            script=script
        )
    except Exception as e:
        # Top-level request boundary: report the failure on the page instead
        # of returning a bare 500.
        print(f"Processing error: {str(e)}")
        flash(f"An error occurred: {str(e)}", "error")
        return render_template('index.html', voices=VOICE_CHOICES)
    finally:
        # Clean up temporary files. A failed delete is only logged so it
        # cannot replace the response that is already being returned.
        for stale_path in (input_video_path, temp_audio_path):
            if stale_path and os.path.exists(stale_path):
                try:
                    os.remove(stale_path)
                except OSError as cleanup_error:
                    print(f"Cleanup warning: could not remove {stale_path}: {cleanup_error}")
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Serve a processed video from the download folder.

    Args:
        filename: Name of the file inside the download folder, taken from
            the URL.

    Returns:
        The file response produced by ``send_from_directory``.
    """
    # The download folder is configured as a relative path and files are
    # written to it relative to the working directory, whereas Flask resolves
    # a relative directory against the application root. Make the path
    # absolute so both sides refer to the same folder.
    download_dir = os.path.abspath(app.config['DOWNLOAD_FOLDER'])
    return send_from_directory(download_dir, filename)
# --- 5. APPLICATION ENTRY POINT ---
if __name__ == '__main__':
    # Run Flask's built-in server on all interfaces, port 7860, when the file
    # is executed directly.
    app.run(host="0.0.0.0", port=7860)