Translate / app.py
Athspi's picture
Update app.py
dd03a74 verified
raw
history blame
6.67 kB
import os
import time
import tempfile
import uuid
import google.generativeai as genai
import requests
from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.audio.io.AudioFileClip import AudioFileClip
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
# Initialize Flask app
# Load variables from a local .env file (if present) before the environment
# is read below.
load_dotenv()
app = Flask(__name__, template_folder='templates', static_folder='static')

# Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")

# Fail fast at import time, and say exactly which settings are absent so a
# misconfigured deployment can be fixed without reading the source.
_missing_env = [
    name
    for name, value in (
        ("GEMINI_API_KEY", GEMINI_API_KEY),
        ("TTS_API_URL", TTS_API_URL),
    )
    if not value
]
if _missing_env:
    raise ValueError(
        f"Missing required environment variables: {', '.join(_missing_env)}"
    )
genai.configure(api_key=GEMINI_API_KEY)

# File storage setup
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB

# A key generated with os.urandom() changes on every start and differs between
# worker processes, which invalidates flashed messages and sessions. Prefer a
# stable key from the optional FLASK_SECRET_KEY variable and only fall back to
# a random one when it is not set.
app.secret_key = os.getenv("FLASK_SECRET_KEY") or os.urandom(24)
# Voice options: maps the label passed to the index template onto the voice
# name used as the default/identifier for the TTS request.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr",
}
# Instruction text sent to the Gemini model together with the uploaded video
# (see generate_tamil_script). It asks for one continuous Tamil script with
# inline English style prompts and performance tags, and no timestamps or
# speaker labels. The text is part of the program's behavior: edit with care.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
**CRITICAL INSTRUCTIONS:**
1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
**EXAMPLE OUTPUT:**
Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
"""
def generate_tamil_script(video_path, *, poll_interval=5, max_wait=600):
    """Generate a Tamil narration script for a video using Gemini.

    The video is uploaded to Gemini, polled until it leaves the PROCESSING
    state, and then passed to the model together with GEMINI_PROMPT. The
    uploaded remote file is always deleted afterwards, whether or not script
    generation succeeded.

    Args:
        video_path: Path of the local video file to upload.
        poll_interval: Seconds to sleep between processing-state checks.
        max_wait: Maximum number of seconds to wait for Gemini to finish
            processing the upload before giving up.

    Returns:
        The model's text with surrounding whitespace stripped and line breaks
        replaced by single spaces.

    Raises:
        Exception: If processing times out or ends in a non-ACTIVE state, if
            the model returns no text, or if any Gemini call fails (the
            underlying exception is logged and re-raised).
    """
    video_file = None
    try:
        print("Uploading video to Gemini...")
        video_file = genai.upload_file(video_path, mime_type="video/mp4")

        # Wait for processing, but never block the request indefinitely.
        deadline = time.monotonic() + max_wait
        while video_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                raise Exception(
                    f"Gemini processing timed out after {max_wait} seconds"
                )
            time.sleep(poll_interval)
            video_file = genai.get_file(video_file.name)

        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini processing failed: {video_file.state.name}")

        print("Generating script...")
        model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
        response = model.generate_content([GEMINI_PROMPT, video_file])

        if hasattr(response, 'text') and response.text:
            return " ".join(response.text.strip().splitlines())
        raise Exception("No valid script generated")
    except Exception as e:
        print(f"Script generation failed: {str(e)}")
        raise
    finally:
        # Remove the remote copy on every path so failed runs do not leave
        # uploads behind. A cleanup failure is logged only: it must not hide
        # the real error or discard a script that was generated successfully.
        if video_file is not None:
            try:
                genai.delete_file(video_file.name)
            except Exception as cleanup_error:
                print(f"Could not delete uploaded Gemini file: {cleanup_error}")
def generate_audio_track(text, voice, cheerful, output_path):
    """Request speech for *text* from the TTS API and write it to a file.

    Args:
        text: Script to be spoken; sent as the ``text`` field.
        voice: Voice identifier; sent as the ``voice_name`` field.
        cheerful: Flag forwarded unchanged as the ``cheerful`` field.
        output_path: File that receives the raw response body.

    Raises:
        Exception: If the request fails, the API answers with a status other
            than 200, or the file cannot be written. The error is logged and
            re-raised.
    """
    try:
        print("Generating audio track...")
        tts_reply = requests.post(
            TTS_API_URL,
            json={"text": text, "voice_name": voice, "cheerful": cheerful},
            timeout=300,
        )
        # Anything other than a plain 200 is treated as a failure.
        if tts_reply.status_code == 200:
            with open(output_path, "wb") as audio_file:
                audio_file.write(tts_reply.content)
            print("Audio track generated successfully")
        else:
            raise Exception(f"TTS API error: {tts_reply.status_code}")
    except Exception as err:
        print(f"Audio generation failed: {str(err)}")
        raise
def replace_video_audio(video_path, audio_path, output_path):
    """Write a copy of a video whose audio track is replaced by another file.

    Args:
        video_path: Source video file.
        audio_path: Audio file that becomes the new soundtrack.
        output_path: Destination of the re-encoded video.

    Raises:
        Exception: Any error raised while opening the clips or encoding the
            result is logged and re-raised. Opened clips are closed either way.
    """
    source_clip = None
    dubbed_track = None
    # NOTE(review): the `verbose` keyword may not be accepted by every moviepy
    # release - confirm against the installed version.
    encode_options = {
        "codec": "libx264",
        "audio_codec": "aac",
        "threads": 4,
        "verbose": False,
    }
    try:
        print("Replacing audio track...")
        source_clip = VideoFileClip(video_path)
        dubbed_track = AudioFileClip(audio_path)
        source_clip.audio = dubbed_track
        source_clip.write_videofile(output_path, **encode_options)
        print("Video processing complete!")
    except Exception as err:
        print(f"Video processing failed: {str(err)}")
        raise
    finally:
        # Close whatever was opened, video first, then audio.
        for clip in (source_clip, dubbed_track):
            if clip:
                clip.close()
@app.route('/')
def index():
    """Render the landing page with the available voice options."""
    page = render_template('index.html', voices=VOICE_CHOICES)
    return page
def _remove_if_exists(path):
    """Delete *path* when it is set; a missing file is not an error."""
    if not path:
        return
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError as cleanup_error:
        # Cleanup is best effort: report it, but do not fail the request.
        print(f"Could not remove {path}: {cleanup_error}")


@app.route('/process', methods=['POST'])
def process_video():
    """Handle an uploaded video: generate a script, synthesize audio, dub.

    Expects a multipart form with a ``video`` file and optional ``voice`` and
    ``cheerful`` fields.

    Returns:
        * The index page with a flashed error when no file was supplied.
        * JSON ``{'status': 'success', 'video_url': ..., 'script': ...}`` when
          the dubbed video was produced.
        * JSON ``{'status': 'error', 'message': ...}`` with HTTP 500 when any
          processing step fails.

    The uploaded video and the temporary audio file are removed on every path;
    a partially written output video is removed when processing fails.
    """
    if 'video' not in request.files:
        flash("No video file uploaded", "error")
        return render_template('index.html', voices=VOICE_CHOICES)

    file = request.files['video']
    if file.filename == '':
        flash("No file selected", "error")
        return render_template('index.html', voices=VOICE_CHOICES)

    # Save uploaded file (the UUID prefix keeps concurrent uploads apart)
    filename = secure_filename(f"{uuid.uuid4()}_{file.filename}")
    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(video_path)

    # Get processing options
    voice = request.form.get('voice', 'Charon')
    cheerful = request.form.get('cheerful') == 'on'

    audio_path = None
    final_path = None
    try:
        # Generate script
        script = generate_tamil_script(video_path)

        # Generate audio. The temp file is only reserved here (delete=False)
        # and is filled in by generate_audio_track afterwards.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            audio_path = temp_audio.name
        generate_audio_track(script, voice, cheerful, audio_path)

        # Create dubbed video
        final_filename = f"dubbed_{filename}"
        final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
        replace_video_audio(video_path, audio_path, final_path)

        return jsonify({
            'status': 'success',
            'video_url': url_for('download', filename=final_filename),
            'script': script
        })
    except Exception as e:
        # Do not leave a half-written result in the download folder.
        _remove_if_exists(final_path)
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    finally:
        # Intermediate files are never needed after the request, whether it
        # succeeded or failed.
        _remove_if_exists(audio_path)
        _remove_if_exists(video_path)
@app.route('/download/<filename>')
def download(filename):
    """Serve a processed video from the download folder.

    Args:
        filename: Name of the file inside the download folder, taken from the
            URL.

    Returns:
        The file response produced by ``send_from_directory``.
    """
    # Output files are written to DOWNLOAD_FOLDER relative to the process
    # working directory, but Flask resolves a relative directory passed to
    # send_from_directory against the application root. Making the directory
    # absolute here keeps both in agreement when the two locations differ.
    download_dir = os.path.abspath(app.config['DOWNLOAD_FOLDER'])
    return send_from_directory(download_dir, filename)
if __name__ == '__main__':
    # Start Flask's built-in server on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")