File size: 12,398 Bytes
d24a2f3 0a39518 8a409a5 21e6f34 d24a2f3 358d8c6 c901468 0a39518 08c3547 1d99855 358d8c6 d24a2f3 358d8c6 08c3547 1d99855 8a409a5 dd03a74 a2554b6 358d8c6 893e301 dd03a74 0a39518 358d8c6 0a39518 358d8c6 dd03a74 0a39518 1d99855 358d8c6 893e301 21e6f34 358d8c6 b3273f6 358d8c6 b3273f6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 dd03a74 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 dd03a74 358d8c6 b3273f6 358d8c6 dd03a74 358d8c6 1d99855 358d8c6 1d99855 dd03a74 b3273f6 dd03a74 358d8c6 1d99855 b3273f6 1d99855 358d8c6 b3273f6 358d8c6 dd03a74 358d8c6 c3c3d92 dd03a74 b3273f6 358d8c6 b3273f6 1d99855 358d8c6 dd03a74 1d99855 dd03a74 1d99855 358d8c6 1d99855 dd03a74 358d8c6 dd03a74 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 dd03a74 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 358d8c6 1d99855 0a39518 358d8c6 1d99855 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 |
import io
import logging
import os
import re
import tempfile
import threading
import time
import uuid
from pathlib import Path

import google.generativeai as genai
import requests
from dotenv import load_dotenv
from flask import (
    Flask,
    jsonify,
    render_template,
    request,
    send_from_directory,
    url_for,
)
from gtts import gTTS
from moviepy.audio.io.AudioFileClip import AudioFileClip
from moviepy.video.io.VideoFileClip import VideoFileClip
from werkzeug.utils import secure_filename
# Initialize Flask app
load_dotenv()  # Load variables from a local .env file before reading them below
app = Flask(__name__)

# Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if GEMINI_API_KEY:
    # Hand the key to the Gemini SDK explicitly instead of relying on the
    # SDK discovering it from the environment on its own.
    genai.configure(api_key=GEMINI_API_KEY)
TTS_API_URL = os.getenv("TTS_API_URL", "")  # Optional
MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB
MAX_TTS_RETRIES = 3
TTS_CHUNK_SIZE = 2000  # Characters per chunk

# File storage setup: both folders are created at import time if missing
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
Path(UPLOAD_FOLDER).mkdir(exist_ok=True)
Path(DOWNLOAD_FOLDER).mkdir(exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
app.secret_key = os.urandom(24)  # Fresh random key on every process start

# Processing status tracking: task_id -> dict with status/progress/message/...
processing_status = {}

# Language and voice options (display name -> code)
LANGUAGE_MAPPING = {
    "Arabic (Egyptian)": "ar-EG",
    "English (US)": "en-US",
    "Hindi (India)": "hi-IN",
    "Tamil (India)": "ta-IN",
    "Telugu (India)": "te-IN"
}
VOICE_TYPES = {
    "Male": "male",
    "Female": "female"
}

# Transcription prompts keyed by TTS provider; "{language}" is filled in
# with the selected language display name before the prompt is sent.
GEMINI_PROMPTS = {
    "api": """
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
CONTINUOUS block of modern {language}. Include natural speech patterns and
performance directions (e.g., [pause], [laugh]) where appropriate.
""",
    "gtts": """
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
CONTINUOUS block of modern {language}. Return ONLY the clean transcribed text.
"""
}

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def _wrap_long_sentence(sentence, limit):
    """Break one sentence into pieces of at most ``limit`` characters.

    Cuts at the last space that fits when there is one, otherwise cuts
    mid-word at exactly ``limit`` characters.
    """
    pieces = []
    while len(sentence) > limit:
        cut = sentence.rfind(" ", 0, limit + 1)
        if cut <= 0:
            cut = limit  # no usable space: hard cut
        pieces.append(sentence[:cut].strip())
        sentence = sentence[cut:].strip()
    if sentence:
        pieces.append(sentence)
    return pieces


def split_text_into_chunks(text, chunk_size=TTS_CHUNK_SIZE):
    """Split text into chunks respecting sentence boundaries.

    Sentences (split after ``.``, ``!`` or ``?`` followed by whitespace) are
    packed greedily: a sentence joins the current chunk while the chunk plus
    a separating space plus the sentence stays below ``chunk_size``.

    Args:
        text: The text to split. Empty or whitespace-only text yields ``[]``.
        chunk_size: Target maximum number of characters per chunk.

    Returns:
        A list of non-empty, stripped chunks. A single sentence longer than
        ``chunk_size`` is broken into several pieces instead of being
        emitted as one oversized chunk.
    """
    if not text or not text.strip():
        return []

    limit = max(chunk_size, 1)  # guard against a non-positive size
    chunks = []
    current_chunk = ""
    for sentence in re.split(r'(?<=[.!?])\s+', text.strip()):
        for piece in _wrap_long_sentence(sentence.strip(), limit):
            if not current_chunk:
                current_chunk = piece
            elif len(current_chunk) + 1 + len(piece) < chunk_size:
                current_chunk = f"{current_chunk} {piece}"
            else:
                chunks.append(current_chunk)
                current_chunk = piece
    if current_chunk:
        chunks.append(current_chunk)
    return chunks
def _synthesize_chunk(chunk, language_code, voice_type, tts_provider):
    """Synthesize a single text chunk, retrying up to MAX_TTS_RETRIES times.

    Returns an ``io.BytesIO`` with the audio bytes, or raises ``Exception``
    (chained to the last underlying failure) once all attempts are used up.
    """
    last_error = None
    for attempt in range(MAX_TTS_RETRIES):
        is_last_attempt = attempt == MAX_TTS_RETRIES - 1
        try:
            if tts_provider == "api":
                # Use custom TTS API
                payload = {
                    "text": chunk,
                    "language": language_code,
                    "voice_type": voice_type
                }
                response = requests.post(TTS_API_URL, json=payload, timeout=300)
                if response.status_code == 200:
                    return io.BytesIO(response.content)
                if response.status_code == 429:  # Rate limit
                    retry_after = int(response.headers.get('Retry-After', 5))
                    # Remember the rate limit as a failure so that running
                    # out of attempts raises instead of dropping the chunk.
                    last_error = Exception("TTS API error: 429")
                    if not is_last_attempt:
                        logger.warning(f"TTS API rate limited. Retrying after {retry_after}s")
                        time.sleep(retry_after)
                    continue
                raise Exception(f"TTS API error: {response.status_code}")

            # Use gTTS, which is given only the primary language subtag
            tts = gTTS(
                text=chunk,
                lang=language_code.split('-')[0],
                slow=False
            )
            buffer = io.BytesIO()
            tts.write_to_fp(buffer)
            buffer.seek(0)
            return buffer
        except Exception as e:
            last_error = e
            logger.warning(f"TTS attempt {attempt + 1} failed: {str(e)}")
            if not is_last_attempt:
                time.sleep(2 ** attempt)  # Exponential backoff
    raise Exception(
        f"Failed to generate TTS after {MAX_TTS_RETRIES} attempts"
    ) from last_error


def generate_tts_audio(text, language_code, voice_type, tts_provider):
    """Generate TTS audio using selected provider with retry logic.

    The text is split with ``split_text_into_chunks`` and each non-blank
    chunk is synthesized separately; the resulting byte streams are
    concatenated in order.

    Args:
        text: Script to narrate.
        language_code: Language tag such as ``"en-US"``.
        voice_type: Voice selector; only forwarded to the custom TTS API.
        tts_provider: ``"api"`` posts to ``TTS_API_URL``; any other value
            uses gTTS.

    Returns:
        An ``io.BytesIO`` positioned at the start of the combined audio.

    Raises:
        Exception: If a chunk cannot be synthesized within the retry
            budget, or if there is no text to synthesize at all.
    """
    audio_segments = [
        _synthesize_chunk(chunk, language_code, voice_type, tts_provider)
        for chunk in split_text_into_chunks(text)
        if chunk.strip()  # an empty chunk cannot be spoken
    ]
    if not audio_segments:
        raise Exception("No text available for TTS generation")

    # Combine audio segments
    combined_audio = io.BytesIO()
    for segment in audio_segments:
        combined_audio.write(segment.getvalue())
    combined_audio.seek(0)
    return combined_audio
def generate_transcription(video_path, prompt):
    """Generate transcript using Gemini with retry logic.

    Each attempt uploads the video, asks the model for a transcript and
    then removes the uploaded file again, whether or not generation
    succeeded.

    Args:
        video_path: Path of the local video file to upload.
        prompt: Instruction text sent to the model alongside the video.

    Returns:
        The stripped transcript text.

    Raises:
        Exception: Whatever the final attempt raised, after three attempts.
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            video_file = genai.upload_file(video_path, mime_type="video/mp4")
            try:
                model = genai.GenerativeModel("models/gemini-pro-vision")
                response = model.generate_content([prompt, video_file])
            finally:
                # Always remove the remote copy. A cleanup failure is only
                # logged so it cannot discard an otherwise good response.
                try:
                    genai.delete_file(video_file.name)
                except Exception as cleanup_error:
                    logger.warning(
                        f"Could not delete uploaded file: {str(cleanup_error)}"
                    )
            if hasattr(response, 'text'):
                return response.text.strip()
            raise Exception("No valid transcription generated")
        except Exception as e:
            if attempt == max_retries - 1:
                raise
            logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
            time.sleep(5 * (attempt + 1))  # wait longer after each failure
def dub_video(video_path, audio_buffer):
    """Dub video with new audio.

    Args:
        video_path: Path of the source video.
        audio_buffer: Readable binary buffer holding the replacement audio;
            it is read from its current position.

    Returns:
        Path of the encoded MP4 inside ``app.config['DOWNLOAD_FOLDER']``.

    The audio is trimmed to the video's duration when it runs longer. The
    temporary audio file is always removed, and a partially written output
    file is removed when encoding fails.
    """
    video = None
    audio = None
    temp_audio_path = None
    output_path = None
    finished = False
    try:
        # Save audio buffer to a temp file in the system temp directory
        # rather than the current working directory.
        with tempfile.NamedTemporaryFile(
            prefix="temp_audio_", suffix=".mp3", delete=False
        ) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_buffer.read())

        # Process video
        video = VideoFileClip(video_path)
        audio = AudioFileClip(temp_audio_path)

        # Ensure audio length matches video
        if audio.duration > video.duration:
            audio = audio.subclip(0, video.duration)
        video = video.set_audio(audio)

        # Save output
        output_filename = f"dubbed_{uuid.uuid4().hex}.mp4"
        output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], output_filename)
        video.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            threads=4,
            verbose=False,
            preset='medium',
            ffmpeg_params=['-crf', '23', '-movflags', '+faststart']
        )
        finished = True
        return output_path
    finally:
        # Cleanup resources
        if video is not None:
            video.close()
        if audio is not None:
            audio.close()
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)
        # Do not leave a truncated file behind in the download folder
        if not finished and output_path and os.path.exists(output_path):
            os.unlink(output_path)
def process_video_background(task_id, video_path, language, voice_type, tts_provider):
    """Run the transcribe -> narrate -> dub pipeline for one uploaded video.

    Progress is published through ``processing_status[task_id]``. Any
    failure is recorded there as an ``'error'`` status and logged; the
    uploaded video is removed at the end regardless of the outcome.
    """
    try:
        job = {
            'status': 'processing',
            'progress': 0,
            'message': 'Starting transcription',
            'start_time': time.time()
        }
        processing_status[task_id] = job

        # Stage 1: Transcription
        job['message'] = 'Transcribing video content'
        transcript = generate_transcription(
            video_path,
            GEMINI_PROMPTS[tts_provider].format(language=language),
        )
        job['progress'] = 33
        job['script'] = transcript

        # Stage 2: Audio Generation
        job['message'] = 'Generating audio narration'
        narration = generate_tts_audio(
            transcript,
            LANGUAGE_MAPPING.get(language, "en-US"),
            voice_type,
            tts_provider,
        )
        job['progress'] = 66

        # Stage 3: Video Dubbing
        job['message'] = 'Creating dubbed video'
        dubbed_path = dub_video(video_path, narration)
        job['progress'] = 100
        job['status'] = 'complete'
        job['result_path'] = dubbed_path
    except Exception as e:
        failed_job = processing_status[task_id]
        failed_job['status'] = 'error'
        failed_job['message'] = str(e)
        logger.error(f"Processing failed: {str(e)}")
    finally:
        # Cleanup: the uploaded source is no longer needed
        if os.path.exists(video_path):
            os.unlink(video_path)
@app.route('/')
def index():
    """Serve the landing page with the available language and voice choices."""
    page_context = {
        'languages': list(LANGUAGE_MAPPING),
        'voice_types': list(VOICE_TYPES),
        'default_language': "English (US)",
        # The custom TTS option is offered only when a URL is configured
        'tts_api_available': bool(TTS_API_URL),
    }
    return render_template('index.html', **page_context)
@app.route('/upload', methods=['POST'])
def upload_video():
    """Handle video upload.

    Validates the uploaded file and the processing options, stores the
    video under a unique name and starts background processing.

    Returns:
        JSON ``{'task_id': ...}`` on success, or JSON ``{'error': ...}``
        with status 400 (bad request data) or 500 (file could not be saved).
    """
    if 'video' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400
    upload = request.files['video']
    if not upload.filename:  # covers both '' and a missing filename
        return jsonify({'error': 'No file selected'}), 400

    # Validate file extension
    allowed_extensions = {'mp4', 'mov', 'webm', 'avi'}
    name_parts = upload.filename.rsplit('.', 1)
    if len(name_parts) < 2 or name_parts[1].lower() not in allowed_extensions:
        return jsonify({'error': 'Invalid file type'}), 400

    # Get and validate processing options BEFORE touching the disk, so a
    # rejected request never leaves an orphaned upload behind.
    language = request.form.get('language', 'English (US)')
    voice_type = request.form.get('voice_type', 'Male')
    tts_provider = request.form.get('tts_provider', 'gtts')

    # The provider selects a prompt template in the worker; reject unknown
    # values here instead of failing later in the background thread.
    if tts_provider not in GEMINI_PROMPTS:
        return jsonify({'error': 'Invalid TTS provider'}), 400
    if tts_provider == "api" and not TTS_API_URL:
        return jsonify({'error': 'TTS API is not configured'}), 400
    # The language is interpolated into the model prompt, so only accept
    # the names this application offers.
    if language not in LANGUAGE_MAPPING:
        return jsonify({'error': 'Unsupported language'}), 400

    # Save file with unique name
    task_id = uuid.uuid4().hex
    filename = secure_filename(f"{task_id}_{upload.filename}")
    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    try:
        upload.save(video_path)
    except Exception as e:
        return jsonify({'error': f'Failed to save file: {str(e)}'}), 500

    # Start background processing
    processing_status[task_id] = {
        'status': 'uploaded',
        'progress': 0,
        'message': 'Starting processing',
        'start_time': time.time()
    }
    thread = threading.Thread(
        target=process_video_background,
        args=(task_id, video_path, language, voice_type, tts_provider)
    )
    thread.start()
    return jsonify({'task_id': task_id})
@app.route('/status/<task_id>')
def get_status(task_id):
    """Check processing status.

    Returns:
        JSON with ``status``, ``progress`` and ``message``. A completed
        task additionally carries ``result_url`` and ``script``; a failed
        task carries ``error_details``. Unknown task IDs yield a 404 with
        an ``error`` field.
    """
    status = processing_status.get(task_id)
    if status is None:
        return jsonify({'error': 'Invalid task ID'}), 404

    response = {
        'status': status['status'],
        'progress': status.get('progress', 0),
        'message': status.get('message', ''),
    }
    if status['status'] == 'complete':
        # url_for comes from flask; it must be imported at the top of the
        # file for this branch to work.
        response['result_url'] = url_for(
            'download',
            filename=os.path.basename(status['result_path'])
        )
        response['script'] = status.get('script', '')
    elif status['status'] == 'error':
        response['error_details'] = status.get('message', 'Unknown error')
    return jsonify(response)
@app.route('/download/<filename>')
def download(filename):
    """Send a finished video to the client as an attachment.

    Only names of the form ``dubbed_*.mp4`` that exist in the download
    folder are served; anything else gets a 400 or 404. Unexpected errors
    are logged and reported as a 500.
    """
    try:
        # Security check: only files produced by the dubbing step qualify
        has_expected_name = filename.startswith('dubbed_') and filename.endswith('.mp4')
        if not has_expected_name:
            return "Invalid file", 400

        # Validate path
        folder = app.config['DOWNLOAD_FOLDER']
        candidate = Path(folder) / filename
        if not candidate.exists():
            return "File not found", 404

        return send_from_directory(
            folder,
            filename,
            as_attachment=True,
            mimetype='video/mp4'
        )
    except Exception as e:
        logger.error(f"Download failed: {str(e)}")
        return "Download error", 500
if __name__ == '__main__':
    # Refuse to start without a Gemini key: transcription cannot work.
    if not GEMINI_API_KEY:
        raise ValueError("GEMINI_API_KEY is required in .env file")
    # Listen on all interfaces, port 7860, handling requests in threads.
    app.run(host="0.0.0.0", port=7860, threaded=True)