Athspi committed on
Commit
358d8c6
·
verified ·
1 Parent(s): c6b149b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -285
app.py CHANGED
@@ -4,334 +4,313 @@ import tempfile
4
  import uuid
5
  import google.generativeai as genai
6
  import requests
7
- from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
 
8
  from moviepy.video.io.VideoFileClip import VideoFileClip
9
  from moviepy.audio.io.AudioFileClip import AudioFileClip
10
  from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
12
  import threading
13
- from datetime import datetime, timedelta
14
  import logging
 
 
 
15
 
16
- # Initialize Flask app and load secrets
17
  load_dotenv()
18
  app = Flask(__name__)
19
 
20
  # Configuration
21
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
22
- TTS_API_URL = os.getenv("TTS_API_URL")
23
-
24
- if not GEMINI_API_KEY or not TTS_API_URL:
25
- raise ValueError("Missing required environment variables")
26
-
27
- genai.configure(api_key=GEMINI_API_KEY)
28
 
29
  # File storage setup
30
  UPLOAD_FOLDER = 'uploads'
31
  DOWNLOAD_FOLDER = 'downloads'
32
- os.makedirs(UPLOAD_FOLDER, exist_ok=True)
33
- os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
34
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
35
  app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
36
- app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024 # 500MB
37
  app.secret_key = os.urandom(24)
38
 
39
  # Processing status tracking
40
  processing_status = {}
41
- processing_times = {
42
- 'upload': 0,
43
- 'transcription': 0,
44
- 'tts': 0,
45
- 'dubbing': 0
46
- }
47
 
48
- # Voice options
49
- VOICE_CHOICES = {
50
- "Male (Charon)": "Charon",
51
- "Female (Zephyr)": "Zephyr"
 
 
 
52
  }
53
 
54
- GEMINI_PROMPT = """
55
- You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
56
-
57
- **CRITICAL INSTRUCTIONS:**
58
- 1. **Single Script:** Combine all dialogue into one continuous script.
59
- 2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
60
- 3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
61
 
62
- **EXAMPLE OUTPUT:**
63
- Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
64
- """
 
 
 
 
 
 
 
 
65
 
66
  # Configure logging
67
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
68
  logger = logging.getLogger(__name__)
69
 
70
- def track_processing_time(task_id, stage, duration):
71
- """Track processing times for each stage"""
72
- processing_times[stage] = duration
73
- if task_id in processing_status:
74
- processing_status[task_id]['timings'][stage] = duration
75
-
76
- def estimate_remaining_time(task_id):
77
- """Estimate remaining processing time"""
78
- if task_id not in processing_status:
79
- return "Calculating..."
80
-
81
- status = processing_status[task_id]
82
- completed_stages = [s for s in status['timings'] if status['timings'][s] is not None]
83
 
84
- if len(completed_stages) == 0:
85
- return "Starting soon..."
 
 
 
 
86
 
87
- # Weighted average based on stage complexity
88
- weights = {
89
- 'transcription': 2.0,
90
- 'tts': 1.5,
91
- 'dubbing': 1.0
92
- }
93
-
94
- total_weighted_time = 0
95
- total_weights = 0
96
 
97
- for stage in completed_stages:
98
- weight = weights.get(stage, 1.0)
99
- total_weighted_time += status['timings'][stage] * weight
100
- total_weights += weight
 
 
101
 
102
- if total_weights == 0:
103
- return "Estimating..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- avg_time = total_weighted_time / total_weights
106
- remaining_stages = 4 - len(completed_stages)
107
- return remaining_stages * avg_time
108
-
109
- def process_video_background(task_id, video_path, voice, cheerful):
110
- """Background processing function with enhanced logging"""
111
- try:
112
- start_time = time.time()
113
- processing_status[task_id] = {
114
- 'status': 'processing',
115
- 'progress': 0,
116
- 'message': 'Starting transcription',
117
- 'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
118
- 'start_time': start_time,
119
- 'video_duration': get_video_duration(video_path)
120
- }
121
-
122
- # Stage 1: Transcription
123
- processing_status[task_id]['message'] = 'Transcribing video content'
124
- logger.info(f"Task {task_id}: Starting transcription")
125
- script_start = time.time()
126
- script = generate_tamil_script(video_path)
127
- transcription_time = time.time() - script_start
128
- track_processing_time(task_id, 'transcription', transcription_time)
129
- processing_status[task_id]['progress'] = 25
130
- processing_status[task_id]['script'] = script
131
- logger.info(f"Task {task_id}: Transcription completed in {transcription_time:.1f}s")
132
-
133
- # Stage 2: TTS Generation
134
- processing_status[task_id]['message'] = 'Generating audio narration'
135
- logger.info(f"Task {task_id}: Starting TTS generation")
136
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
137
- audio_path = temp_audio.name
138
-
139
- tts_start = time.time()
140
- generate_audio_track(script, voice, cheerful, audio_path)
141
- tts_time = time.time() - tts_start
142
- track_processing_time(task_id, 'tts', tts_time)
143
- processing_status[task_id]['progress'] = 50
144
- logger.info(f"Task {task_id}: TTS completed in {tts_time:.1f}s")
145
-
146
- # Stage 3: Dubbing
147
- processing_status[task_id]['message'] = 'Creating dubbed video'
148
- logger.info(f"Task {task_id}: Starting dubbing")
149
- final_filename = f"dubbed_{task_id}.mp4"
150
- final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
151
-
152
- dubbing_start = time.time()
153
- replace_video_audio(video_path, audio_path, final_path)
154
- dubbing_time = time.time() - dubbing_start
155
- track_processing_time(task_id, 'dubbing', dubbing_time)
156
- processing_status[task_id]['progress'] = 75
157
- logger.info(f"Task {task_id}: Dubbing completed in {dubbing_time:.1f}s")
158
 
159
- # Cleanup
160
- os.unlink(audio_path)
161
-
162
- # Finalize
163
- processing_status[task_id].update({
164
- 'status': 'complete',
165
- 'progress': 100,
166
- 'message': 'Processing complete',
167
- 'result_path': final_path,
168
- 'end_time': time.time()
169
- })
170
- logger.info(f"Task {task_id}: Processing completed successfully")
171
-
172
- except Exception as e:
173
- logger.error(f"Task {task_id} failed: {str(e)}")
174
- processing_status[task_id].update({
175
- 'status': 'error',
176
- 'message': f'Error: {str(e)}'
177
- })
178
- # Cleanup temporary files
179
- if 'video_path' in locals() and os.path.exists(video_path):
180
- os.unlink(video_path)
181
- if 'audio_path' in locals() and os.path.exists(audio_path):
182
- os.unlink(audio_path)
183
-
184
- def get_video_duration(video_path):
185
- """Get duration of video in seconds"""
186
- try:
187
- with VideoFileClip(video_path) as video:
188
- return video.duration
189
- except:
190
- return 0
191
-
192
- def generate_tamil_script(video_path):
193
- """Generate Tamil script using Gemini with retry logic"""
194
  max_retries = 3
195
- retry_delay = 10 # seconds
196
-
197
  for attempt in range(max_retries):
198
  try:
199
  video_file = genai.upload_file(video_path, mime_type="video/mp4")
200
-
201
- # Wait for file processing with timeout
202
- start_wait = time.time()
203
- while video_file.state.name == "PROCESSING":
204
- if time.time() - start_wait > 300: # 5 minutes timeout
205
- raise TimeoutError("Gemini processing timed out")
206
- time.sleep(5)
207
- video_file = genai.get_file(video_file.name)
208
-
209
- if video_file.state.name != "ACTIVE":
210
- raise Exception(f"Gemini processing failed: {video_file.state.name}")
211
-
212
- model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
213
- response = model.generate_content([GEMINI_PROMPT, video_file])
214
  genai.delete_file(video_file.name)
215
 
216
- if hasattr(response, 'text') and response.text:
217
- return " ".join(response.text.strip().splitlines())
218
- raise Exception("No valid script generated")
219
 
220
  except Exception as e:
221
- if attempt < max_retries - 1:
222
- logger.warning(f"Gemini error (attempt {attempt+1}/{max_retries}): {str(e)}")
223
- time.sleep(retry_delay * (attempt + 1))
224
- else:
225
  raise
 
 
226
 
227
- def generate_audio_track(text, voice, cheerful, output_path):
228
- """Generate audio using TTS API with retry logic"""
229
- max_retries = 3
230
- retry_delay = 5 # seconds
231
-
232
- for attempt in range(max_retries):
233
- try:
234
- payload = {
235
- "text": text,
236
- "voice_name": voice,
237
- "cheerful": cheerful
238
- }
239
-
240
- response = requests.post(TTS_API_URL, json=payload, timeout=300)
241
- if response.status_code != 200:
242
- raise Exception(f"TTS API error: {response.status_code} - {response.text}")
243
-
244
- with open(output_path, "wb") as f:
245
- f.write(response.content)
246
- return
247
-
248
- except Exception as e:
249
- if attempt < max_retries - 1:
250
- logger.warning(f"TTS error (attempt {attempt+1}/{max_retries}): {str(e)}")
251
- time.sleep(retry_delay * (attempt + 1))
252
- else:
253
- raise
254
-
255
- def replace_video_audio(video_path, audio_path, output_path):
256
- """Replace video audio track with enhanced error handling"""
257
  video = None
258
  audio = None
 
 
259
  try:
260
- # Open video and audio files
 
 
 
 
 
261
  video = VideoFileClip(video_path)
262
- audio = AudioFileClip(audio_path)
263
 
264
- # Set video audio
265
- video.audio = audio
 
 
 
 
 
 
 
266
 
267
- # Write output with optimized settings
268
  video.write_videofile(
269
  output_path,
270
  codec="libx264",
271
  audio_codec="aac",
272
- logger=None,
273
  threads=4,
 
274
  preset='medium',
275
  ffmpeg_params=['-crf', '23', '-movflags', '+faststart']
276
  )
277
 
278
- except Exception as e:
279
- logger.error(f"Video processing error: {str(e)}")
280
- # Cleanup partially created file
281
- if os.path.exists(output_path):
282
- os.unlink(output_path)
283
- raise
284
  finally:
 
285
  if video:
286
  video.close()
287
  if audio:
288
  audio.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
  @app.route('/')
291
  def index():
292
- """Main page"""
293
- return render_template('index.html', voices=VOICE_CHOICES)
 
 
 
 
 
 
294
 
295
  @app.route('/upload', methods=['POST'])
296
  def upload_video():
297
- """Handle video upload and start processing"""
298
  if 'video' not in request.files:
299
  return jsonify({'error': 'No file uploaded'}), 400
300
 
301
  file = request.files['video']
302
  if file.filename == '':
303
  return jsonify({'error': 'No file selected'}), 400
304
-
305
- # Generate unique task ID
306
- task_id = str(uuid.uuid4())
 
 
 
 
 
307
  filename = secure_filename(f"{task_id}_{file.filename}")
308
  video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
309
- file.save(video_path)
310
 
 
 
 
 
 
311
  # Get processing options
312
- voice = request.form.get('voice', 'Charon')
313
- cheerful = request.form.get('cheerful', 'false') == 'true'
314
-
 
 
 
 
 
315
  # Start background processing
316
  processing_status[task_id] = {
317
  'status': 'uploaded',
318
  'progress': 0,
319
  'message': 'Starting processing',
320
- 'timings': {'upload': time.time(), 'transcription': None, 'tts': None, 'dubbing': None},
321
- 'start_time': time.time(),
322
- 'video_duration': get_video_duration(video_path)
323
  }
324
-
325
  thread = threading.Thread(
326
  target=process_video_background,
327
- args=(task_id, video_path, voice, cheerful)
328
  )
329
  thread.start()
330
-
331
- return jsonify({
332
- 'task_id': task_id,
333
- 'video_duration': processing_status[task_id]['video_duration']
334
- })
335
 
336
  @app.route('/status/<task_id>')
337
  def get_status(task_id):
@@ -340,70 +319,47 @@ def get_status(task_id):
340
  return jsonify({'error': 'Invalid task ID'}), 404
341
 
342
  status = processing_status[task_id]
343
-
344
- # Calculate ETA if processing
345
- eta = None
346
- if status['status'] == 'processing':
347
- elapsed = time.time() - status['start_time']
348
- remaining = estimate_remaining_time(task_id)
349
- if isinstance(remaining, (int, float)):
350
- eta = str(timedelta(seconds=int(remaining)))
351
-
352
  response = {
353
  'status': status['status'],
354
  'progress': status.get('progress', 0),
355
  'message': status.get('message', ''),
356
- 'eta': eta
357
  }
358
-
359
  if status['status'] == 'complete':
360
- response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
 
 
 
361
  response['script'] = status.get('script', '')
 
 
362
 
363
  return jsonify(response)
364
 
365
  @app.route('/download/<filename>')
366
  def download(filename):
367
- """Serve processed video"""
368
- return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
369
-
370
- @app.route('/cleanup', methods=['POST'])
371
- def cleanup():
372
- """Cleanup old files"""
373
  try:
374
- # Cleanup uploads older than 1 hour
375
- for filename in os.listdir(UPLOAD_FOLDER):
376
- file_path = os.path.join(UPLOAD_FOLDER, filename)
377
- if os.path.getmtime(file_path) < time.time() - 3600:
378
- os.unlink(file_path)
379
 
380
- # Cleanup downloads older than 24 hours
381
- for filename in os.listdir(DOWNLOAD_FOLDER):
382
- file_path = os.path.join(DOWNLOAD_FOLDER, filename)
383
- if os.path.getmtime(file_path) < time.time() - 86400:
384
- os.unlink(file_path)
385
-
386
- return jsonify({'status': 'success', 'message': 'Cleanup completed'})
 
 
 
 
387
  except Exception as e:
388
- return jsonify({'status': 'error', 'message': str(e)}), 500
 
389
 
390
  if __name__ == '__main__':
391
- # Schedule cleanup thread
392
- import schedule
393
- import time as t
394
- def cleanup_job():
395
- with app.app_context():
396
- app.test_client().post('/cleanup')
397
-
398
- schedule.every().hour.do(cleanup_job)
399
-
400
- # Start scheduler in background thread
401
- def scheduler_thread():
402
- while True:
403
- schedule.run_pending()
404
- t.sleep(1)
405
-
406
- threading.Thread(target=scheduler_thread, daemon=True).start()
407
-
408
- # Start Flask app
409
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
4
  import uuid
5
  import google.generativeai as genai
6
  import requests
7
+ import re
8
+ from flask import Flask, request, render_template, send_from_directory, jsonify
9
  from moviepy.video.io.VideoFileClip import VideoFileClip
10
  from moviepy.audio.io.AudioFileClip import AudioFileClip
11
  from werkzeug.utils import secure_filename
12
  from dotenv import load_dotenv
13
  import threading
 
14
  import logging
15
+ from gtts import gTTS
16
+ import io
17
+ from pathlib import Path
18
 
19
# Initialize Flask app
load_dotenv()
app = Flask(__name__)

# Configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL", "")  # Optional
MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB
MAX_TTS_RETRIES = 3
TTS_CHUNK_SIZE = 2000  # Characters per chunk

# Hand the key to the Gemini client. The previous revision did this via
# genai.configure(); nothing else in this module passes GEMINI_API_KEY on,
# so without this call the value loaded above is never used.
# The presence check itself stays in the __main__ guard, so importing the
# module without a key does not raise.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# File storage setup
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
Path(UPLOAD_FOLDER).mkdir(exist_ok=True)
Path(DOWNLOAD_FOLDER).mkdir(exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
# A fresh random key on every start: anything signed with it does not
# survive a restart.
app.secret_key = os.urandom(24)

# Processing status tracking: task_id -> status dict, written by the
# background worker and read by the status endpoint.
processing_status = {}
 
 
 
 
 
 
42
 
43
# Language and voice options
# Display name shown to the user -> locale code handed to the TTS stage.
LANGUAGE_MAPPING = dict([
    ("Arabic (Egyptian)", "ar-EG"),
    ("English (US)", "en-US"),
    ("Hindi (India)", "hi-IN"),
    ("Tamil (India)", "ta-IN"),
    ("Telugu (India)", "te-IN"),
])

# Display name -> lowercase voice identifier.
VOICE_TYPES = dict(Male="male", Female="female")

# Prompt templates keyed by TTS provider; "{language}" is filled in with
# str.format() before the prompt is sent to Gemini.
GEMINI_PROMPTS = dict(
    api="""
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
CONTINUOUS block of modern {language}. Include natural speech patterns and
performance directions (e.g., [pause], [laugh]) where appropriate.
""",
    gtts="""
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
CONTINUOUS block of modern {language}. Return ONLY the clean transcribed text.
""",
)
68
 
69
# Configure logging: timestamped INFO-level records on the root logger,
# plus a module-level logger used throughout this file.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)
75
 
76
def split_text_into_chunks(text, chunk_size=TTS_CHUNK_SIZE):
    """Split text into TTS-sized chunks, preferring sentence boundaries.

    Sentences (text ending in '.', '!' or '?') are packed greedily into
    chunks. A single sentence longer than ``chunk_size`` is wrapped at the
    last space that fits, or cut hard when it contains no usable space.

    Args:
        text: The script to split.
        chunk_size: Maximum number of characters per returned chunk.

    Returns:
        A list of non-empty, stripped strings, none longer than
        ``chunk_size``. Empty or whitespace-only input yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not text or not text.strip():
        return []

    # Pass 1: sentence pieces, each guaranteed to fit into one chunk.
    pieces = []
    for sentence in re.split(r'(?<=[.!?])\s+', text.strip()):
        while len(sentence) > chunk_size:
            # Prefer breaking on a space so words stay intact.
            cut = sentence.rfind(" ", 1, chunk_size + 1)
            if cut == -1:
                cut = chunk_size
            pieces.append(sentence[:cut].rstrip())
            sentence = sentence[cut:].lstrip()
        if sentence:
            pieces.append(sentence)

    # Pass 2: greedy packing. Because every piece fits on its own, a chunk
    # is only flushed when it already holds text, so no empty chunk is
    # ever emitted.
    chunks = []
    current = ""
    for piece in pieces:
        candidate = f"{current} {piece}" if current else piece
        if len(candidate) <= chunk_size:
            current = candidate
        else:
            chunks.append(current)
            current = piece
    if current:
        chunks.append(current)

    return chunks
93
+
94
def generate_tts_audio(text, language_code, voice_type, tts_provider):
    """Synthesize speech for ``text`` and return it as one in-memory buffer.

    The text is split with ``split_text_into_chunks`` and every chunk is
    synthesized separately, with up to ``MAX_TTS_RETRIES`` attempts each.

    Args:
        text: Script to speak.
        language_code: Locale such as ``"ta-IN"``. The custom API receives it
            unchanged; gTTS receives only the part before the hyphen.
        voice_type: Sent to the custom API only; the gTTS branch does not
            use it.
        tts_provider: ``"api"`` posts to ``TTS_API_URL``; any other value
            uses gTTS.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the audio of all
        chunks concatenated in order.

    Raises:
        Exception: If there is nothing to synthesize, or if a chunk still
            fails (including repeated HTTP 429 responses) after
            ``MAX_TTS_RETRIES`` attempts. The last underlying error is
            chained as the cause.
    """
    audio_segments = []

    for chunk in split_text_into_chunks(text):
        if not chunk.strip():
            # Nothing to speak in this chunk; skip it rather than send it.
            continue

        last_error = None
        for attempt in range(MAX_TTS_RETRIES):
            is_last_attempt = attempt == MAX_TTS_RETRIES - 1
            try:
                if tts_provider == "api":
                    # Use custom TTS API
                    payload = {
                        "text": chunk,
                        "language": language_code,
                        "voice_type": voice_type
                    }
                    response = requests.post(TTS_API_URL, json=payload, timeout=300)

                    if response.status_code == 429:  # Rate limit
                        try:
                            retry_after = int(response.headers.get('Retry-After', 5))
                        except (TypeError, ValueError):
                            # Retry-After may be an HTTP date; use the default.
                            retry_after = 5
                        last_error = Exception("TTS API error: 429")
                        logger.warning(f"TTS API rate limited. Retrying after {retry_after}s")
                        if not is_last_attempt:
                            time.sleep(retry_after)
                        continue
                    if response.status_code != 200:
                        raise Exception(f"TTS API error: {response.status_code}")
                    segment = io.BytesIO(response.content)
                else:
                    # Use gTTS
                    tts = gTTS(
                        text=chunk,
                        lang=language_code.split('-')[0],
                        slow=False
                    )
                    segment = io.BytesIO()
                    tts.write_to_fp(segment)
                    segment.seek(0)

                audio_segments.append(segment)
                break

            except Exception as e:
                last_error = e
                logger.warning(f"TTS attempt {attempt + 1} failed: {str(e)}")
                if not is_last_attempt:
                    time.sleep(2 ** attempt)  # Exponential backoff
        else:
            # Every attempt failed or was rate limited. Raising here keeps a
            # chunk from being dropped silently from the narration.
            raise Exception(
                f"Failed to generate TTS after {MAX_TTS_RETRIES} attempts"
            ) from last_error

    if not audio_segments:
        raise Exception("No text available for TTS generation")

    # Combine audio segments by raw byte concatenation.
    # NOTE(review): this assumes the provider returns a stream format that
    # tolerates concatenation (e.g. MP3) - confirm for the custom API.
    return io.BytesIO(b"".join(segment.getvalue() for segment in audio_segments))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
def generate_transcription(video_path, prompt):
    """Upload a video to Gemini and return the generated transcript.

    Each attempt uploads the file, waits until it has left the PROCESSING
    state, asks the model for a transcript and deletes the uploaded file
    again, whether or not the attempt succeeded.

    Args:
        video_path: Path of the local video file to upload.
        prompt: Instruction text sent to the model together with the video.

    Returns:
        The stripped, non-empty transcript text.

    Raises:
        Exception: The error of the final attempt when all attempts fail
            (upload or processing failure, timeout, empty response).
    """
    max_retries = 3
    processing_timeout = 300  # seconds to wait for the upload to become usable

    for attempt in range(max_retries):
        video_file = None
        try:
            video_file = genai.upload_file(video_path, mime_type="video/mp4")

            # Wait for the uploaded file to finish processing, as the
            # previous revision of this function did before calling the model.
            wait_started = time.time()
            while video_file.state.name == "PROCESSING":
                if time.time() - wait_started > processing_timeout:
                    raise TimeoutError("Gemini processing timed out")
                time.sleep(5)
                video_file = genai.get_file(video_file.name)

            if video_file.state.name != "ACTIVE":
                raise Exception(f"Gemini processing failed: {video_file.state.name}")

            # NOTE(review): the previous revision used
            # "models/gemini-2.5-flash"; confirm this model accepts uploaded
            # video input.
            model = genai.GenerativeModel("models/gemini-pro-vision")
            response = model.generate_content([prompt, video_file])

            transcript = (getattr(response, 'text', None) or "").strip()
            if transcript:
                return transcript
            raise Exception("No valid transcription generated")

        except Exception as e:
            if attempt == max_retries - 1:
                raise
            logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
            time.sleep(5 * (attempt + 1))

        finally:
            # Remove the remote copy on failure as well, so retries do not
            # leave uploaded files behind.
            if video_file is not None:
                try:
                    genai.delete_file(video_file.name)
                except Exception as cleanup_error:
                    logger.warning(f"Could not delete uploaded file: {str(cleanup_error)}")
166
 
167
def dub_video(video_path, audio_buffer):
    """Replace the audio track of a video and write the result to disk.

    Args:
        video_path: Path of the source video.
        audio_buffer: Binary file-like object holding the new audio; it is
            read from its current position.

    Returns:
        Path of the written file, ``dubbed_<hex>.mp4`` inside the configured
        download folder.

    Raises:
        Exception: Whatever the file or MoviePy operations raise. A partly
            written output file is removed before the error propagates.
    """
    # Local import keeps this block self-contained.
    import tempfile

    video = None
    audio = None
    temp_audio_path = None
    output_path = None

    try:
        # Save audio buffer to a temp file in the system temp directory
        # instead of the process working directory.
        with tempfile.NamedTemporaryFile(
            prefix="temp_audio_", suffix=".mp3", delete=False
        ) as temp_audio:
            temp_audio_path = temp_audio.name
            temp_audio.write(audio_buffer.read())

        # Process video
        video = VideoFileClip(video_path)
        audio = AudioFileClip(temp_audio_path)

        # Trim narration that runs past the end of the video. Shorter
        # narration is left as it is.
        if audio.duration > video.duration:
            audio = audio.subclip(0, video.duration)

        video = video.set_audio(audio)

        # Save output
        output_filename = f"dubbed_{uuid.uuid4().hex}.mp4"
        output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], output_filename)

        video.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            threads=4,
            verbose=False,
            preset='medium',
            ffmpeg_params=['-crf', '23', '-movflags', '+faststart']
        )

        return output_path

    except Exception:
        # Do not leave a truncated file in the download folder.
        if output_path and os.path.exists(output_path):
            os.unlink(output_path)
        raise

    finally:
        # Cleanup resources
        if video:
            video.close()
        if audio:
            audio.close()
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)
213
+
214
def process_video_background(task_id, video_path, language, voice_type, tts_provider):
    """Run the dubbing pipeline for one upload and record its progress.

    Stages: transcription (progress 33), audio generation (66) and dubbing
    (100). Progress, the script and the result path are written to
    ``processing_status[task_id]``. The uploaded video is deleted when the
    function finishes, whether it succeeded or failed.

    Args:
        task_id: Key of this job in ``processing_status``.
        video_path: Path of the uploaded video.
        language: Display name of the target language; inserted into the
            prompt and looked up in ``LANGUAGE_MAPPING`` ("en-US" if absent).
        voice_type: Passed through to ``generate_tts_audio``.
        tts_provider: Selects the prompt and the TTS backend. Unknown values
            use the "gtts" prompt, matching the TTS stage, which treats
            everything except "api" as gTTS.

    Returns:
        None. Errors are not raised; they are stored as status ``'error'``
        with the exception text as the message.
    """
    try:
        processing_status[task_id] = {
            'status': 'processing',
            'progress': 0,
            'message': 'Starting transcription',
            'start_time': time.time()
        }

        # Stage 1: Transcription
        processing_status[task_id]['message'] = 'Transcribing video content'
        prompt_template = GEMINI_PROMPTS.get(tts_provider, GEMINI_PROMPTS["gtts"])
        prompt = prompt_template.format(language=language)
        script = generate_transcription(video_path, prompt)
        processing_status[task_id]['progress'] = 33
        processing_status[task_id]['script'] = script

        # Stage 2: Audio Generation
        processing_status[task_id]['message'] = 'Generating audio narration'
        language_code = LANGUAGE_MAPPING.get(language, "en-US")
        audio_buffer = generate_tts_audio(script, language_code, voice_type, tts_provider)
        processing_status[task_id]['progress'] = 66

        # Stage 3: Video Dubbing
        processing_status[task_id]['message'] = 'Creating dubbed video'
        output_path = dub_video(video_path, audio_buffer)
        processing_status[task_id]['progress'] = 100
        processing_status[task_id]['status'] = 'complete'
        processing_status[task_id]['result_path'] = output_path

    except Exception as e:
        # setdefault keeps the error visible even if the status entry was
        # never created.
        processing_status.setdefault(task_id, {}).update({
            'status': 'error',
            'message': str(e)
        })
        # logger.exception records the traceback with the message.
        logger.exception(f"Processing failed: {str(e)}")

    finally:
        # Cleanup: the upload is no longer needed. A failed delete must not
        # raise out of the worker thread.
        try:
            os.unlink(video_path)
        except FileNotFoundError:
            pass
        except OSError as cleanup_error:
            logger.warning(f"Could not remove upload {video_path}: {str(cleanup_error)}")
253
 
254
@app.route('/')
def index():
    """Serve the landing page with the selectable dubbing options."""
    # Template context: the display names from the option tables, the
    # preselected language, and whether a custom TTS endpoint is configured.
    context = {
        'languages': list(LANGUAGE_MAPPING.keys()),
        'voice_types': list(VOICE_TYPES.keys()),
        'default_language': "English (US)",
        'tts_api_available': bool(TTS_API_URL),
    }
    return render_template('index.html', **context)
264
 
265
@app.route('/upload', methods=['POST'])
def upload_video():
    """Accept a video upload and start the dubbing job in the background.

    Form fields: ``video`` (file, required), ``language`` (default
    "English (US)"), ``voice_type`` (default "Male") and ``tts_provider``
    (default "gtts").

    Returns:
        JSON ``{'task_id': ...}`` on success; JSON ``{'error': ...}`` with
        status 400 for invalid input or 500 when the file cannot be saved.
    """
    if 'video' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    file = request.files['video']
    if file.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Validate file extension
    allowed_extensions = {'mp4', 'mov', 'webm', 'avi'}
    if '.' not in file.filename or file.filename.rsplit('.', 1)[1].lower() not in allowed_extensions:
        return jsonify({'error': 'Invalid file type'}), 400

    # Get processing options
    language = request.form.get('language', 'English (US)')
    voice_type = request.form.get('voice_type', 'Male')
    tts_provider = request.form.get('tts_provider', 'gtts')

    # Validate every option before anything is written to disk, so a
    # rejected request never leaves an orphaned upload behind.
    if language not in LANGUAGE_MAPPING:
        return jsonify({'error': 'Unsupported language'}), 400
    if tts_provider not in GEMINI_PROMPTS:
        return jsonify({'error': 'Invalid TTS provider'}), 400
    if tts_provider == "api" and not TTS_API_URL:
        return jsonify({'error': 'TTS API is not configured'}), 400

    # Save file with unique name
    task_id = uuid.uuid4().hex
    filename = secure_filename(f"{task_id}_{file.filename}")
    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    try:
        file.save(video_path)
    except Exception as e:
        # Remove a partially written file before reporting the failure.
        if os.path.exists(video_path):
            os.unlink(video_path)
        return jsonify({'error': f'Failed to save file: {str(e)}'}), 500

    # Start background processing
    processing_status[task_id] = {
        'status': 'uploaded',
        'progress': 0,
        'message': 'Starting processing',
        'start_time': time.time()
    }

    thread = threading.Thread(
        target=process_video_background,
        args=(task_id, video_path, language, voice_type, tts_provider)
    )
    thread.start()

    return jsonify({'task_id': task_id})
 
 
 
314
 
315
  @app.route('/status/<task_id>')
316
  def get_status(task_id):
 
319
  return jsonify({'error': 'Invalid task ID'}), 404
320
 
321
  status = processing_status[task_id]
 
 
 
 
 
 
 
 
 
322
  response = {
323
  'status': status['status'],
324
  'progress': status.get('progress', 0),
325
  'message': status.get('message', ''),
 
326
  }
327
+
328
  if status['status'] == 'complete':
329
+ response['result_url'] = url_for(
330
+ 'download',
331
+ filename=os.path.basename(status['result_path'])
332
+ )
333
  response['script'] = status.get('script', '')
334
+ elif status['status'] == 'error':
335
+ response['error_details'] = status.get('message', 'Unknown error')
336
 
337
  return jsonify(response)
338
 
339
@app.route('/download/<filename>')
def download(filename):
    """Send a finished dubbed video to the client as an attachment.

    Only names of the form ``dubbed_*.mp4`` that exist in the download
    folder are served. Responds 400 for other names, 404 for missing files
    and 500 when an exception is raised inside the handler.
    """
    try:
        # Security check: accept only the names this app generates.
        name_is_valid = filename.startswith('dubbed_') and filename.endswith('.mp4')
        if not name_is_valid:
            return "Invalid file", 400

        # Validate path
        candidate = Path(app.config['DOWNLOAD_FOLDER']) / filename
        if candidate.exists():
            return send_from_directory(
                app.config['DOWNLOAD_FOLDER'],
                filename,
                as_attachment=True,
                mimetype='video/mp4'
            )
        return "File not found", 404
    except Exception as e:
        logger.error(f"Download failed: {str(e)}")
        return "Download error", 500
361
 
362
if __name__ == '__main__':
    # Fail fast when started as a script without the Gemini key; otherwise
    # start the server on all interfaces, port 7860.
    if GEMINI_API_KEY:
        app.run(host="0.0.0.0", port=7860, threaded=True)
    else:
        raise ValueError("GEMINI_API_KEY is required in .env file")