Athspi committed on
Commit
b3273f6
·
verified ·
1 Parent(s): c901468

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -200
app.py CHANGED
@@ -7,267 +7,270 @@ import requests
7
  from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
8
  from moviepy.video.io.VideoFileClip import VideoFileClip
9
  from moviepy.audio.io.AudioFileClip import AudioFileClip
10
- from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
11
- from moviepy.video.fx.all import resize, speedx
12
  from werkzeug.utils import secure_filename
13
  from dotenv import load_dotenv
14
- from PIL import Image, ImageDraw, ImageFont
15
- import numpy as np
16
 
17
- # --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
18
  load_dotenv()
19
  app = Flask(__name__)
20
 
21
- # Load secrets from environment variables
22
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
23
  TTS_API_URL = os.getenv("TTS_API_URL")
24
 
25
- # Validate required configurations
26
- if not GEMINI_API_KEY:
27
- raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
28
- if not TTS_API_URL:
29
- raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")
30
 
31
- # Configure Gemini AI
32
  genai.configure(api_key=GEMINI_API_KEY)
33
 
34
- # Configure directories
35
  UPLOAD_FOLDER = 'uploads'
36
  DOWNLOAD_FOLDER = 'downloads'
37
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
38
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
39
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
40
  app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
41
- app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100 MB upload limit
42
- app.secret_key = os.urandom(24) # Secure key for flash messages
43
 
44
- # --- 2. APPLICATION CONFIGURATION ---
 
 
 
 
 
 
 
 
 
45
  VOICE_CHOICES = {
46
  "Male (Charon)": "Charon",
47
  "Female (Zephyr)": "Zephyr"
48
  }
49
 
50
- EDITING_PRESETS = {
51
- "fast_cuts": {
52
- "speed": 1.2,
53
- "transition_duration": 0.3,
54
- "max_clip_duration": 5
55
- },
56
- "cinematic": {
57
- "speed": 0.95,
58
- "transition_duration": 1.0,
59
- "black_bars": True
60
- },
61
- "social_media": {
62
- "speed": 1.0,
63
- "aspect_ratio": (9, 16),
64
- "add_captions": True
65
- }
66
- }
67
-
68
  GEMINI_PROMPT = """
69
- You are an expert AI scriptwriter. Your task is to watch the provided video and:
70
- 1. Transcribe ALL spoken dialogue into modern, colloquial Tamil
71
- 2. Identify key moments for editing (action, emotion, important points)
72
- 3. Suggest timestamps for cuts/transitions
73
 
74
- **OUTPUT FORMAT:**
75
- {
76
- "script": "Combined Tamil dialogue with performance cues",
77
- "editing_notes": [
78
- {"timestamp": 12.5, "type": "cut", "reason": "action moment"},
79
- {"timestamp": 24.3, "type": "slow_mo", "reason": "emotional highlight"}
80
- ]
81
- }
82
  """
83
 
84
- # --- 3. CORE APPLICATION FUNCTIONS ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- def analyze_video(video_path):
87
- """Analyze video content and generate script with editing suggestions."""
88
- print("Analyzing video with Gemini...")
89
  video_file = genai.upload_file(video_path, mime_type="video/mp4")
90
 
91
- # Wait for file processing
92
  while video_file.state.name == "PROCESSING":
93
  time.sleep(5)
94
  video_file = genai.get_file(video_file.name)
95
 
96
  if video_file.state.name != "ACTIVE":
97
- raise Exception(f"Gemini file processing failed: {video_file.state.name}")
98
 
99
  model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
100
  response = model.generate_content([GEMINI_PROMPT, video_file])
101
  genai.delete_file(video_file.name)
102
 
103
  if hasattr(response, 'text') and response.text:
104
- try:
105
- return eval(response.text) # Convert string to dict
106
- except:
107
- return {"script": response.text, "editing_notes": []}
108
- raise Exception("No valid analysis was generated by Gemini.")
109
 
110
- def generate_audio(script_text, voice_name, is_cheerful):
111
- """Generate audio from script using TTS API."""
112
- print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")
113
  payload = {
114
- "text": script_text,
115
- "voice_name": voice_name,
116
- "cheerful": is_cheerful
117
  }
118
 
119
  response = requests.post(TTS_API_URL, json=payload, timeout=300)
120
- if response.status_code == 200:
121
- return response.content
122
- raise Exception(f"TTS API Error: {response.status_code} - {response.text}")
123
-
124
- def apply_editing(video_path, audio_data, editing_notes, preset_name):
125
- """Apply editing effects to video based on analysis and preset."""
126
- print(f"Applying {preset_name} editing preset...")
127
- preset = EDITING_PRESETS[preset_name]
128
 
129
- # Save audio to temp file
130
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
131
- temp_audio.write(audio_data)
132
- temp_audio_path = temp_audio.name
133
-
134
- # Load video and audio
135
- video = VideoFileClip(video_path)
136
- audio = AudioFileClip(temp_audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- # Apply basic preset effects
139
- if preset.get('speed'):
140
- video = video.fx(speedx, preset['speed'])
141
 
142
- # Apply black bars for cinematic
143
- if preset.get('black_bars'):
144
- def add_black_bars(get_frame, t):
145
- frame = get_frame(t)
146
- height, width = frame.shape[:2]
147
- new_height = int(height * 0.85)
148
- bar_size = (height - new_height) // 2
149
-
150
- # Create black image
151
- black_bar = np.zeros((bar_size, width, 3), dtype=np.uint8)
152
- processed_frame = np.vstack([black_bar, frame, black_bar])
153
- return processed_frame
154
-
155
- video = video.fl(add_black_bars)
156
 
157
- # Apply editing notes
158
- clips = []
159
- current_start = 0
160
 
161
- for note in editing_notes:
162
- if current_start >= note['timestamp']:
163
- continue
164
-
165
- clip = video.subclip(current_start, note['timestamp'])
166
-
167
- # Apply effect based on note type
168
- if note['type'] == 'slow_mo':
169
- clip = clip.fx(speedx, 0.5)
170
- elif note['type'] == 'fast_cut':
171
- clip = clip.fx(speedx, 1.5)
172
-
173
- clips.append(clip)
174
- current_start = note['timestamp']
175
 
176
- # Add remaining video
177
- if current_start < video.duration:
178
- clips.append(video.subclip(current_start))
 
179
 
180
- # Concatenate all clips
181
- final_video = concatenate_videoclips(clips)
182
- final_video = final_video.set_audio(audio)
 
 
 
 
183
 
184
- # Apply aspect ratio if specified
185
- if preset.get('aspect_ratio'):
186
- target_ratio = preset['aspect_ratio']
187
- final_video = final_video.resize(height=target_ratio[1])
188
 
189
- # Generate output path
190
- output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], f"edited_{os.path.basename(video_path)}")
191
- final_video.write_videofile(
192
- output_path,
193
- codec="libx264",
194
- audio_codec="aac",
195
- threads=4,
196
- preset='fast'
197
- )
198
 
199
- # Cleanup
200
- video.close()
201
- audio.close()
202
- os.unlink(temp_audio_path)
 
 
203
 
204
- return output_path
205
-
206
- # --- 4. FLASK ROUTES ---
207
-
208
- @app.route('/', methods=['GET'])
209
- def index():
210
- """Render the main upload page."""
211
- return render_template('index.html', voices=VOICE_CHOICES, presets=EDITING_PRESETS.keys())
212
-
213
- @app.route('/process', methods=['POST'])
214
- def process_video():
215
- """Handle video upload and processing."""
216
- input_video_path = None
217
 
218
- try:
219
- # Validate file upload
220
- if 'video' not in request.files or request.files['video'].filename == '':
221
- flash("Please upload a video file.", "error")
222
- return render_template('index.html',
223
- voices=VOICE_CHOICES,
224
- presets=EDITING_PRESETS.keys())
225
-
226
- # Save uploaded file
227
- file = request.files['video']
228
- filename = secure_filename(file.filename)
229
- input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
230
- file.save(input_video_path)
231
-
232
- # Get processing options
233
- voice_choice = request.form.get('voice', 'Charon')
234
- is_cheerful = request.form.get('tone') == 'on'
235
- preset_name = request.form.get('preset', 'fast_cuts')
236
-
237
- # Analyze video
238
- analysis = analyze_video(input_video_path)
239
- script = analysis.get('script', '')
240
- editing_notes = analysis.get('editing_notes', [])
241
-
242
- # Generate audio
243
- audio_data = generate_audio(script, voice_choice, is_cheerful)
244
-
245
- # Apply editing and generate final video
246
- final_video_path = apply_editing(input_video_path, audio_data, editing_notes, preset_name)
247
-
248
- return jsonify({
249
- 'status': 'success',
250
- 'video_url': url_for('serve_video', filename=os.path.basename(final_video_path)),
251
- 'script': script
252
- })
253
-
254
- except Exception as e:
255
- print(f"Processing error: {str(e)}")
256
- return jsonify({
257
- 'status': 'error',
258
- 'message': str(e)
259
- }), 500
260
-
261
- finally:
262
- # Clean up uploaded file
263
- if input_video_path and os.path.exists(input_video_path):
264
- os.remove(input_video_path)
265
 
266
- @app.route('/downloads/<filename>')
267
- def serve_video(filename):
268
- """Serve the processed video file."""
269
  return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
270
 
271
- # --- 5. APPLICATION ENTRY POINT ---
272
  if __name__ == '__main__':
273
  app.run(host="0.0.0.0", port=7860)
 
7
  from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
8
  from moviepy.video.io.VideoFileClip import VideoFileClip
9
  from moviepy.audio.io.AudioFileClip import AudioFileClip
 
 
10
  from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
12
+ import threading
13
+ from datetime import datetime, timedelta
14
 
15
# Initialize Flask app and load secrets
load_dotenv()
app = Flask(__name__)

# Configuration: both values are read from the process environment
# (load_dotenv() above has already been called at this point).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")

# Fail fast at import time and name exactly which settings are absent, so a
# misconfigured deployment can be fixed without guessing.
_missing_settings = [
    name
    for name, value in (
        ("GEMINI_API_KEY", GEMINI_API_KEY),
        ("TTS_API_URL", TTS_API_URL),
    )
    if not value
]
if _missing_settings:
    raise ValueError(
        "Missing required environment variables: " + ", ".join(_missing_settings)
    )

genai.configure(api_key=GEMINI_API_KEY)

# File storage setup: uploads hold incoming videos, downloads hold results.
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB
# NOTE: a fresh random key is generated on every start of the process.
app.secret_key = os.urandom(24)

# Processing status tracking.
# processing_status maps task_id -> dict with 'status', 'progress', 'message',
# 'timings' and 'start_time' (plus result fields once a task completes).
processing_status = {}
# Most recently recorded duration (seconds) of each stage, across all tasks.
processing_times = {
    'upload': 0,
    'transcription': 0,
    'tts': 0,
    'dubbing': 0
}

# Voice options: display label -> voice name sent to the TTS API.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr"
}

# Prompt sent to Gemini together with the uploaded video.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.

**CRITICAL INSTRUCTIONS:**
1. Combine all dialogue into one continuous script.
2. NO timestamps or speaker labels.
3. Add performance directions (e.g., `Say happily:`, `[laugh]`) directly in the text.
"""
61
 
62
def track_processing_time(task_id, stage, duration):
    """Record how long a pipeline stage took.

    The duration is always written to the module-wide ``processing_times``
    table. It is additionally stored under the task's own ``timings`` entry
    when ``task_id`` is present in ``processing_status``.
    """
    processing_times[stage] = duration

    try:
        task_entry = processing_status[task_id]
    except KeyError:
        # Unknown task: only the module-wide table is updated.
        return
    task_entry['timings'][stage] = duration
67
+
68
def estimate_remaining_time(task_id):
    """Estimate the remaining processing time for a task.

    Returns:
        ``"Calculating..."`` when the task is unknown,
        ``"Starting soon..."`` when no stage has a recorded duration yet,
        otherwise a number of seconds: the mean duration of the finished
        stages multiplied by the number of stages still without a duration.
    """
    status = processing_status.get(task_id)
    if status is None:
        return "Calculating..."

    timings = status['timings']
    finished = [duration for duration in timings.values() if duration is not None]
    if not finished:
        return "Starting soon..."

    average = sum(finished) / len(finished)
    # The stage count comes from the task's own timings table rather than a
    # hard-coded 4, so the estimate stays right if stages are added/removed.
    remaining_stages = len(timings) - len(finished)
    return remaining_stages * average
82
+
83
def process_video_background(task_id, video_path, voice, cheerful):
    """Run the dubbing pipeline for one uploaded video.

    Stages, in order: transcription (``generate_tamil_script``), TTS
    (``generate_audio_track``) and dubbing (``replace_video_audio``).
    Progress, messages and per-stage durations are written to
    ``processing_status[task_id]`` as the work advances.

    Args:
        task_id: Key of this job in ``processing_status``; also used to name
            the output file ``dubbed_<task_id>.mp4``.
        video_path: Path of the uploaded source video. It is deleted when the
            job ends, whether it succeeded or failed.
        voice: Voice name forwarded to the TTS API.
        cheerful: Tone flag forwarded to the TTS API.

    Raises:
        Exception: Any stage failure is recorded in the task status as
            ``'error'`` and then re-raised.
    """
    audio_path = None
    try:
        start_time = time.time()
        processing_status[task_id] = {
            'status': 'processing',
            'progress': 0,
            'message': 'Starting transcription',
            'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
            'start_time': start_time
        }

        # Stage 1: Transcription
        processing_status[task_id]['message'] = 'Transcribing video content'
        script_start = time.time()
        script = generate_tamil_script(video_path)
        track_processing_time(task_id, 'transcription', time.time() - script_start)
        processing_status[task_id]['progress'] = 25

        # Stage 2: TTS Generation
        processing_status[task_id]['message'] = 'Generating audio narration'
        # The temp file is only used to reserve a unique path; the TTS helper
        # reopens it by name, so it must survive the ``with`` block.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            audio_path = temp_audio.name

        tts_start = time.time()
        generate_audio_track(script, voice, cheerful, audio_path)
        track_processing_time(task_id, 'tts', time.time() - tts_start)
        processing_status[task_id]['progress'] = 50

        # Stage 3: Dubbing
        processing_status[task_id]['message'] = 'Creating dubbed video'
        final_filename = f"dubbed_{task_id}.mp4"
        final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)

        dubbing_start = time.time()
        replace_video_audio(video_path, audio_path, final_path)
        track_processing_time(task_id, 'dubbing', time.time() - dubbing_start)
        processing_status[task_id]['progress'] = 75

        # Finalize
        processing_status[task_id].update({
            'status': 'complete',
            'progress': 100,
            'message': 'Processing complete',
            'result_path': final_path,
            'script': script,
            'end_time': time.time()
        })

    except Exception as e:
        # setdefault guards the case where the function is invoked for a task
        # that was never registered and fails before registering itself.
        processing_status.setdefault(task_id, {}).update({
            'status': 'error',
            'message': f'Error: {str(e)}'
        })
        raise

    finally:
        # Cleanup runs on success AND failure so a failed job does not leave
        # the temp audio file or the uploaded video behind on disk.
        for leftover in (audio_path, video_path):
            if leftover and os.path.exists(leftover):
                try:
                    os.unlink(leftover)
                except OSError as cleanup_error:
                    # Best effort: a cleanup failure must not change the
                    # outcome already recorded for the task.
                    print(f"Cleanup failed for {leftover}: {cleanup_error}")
145
 
146
def generate_tamil_script(video_path):
    """Generate a Tamil script for a video using Gemini.

    Uploads the video, polls every 5 seconds until it leaves the
    ``PROCESSING`` state, sends it with ``GEMINI_PROMPT`` to the model, and
    returns the response text collapsed onto a single line.

    Args:
        video_path: Path of the video file to upload (sent as ``video/mp4``).

    Returns:
        The generated script with line breaks replaced by single spaces.

    Raises:
        Exception: If the uploaded file does not become ``ACTIVE`` or the
            response contains no text.
    """
    video_file = genai.upload_file(video_path, mime_type="video/mp4")

    try:
        while video_file.state.name == "PROCESSING":
            time.sleep(5)
            video_file = genai.get_file(video_file.name)

        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini processing failed: {video_file.state.name}")

        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Always remove the remote upload, including when processing fails or
        # the generation call raises, so failed jobs do not leave files behind.
        genai.delete_file(video_file.name)

    if hasattr(response, 'text') and response.text:
        return " ".join(response.text.strip().splitlines())
    raise Exception("No valid script generated")
 
 
 
164
 
165
def generate_audio_track(text, voice, cheerful, output_path):
    """Request speech audio from the TTS API and save it to ``output_path``.

    Posts ``text``, ``voice`` (as ``voice_name``) and ``cheerful`` as JSON to
    ``TTS_API_URL`` with a 300 second timeout, then writes the raw response
    body to ``output_path``.

    Raises:
        Exception: If the API answers with any status code other than 200.
    """
    tts_response = requests.post(
        TTS_API_URL,
        json={
            "text": text,
            "voice_name": voice,
            "cheerful": cheerful
        },
        timeout=300,
    )

    status_code = tts_response.status_code
    if status_code != 200:
        raise Exception(f"TTS API error: {status_code}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(tts_response.content)
179
+
180
def replace_video_audio(video_path, audio_path, output_path):
    """Write a copy of the video whose audio track is replaced.

    Args:
        video_path: Path of the source video.
        audio_path: Path of the audio file that becomes the new track.
        output_path: Destination of the encoded result (libx264 video,
            AAC audio).
    """
    # Both handles start as None so the finally block can tell which clips
    # were actually opened. (Binding ``AudioFileClip`` here instead of
    # ``audio`` would shadow the imported class with None and leave ``audio``
    # undefined.)
    video = audio = None
    try:
        video = VideoFileClip(video_path)
        audio = AudioFileClip(audio_path)
        video.audio = audio
        video.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            logger=None,
            threads=4
        )
    finally:
        # Close whichever clips were opened, even if encoding failed.
        if video is not None:
            video.close()
        if audio is not None:
            audio.close()
199
+
200
@app.route('/')
def index():
    """Render the main page, passing the voice choices to the template."""
    page = render_template('index.html', voices=VOICE_CHOICES)
    return page
204
+
205
@app.route('/upload', methods=['POST'])
def upload_video():
    """Accept a video upload and start background processing.

    Expects a multipart form with a ``video`` file and optional ``voice``
    (default ``'Charon'``) and ``cheerful`` (``'true'`` enables it) fields.

    Returns:
        JSON ``{'task_id': ...}`` on success, or JSON ``{'error': ...}`` with
        status 400 when no file was sent or no file was selected.
    """
    # Local import so this handler does not depend on a module-level
    # ``import uuid`` being present in the file header.
    import uuid

    if 'video' not in request.files:
        return jsonify({'error': 'No file uploaded'}), 400

    upload = request.files['video']
    if upload.filename == '':
        return jsonify({'error': 'No file selected'}), 400

    # Generate unique task ID; prefixing it keeps stored filenames unique.
    task_id = str(uuid.uuid4())
    filename = secure_filename(f"{task_id}_{upload.filename}")
    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    save_started = time.time()
    upload.save(video_path)
    upload_duration = time.time() - save_started

    # Get processing options
    voice = request.form.get('voice', 'Charon')
    cheerful = request.form.get('cheerful', 'false') == 'true'

    # Register the task before the worker starts so status polls succeed
    # immediately. 'upload' holds a duration in seconds, like every other
    # stage timing, not an absolute timestamp.
    processing_status[task_id] = {
        'status': 'uploaded',
        'progress': 0,
        'message': 'Starting processing',
        'timings': {'upload': upload_duration, 'transcription': None, 'tts': None, 'dubbing': None},
        'start_time': time.time()
    }

    # Start background processing
    thread = threading.Thread(
        target=process_video_background,
        args=(task_id, video_path, voice, cheerful)
    )
    thread.start()

    return jsonify({'task_id': task_id})
240
+
241
@app.route('/status/<task_id>')
def get_status(task_id):
    """Report the processing status of a task as JSON.

    Returns:
        JSON with ``status``, ``progress``, ``message`` and ``eta``. ``eta``
        is an ``H:MM:SS`` string while the task is processing and a numeric
        estimate is available, otherwise ``None``. Completed tasks also get
        ``result_url`` and ``script``. Unknown task IDs yield a 404 with an
        ``error`` field.
    """
    if task_id not in processing_status:
        return jsonify({'error': 'Invalid task ID'}), 404

    status = processing_status[task_id]

    # Calculate ETA if processing. The estimator returns a placeholder string
    # until at least one stage has finished; only numbers are formatted.
    eta = None
    if status['status'] == 'processing':
        remaining = estimate_remaining_time(task_id)
        if isinstance(remaining, (int, float)):
            eta = str(timedelta(seconds=int(remaining)))

    response = {
        'status': status['status'],
        'progress': status.get('progress', 0),
        'message': status.get('message', ''),
        'eta': eta
    }

    if status['status'] == 'complete':
        response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
        response['script'] = status.get('script', '')

    return jsonify(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
@app.route('/download/<filename>')
def download(filename):
    """Send the requested file from the configured download folder."""
    download_dir = app.config['DOWNLOAD_FOLDER']
    return send_from_directory(download_dir, filename)
274
 
 
275
  if __name__ == '__main__':
276
  app.run(host="0.0.0.0", port=7860)