Athspi committed on
Commit
c3c3d92
·
verified ·
1 Parent(s): 8dd1a38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -205
app.py CHANGED
@@ -4,47 +4,40 @@ import tempfile
4
  import uuid
5
  import google.generativeai as genai
6
  import requests
7
- from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
8
  from moviepy.video.io.VideoFileClip import VideoFileClip
9
  from moviepy.audio.io.AudioFileClip import AudioFileClip
10
  from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
12
- import threading
13
- from datetime import datetime, timedelta
14
 
15
- # Initialize Flask app and load secrets
16
  load_dotenv()
17
  app = Flask(__name__)
18
 
19
- # Configuration
20
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
  TTS_API_URL = os.getenv("TTS_API_URL")
22
 
23
- if not GEMINI_API_KEY or not TTS_API_URL:
24
- raise ValueError("Missing required environment variables")
 
 
 
25
 
 
26
  genai.configure(api_key=GEMINI_API_KEY)
27
 
28
- # File storage setup
29
  UPLOAD_FOLDER = 'uploads'
30
  DOWNLOAD_FOLDER = 'downloads'
31
  os.makedirs(UPLOAD_FOLDER, exist_ok=True)
32
  os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
33
  app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
34
  app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
35
- app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB
36
- app.secret_key = os.urandom(24)
37
 
38
- # Processing status tracking
39
- processing_status = {}
40
- processing_times = {
41
- 'upload': 0,
42
- 'transcription': 0,
43
- 'tts': 0,
44
- 'dubbing': 0
45
- }
46
-
47
- # Voice options
48
  VOICE_CHOICES = {
49
  "Male (Charon)": "Charon",
50
  "Female (Zephyr)": "Zephyr"
@@ -54,223 +47,144 @@ GEMINI_PROMPT = """
54
  You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
55
 
56
  **CRITICAL INSTRUCTIONS:**
57
- 1. Combine all dialogue into one continuous script.
58
- 2. NO timestamps or speaker labels.
59
- 3. Add performance directions (e.g., `Say happily:`, `[laugh]`) directly in the text.
60
- """
61
-
62
- def track_processing_time(task_id, stage, duration):
63
- """Track processing times for each stage"""
64
- processing_times[stage] = duration
65
- if task_id in processing_status:
66
- processing_status[task_id]['timings'][stage] = duration
67
 
68
- def estimate_remaining_time(task_id):
69
- """Estimate remaining processing time"""
70
- if task_id not in processing_status:
71
- return "Calculating..."
72
-
73
- status = processing_status[task_id]
74
- completed_stages = [s for s in status['timings'] if status['timings'][s] is not None]
75
-
76
- if len(completed_stages) == 0:
77
- return "Starting soon..."
78
-
79
- avg_time = sum(status['timings'][s] for s in completed_stages) / len(completed_stages)
80
- remaining_stages = 4 - len(completed_stages) # Total stages: upload, transcription, tts, dubbing
81
- return remaining_stages * avg_time
82
-
83
- def process_video_background(task_id, video_path, voice, cheerful):
84
- """Background processing function"""
85
- try:
86
- start_time = time.time()
87
- processing_status[task_id] = {
88
- 'status': 'processing',
89
- 'progress': 0,
90
- 'message': 'Starting transcription',
91
- 'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
92
- 'start_time': start_time
93
- }
94
-
95
- # Stage 1: Transcription
96
- processing_status[task_id]['message'] = 'Transcribing video content'
97
- script_start = time.time()
98
- script = generate_tamil_script(video_path)
99
- transcription_time = time.time() - script_start
100
- track_processing_time(task_id, 'transcription', transcription_time)
101
- processing_status[task_id]['progress'] = 25
102
-
103
- # Stage 2: TTS Generation
104
- processing_status[task_id]['message'] = 'Generating audio narration'
105
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
106
- audio_path = temp_audio.name
107
-
108
- tts_start = time.time()
109
- generate_audio_track(script, voice, cheerful, audio_path)
110
- tts_time = time.time() - tts_start
111
- track_processing_time(task_id, 'tts', tts_time)
112
- processing_status[task_id]['progress'] = 50
113
-
114
- # Stage 3: Dubbing
115
- processing_status[task_id]['message'] = 'Creating dubbed video'
116
- final_filename = f"dubbed_{task_id}.mp4"
117
- final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
118
-
119
- dubbing_start = time.time()
120
- replace_video_audio(video_path, audio_path, final_path)
121
- dubbing_time = time.time() - dubbing_start
122
- track_processing_time(task_id, 'dubbing', dubbing_time)
123
- processing_status[task_id]['progress'] = 75
124
-
125
- # Cleanup
126
- os.unlink(audio_path)
127
- os.unlink(video_path)
128
-
129
- # Finalize
130
- processing_status[task_id].update({
131
- 'status': 'complete',
132
- 'progress': 100,
133
- 'message': 'Processing complete',
134
- 'result_path': final_path,
135
- 'script': script,
136
- 'end_time': time.time()
137
- })
138
 
139
- except Exception as e:
140
- processing_status[task_id].update({
141
- 'status': 'error',
142
- 'message': f'Error: {str(e)}'
143
- })
144
- raise
145
 
146
- def generate_tamil_script(video_path):
147
- """Generate Tamil script using Gemini"""
148
- video_file = genai.upload_file(video_path, mime_type="video/mp4")
 
149
 
 
150
  while video_file.state.name == "PROCESSING":
151
  time.sleep(5)
152
  video_file = genai.get_file(video_file.name)
153
 
154
  if video_file.state.name != "ACTIVE":
155
- raise Exception(f"Gemini processing failed: {video_file.state.name}")
156
 
157
- model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
 
158
  response = model.generate_content([GEMINI_PROMPT, video_file])
159
  genai.delete_file(video_file.name)
160
 
161
  if hasattr(response, 'text') and response.text:
162
  return " ".join(response.text.strip().splitlines())
163
- raise Exception("No valid script generated")
164
 
165
- def generate_audio_track(text, voice, cheerful, output_path):
166
- """Generate audio using TTS API"""
 
167
  payload = {
168
- "text": text,
169
- "voice_name": voice,
170
- "cheerful": cheerful
171
  }
172
 
173
  response = requests.post(TTS_API_URL, json=payload, timeout=300)
174
- if response.status_code != 200:
175
- raise Exception(f"TTS API error: {response.status_code}")
 
 
 
 
 
 
 
 
 
176
 
177
- with open(output_path, "wb") as f:
178
- f.write(response.content)
179
-
180
- def replace_video_audio(video_path, audio_path, output_path):
181
- """Replace video audio track"""
182
- video = AudioFileClip = None
183
  try:
184
- video = VideoFileClip(video_path)
185
- audio = AudioFileClip(audio_path)
186
- video.audio = audio
187
- video.write_videofile(
188
  output_path,
189
  codec="libx264",
190
  audio_codec="aac",
191
- logger=None,
192
- threads=4
193
  )
194
  finally:
195
- if video:
196
- video.close()
197
- if audio:
198
- audio.close()
 
 
199
 
200
- @app.route('/')
201
  def index():
202
- """Main page"""
203
  return render_template('index.html', voices=VOICE_CHOICES)
204
 
205
- @app.route('/upload', methods=['POST'])
206
- def upload_video():
207
- """Handle video upload and start processing"""
208
- if 'video' not in request.files:
209
- return jsonify({'error': 'No file uploaded'}), 400
210
-
211
- file = request.files['video']
212
- if file.filename == '':
213
- return jsonify({'error': 'No file selected'}), 400
214
 
215
- # Generate unique task ID
216
- task_id = str(uuid.uuid4())
217
- filename = secure_filename(f"{task_id}_{file.filename}")
218
- video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
219
- file.save(video_path)
220
-
221
- # Get processing options
222
- voice = request.form.get('voice', 'Charon')
223
- cheerful = request.form.get('cheerful', 'false') == 'true'
224
-
225
- # Start background processing
226
- processing_status[task_id] = {
227
- 'status': 'uploaded',
228
- 'progress': 0,
229
- 'message': 'Starting processing',
230
- 'timings': {'upload': time.time(), 'transcription': None, 'tts': None, 'dubbing': None},
231
- 'start_time': time.time()
232
- }
233
-
234
- thread = threading.Thread(
235
- target=process_video_background,
236
- args=(task_id, video_path, voice, cheerful)
237
- thread.start()
238
-
239
- return jsonify({'task_id': task_id})
240
-
241
- @app.route('/status/<task_id>')
242
- def get_status(task_id):
243
- """Check processing status"""
244
- if task_id not in processing_status:
245
- return jsonify({'error': 'Invalid task ID'}), 404
246
-
247
- status = processing_status[task_id]
248
-
249
- # Calculate ETA if processing
250
- eta = None
251
- if status['status'] == 'processing':
252
- elapsed = time.time() - status['start_time']
253
- remaining = estimate_remaining_time(task_id)
254
- if isinstance(remaining, (int, float)):
255
- eta = str(timedelta(seconds=int(remaining)))
256
-
257
- response = {
258
- 'status': status['status'],
259
- 'progress': status.get('progress', 0),
260
- 'message': status.get('message', ''),
261
- 'eta': eta
262
- }
263
-
264
- if status['status'] == 'complete':
265
- response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
266
- response['script'] = status.get('script', '')
267
-
268
- return jsonify(response)
269
-
270
- @app.route('/download/<filename>')
271
- def download(filename):
272
- """Serve processed video"""
273
  return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
274
 
 
275
  if __name__ == '__main__':
276
- app.run(host="0.0.0.0", port=7860, threaded=True)
 
4
  import uuid
5
  import google.generativeai as genai
6
  import requests
7
+ from flask import Flask, request, render_template, send_from_directory, url_for, flash
8
  from moviepy.video.io.VideoFileClip import VideoFileClip
9
  from moviepy.audio.io.AudioFileClip import AudioFileClip
10
  from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
 
 
12
 
13
# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
# Load variables from a .env file before any setting is read.
load_dotenv()
app = Flask(__name__)

# Both secrets are read from the process environment.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")

# Fail fast at import time when a required setting is missing or empty.
if not GEMINI_API_KEY:
    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
if not TTS_API_URL:
    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")

# Hand the API key to the Gemini client library.
genai.configure(api_key=GEMINI_API_KEY)

# Working directories: raw uploads go in one, finished videos in the other.
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    DOWNLOAD_FOLDER=DOWNLOAD_FOLDER,
    MAX_CONTENT_LENGTH=100 * 1024 * 1024,  # 100 MB upload limit
)
# Needed for flash messages; a fresh random key is generated on every start.
app.secret_key = os.urandom(24)
39
 
40
+ # --- 2. APPLICATION CONFIGURATION ---
 
 
 
 
 
 
 
 
 
41
  VOICE_CHOICES = {
42
  "Male (Charon)": "Charon",
43
  "Female (Zephyr)": "Zephyr"
 
47
  You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
48
 
49
  **CRITICAL INSTRUCTIONS:**
50
+ 1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
51
+ 2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
52
+ 3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
 
 
 
 
 
 
 
53
 
54
+ **EXAMPLE OUTPUT:**
55
+ Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
56
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ # --- 3. CORE APPLICATION FUNCTIONS ---
 
 
 
 
 
59
 
60
def generate_tamil_script(video_file_path):
    """Generate a single-block Tamil script for a video using Gemini.

    The video is uploaded to Gemini, polled until it leaves the
    ``PROCESSING`` state, and then passed to the model together with
    ``GEMINI_PROMPT``.  The uploaded remote file is deleted on every exit
    path, including failures, so aborted requests do not leave files behind.

    Args:
        video_file_path: Path of the local video file to upload.

    Returns:
        The response text with surrounding whitespace stripped and its
        lines joined by single spaces.

    Raises:
        Exception: If the uploaded file does not reach the ``ACTIVE`` state,
            or if the response carries no usable text.  Errors raised by the
            Gemini client itself propagate unchanged.
    """
    print("Uploading video to Gemini for transcription...")
    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")

    try:
        # Wait for file processing.
        # NOTE(review): this loop has no upper bound; a file stuck in
        # PROCESSING blocks the calling request indefinitely.
        while video_file.state.name == "PROCESSING":
            time.sleep(5)
            video_file = genai.get_file(video_file.name)

        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini file processing failed: {video_file.state.name}")

        print("Generating script...")
        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Always remove the remote copy, whether or not generation succeeded.
        genai.delete_file(video_file.name)

    if hasattr(response, 'text') and response.text:
        return " ".join(response.text.strip().splitlines())
    raise Exception("No valid script was generated by Gemini.")
81
 
82
def generate_audio_track(script_text, voice_name, is_cheerful, output_path):
    """Synthesize speech for a script through the TTS API and save it.

    Args:
        script_text: Text to be spoken.
        voice_name: Voice identifier sent to the API as ``voice_name``.
        is_cheerful: Flag sent to the API as ``cheerful``.
        output_path: File the returned audio bytes are written to.

    Returns:
        True once the audio has been written to ``output_path``.

    Raises:
        Exception: If the API answers with any status code other than 200.
    """
    print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")
    tts_request = {
        "text": script_text,
        "voice_name": voice_name,
        "cheerful": is_cheerful
    }
    tts_response = requests.post(TTS_API_URL, json=tts_request, timeout=300)

    # Bail out first so the success path reads straight through.
    if tts_response.status_code != 200:
        raise Exception(f"TTS API Error: {tts_response.status_code} - {tts_response.text}")

    with open(output_path, "wb") as audio_file:
        audio_file.write(tts_response.content)
    return True
97
+
98
def replace_video_audio(video_path, new_audio_path, output_path):
    """Write a copy of a video whose audio track is swapped for another file.

    Args:
        video_path: Path of the source video.
        new_audio_path: Path of the audio file to use as the new track.
        output_path: Path the re-encoded video is written to.
    """
    print("Replacing video audio...")
    source_video = None
    dubbed_audio = None
    try:
        source_video = VideoFileClip(video_path)
        dubbed_audio = AudioFileClip(new_audio_path)
        source_video.audio = dubbed_audio

        encode_options = {
            "codec": "libx264",
            "audio_codec": "aac",
            "logger": 'bar',
        }
        source_video.write_videofile(output_path, **encode_options)
    finally:
        # Close whatever was opened: audio first, then video.
        for clip in (dubbed_audio, source_video):
            if clip:
                clip.close()
119
+
120
+ # --- 4. FLASK ROUTES ---
121
 
122
@app.route('/', methods=['GET'])
def index():
    """Serve the upload form together with the available voice options."""
    page = render_template('index.html', voices=VOICE_CHOICES)
    return page
126
 
127
@app.route('/process', methods=['POST'])
def process_video():
    """Handle a video upload and run the full dubbing pipeline.

    Steps: save the upload, generate the Tamil script, synthesize the audio
    track, and write the dubbed video into the download folder.  The result
    (or an error message) is reported by re-rendering ``index.html`` with
    flash messages.

    Each request stores its files under a unique prefix so that simultaneous
    uploads sharing a file name cannot overwrite or delete each other's data.

    Returns:
        The rendered ``index.html`` page.  On success it also receives
        ``result_video`` (URL of the dubbed file) and ``script``.
    """
    input_video_path = None
    temp_audio_path = None

    try:
        # Validate file upload (a missing part and an empty/absent name both fail)
        file = request.files.get('video')
        if file is None or not file.filename:
            flash("Please upload a video file.", "error")
            return render_template('index.html', voices=VOICE_CHOICES)

        # secure_filename() can return "" (e.g. a name made only of
        # characters it strips), which would make the save path the upload
        # directory itself; fall back to a generic name.
        safe_name = secure_filename(file.filename) or "video.mp4"
        # A per-request prefix keeps concurrent uploads with the same name apart.
        unique_name = f"{uuid.uuid4().hex}_{safe_name}"
        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], unique_name)
        file.save(input_video_path)

        # Get processing options
        voice_choice = request.form.get('voice', 'Charon')
        is_cheerful = request.form.get('tone') == 'on'

        # Generate script
        script = generate_tamil_script(input_video_path)

        # Reserve a temporary file name for the audio; the file is closed
        # here and written by generate_audio_track().
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name

        generate_audio_track(script, voice_choice, is_cheerful, temp_audio_path)

        # The output is encoded as libx264/aac, so always use an .mp4
        # container regardless of the upload's extension.
        final_video_name = f"dubbed_{os.path.splitext(unique_name)[0]}.mp4"
        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
        replace_video_audio(input_video_path, temp_audio_path, final_video_path)

        flash("Video processing complete!", "success")
        return render_template(
            'index.html',
            voices=VOICE_CHOICES,
            result_video=url_for('serve_video', filename=final_video_name),
            script=script
        )

    except Exception as e:
        # Top-level boundary of the request: report the failure to the user
        # instead of returning a 500 page.
        print(f"Processing error: {str(e)}")
        flash(f"An error occurred: {str(e)}", "error")
        return render_template('index.html', voices=VOICE_CHOICES)

    finally:
        # Clean up temporary files on every exit path
        if input_video_path and os.path.exists(input_video_path):
            os.remove(input_video_path)
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
182
+
183
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Return a processed video from the download folder by file name."""
    download_dir = app.config['DOWNLOAD_FOLDER']
    return send_from_directory(download_dir, filename)
187
 
188
+ # --- 5. APPLICATION ENTRY POINT ---
189
if __name__ == '__main__':
    # Started directly as a script: listen on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")