Athspi committed on
Commit
be0d1aa
·
verified ·
1 Parent(s): 655ec8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -17
app.py CHANGED
@@ -26,6 +26,7 @@ TTS_API_URL = os.getenv("TTS_API_URL", "") # Optional
26
  MAX_CONTENT_LENGTH = 500 * 1024 * 1024 # 500MB
27
  MAX_TTS_RETRIES = 3
28
  TTS_CHUNK_SIZE = 2000 # Characters per chunk
 
29
 
30
  # File storage setup
31
  UPLOAD_FOLDER = 'uploads'
@@ -56,15 +57,9 @@ VOICE_TYPES = {
56
 
57
  GEMINI_PROMPTS = {
58
  "api": """
59
- You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
60
-
61
- **CRITICAL INSTRUCTIONS:**
62
- 1. **Single Script:** Combine all dialogue into one continuous script.
63
- 2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
64
- 3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
65
-
66
- **EXAMPLE OUTPUT:**
67
- Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்
68
  """,
69
  "gtts": """
70
  You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
@@ -150,25 +145,49 @@ def generate_tts_audio(text, language_code, voice_type, tts_provider):
150
  combined_audio.seek(0)
151
  return combined_audio
152
 
 
 
 
 
 
 
 
 
 
 
153
  def generate_transcription(video_path, prompt):
154
- """Generate transcript using Gemini with retry logic"""
155
  max_retries = 3
156
  for attempt in range(max_retries):
157
  try:
 
158
  video_file = genai.upload_file(video_path, mime_type="video/mp4")
 
 
 
 
 
 
 
159
  model = genai.GenerativeModel("models/gemini-2.5-flash")
160
  response = model.generate_content([prompt, video_file])
161
- genai.delete_file(video_file.name)
162
 
163
  if hasattr(response, 'text'):
164
  return response.text.strip()
165
  raise Exception("No valid transcription generated")
166
 
167
  except Exception as e:
 
168
  if attempt == max_retries - 1:
169
  raise
170
- logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
171
  time.sleep(5 * (attempt + 1))
 
 
 
 
 
 
 
172
 
173
  def dub_video(video_path, audio_buffer):
174
  """Dub video with new audio"""
@@ -215,10 +234,13 @@ def dub_video(video_path, audio_buffer):
215
  if audio:
216
  audio.close()
217
  if temp_audio_path and os.path.exists(temp_audio_path):
218
- os.unlink(temp_audio_path)
 
 
 
219
 
220
  def process_video_background(task_id, video_path, language, voice_type, tts_provider):
221
- """Background video processing"""
222
  try:
223
  processing_status[task_id] = {
224
  'status': 'processing',
@@ -251,11 +273,14 @@ def process_video_background(task_id, video_path, language, voice_type, tts_prov
251
  processing_status[task_id]['status'] = 'error'
252
  processing_status[task_id]['message'] = str(e)
253
  logger.error(f"Processing failed: {str(e)}")
254
-
255
  finally:
256
- # Cleanup
257
  if os.path.exists(video_path):
258
- os.unlink(video_path)
 
 
 
 
259
 
260
  @app.route('/')
261
  def index():
 
26
  MAX_CONTENT_LENGTH = 500 * 1024 * 1024 # 500MB
27
  MAX_TTS_RETRIES = 3
28
  TTS_CHUNK_SIZE = 2000 # Characters per chunk
29
+ MAX_WAIT_TIME = 300 # 5 minutes max wait for file processing
30
 
31
  # File storage setup
32
  UPLOAD_FOLDER = 'uploads'
 
57
 
58
  GEMINI_PROMPTS = {
59
  "api": """
60
+ You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
61
+ CONTINUOUS block of modern {language}. Include natural speech patterns and
62
+ performance directions (e.g., [pause], [laugh]) where appropriate.
 
 
 
 
 
 
63
  """,
64
  "gtts": """
65
  You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
 
145
  combined_audio.seek(0)
146
  return combined_audio
147
 
def wait_for_file_processing(file, timeout=None, poll_interval=5):
    """Poll an uploaded file until it leaves the PROCESSING state.

    Args:
        file: File handle exposing ``state.name`` and ``name`` (the object
            returned by ``genai.upload_file`` / ``genai.get_file``).
        timeout: Maximum number of seconds to wait. When ``None`` the
            module-level ``MAX_WAIT_TIME`` is used.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The most recently fetched file object, whose state is no longer
        "PROCESSING". The state is not checked any further here, so the
        caller must verify it is the state it needs.

    Raises:
        TimeoutError: If the file is still processing once the deadline
            has passed.
    """
    if timeout is None:
        timeout = MAX_WAIT_TIME

    # Monotonic clock: wall-clock adjustments must not stretch or shrink
    # the wait.
    deadline = time.monotonic() + timeout
    while file.state.name == "PROCESSING":
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("File processing timed out")
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
        file = genai.get_file(file.name)
    return file
157
+
def generate_transcription(video_path, prompt):
    """Transcribe a video with Gemini, retrying the whole upload on failure.

    Each attempt uploads the video, waits for the upload to leave the
    PROCESSING state, asks the model for a transcript, and then deletes
    the uploaded file again whether or not the attempt succeeded.

    Args:
        video_path: Path of the local video file; uploaded as "video/mp4".
        prompt: Instruction text sent to the model together with the video.

    Returns:
        The response text with surrounding whitespace stripped.

    Raises:
        Exception: Whatever the final attempt raised, once every attempt
            has failed.
    """
    max_retries = 3
    for attempt in range(max_retries):
        # Reset on every attempt so cleanup never targets a file that an
        # earlier attempt already deleted.
        video_file = None
        try:
            video_file = genai.upload_file(video_path, mime_type="video/mp4")

            # If the wait raises, video_file still refers to the upload,
            # so the cleanup below removes it.
            video_file = wait_for_file_processing(video_file)
            if video_file.state.name != "ACTIVE":
                raise RuntimeError(
                    f"File processing failed: {video_file.state.name}"
                )

            model = genai.GenerativeModel("models/gemini-2.5-flash")
            response = model.generate_content([prompt, video_file])

            text = getattr(response, "text", None)
            if text is None:
                raise RuntimeError("No valid transcription generated")
            return text.strip()
        except Exception as e:
            logger.warning(
                "Transcription attempt %d failed: %s", attempt + 1, e
            )
            if attempt == max_retries - 1:
                raise
        finally:
            # Best-effort removal of the uploaded copy; a failure here must
            # not mask the transcription result or the original error.
            if video_file is not None:
                try:
                    genai.delete_file(video_file.name)
                except Exception as delete_error:
                    logger.error("Failed to delete file: %s", delete_error)

        # Only reached after a failed, non-final attempt: linear backoff,
        # taken after the uploaded file has been cleaned up.
        time.sleep(5 * (attempt + 1))
191
 
192
  def dub_video(video_path, audio_buffer):
193
  """Dub video with new audio"""
 
234
  if audio:
235
  audio.close()
236
  if temp_audio_path and os.path.exists(temp_audio_path):
237
+ try:
238
+ os.unlink(temp_audio_path)
239
+ except Exception as e:
240
+ logger.error(f"Failed to delete temp audio: {str(e)}")
241
 
242
  def process_video_background(task_id, video_path, language, voice_type, tts_provider):
243
+ """Background video processing with enhanced error handling"""
244
  try:
245
  processing_status[task_id] = {
246
  'status': 'processing',
 
273
  processing_status[task_id]['status'] = 'error'
274
  processing_status[task_id]['message'] = str(e)
275
  logger.error(f"Processing failed: {str(e)}")
 
276
  finally:
277
+ # Cleanup original video
278
  if os.path.exists(video_path):
279
+ try:
280
+ os.unlink(video_path)
281
+ except Exception as e:
282
+ logger.error(f"Failed to delete video: {str(e)}")
283
+
284
 
285
  @app.route('/')
286
  def index():