Update app.py
Browse files
app.py
CHANGED
@@ -26,6 +26,7 @@ TTS_API_URL = os.getenv("TTS_API_URL", "") # Optional
|
|
26 |
MAX_CONTENT_LENGTH = 500 * 1024 * 1024 # 500MB
|
27 |
MAX_TTS_RETRIES = 3
|
28 |
TTS_CHUNK_SIZE = 2000 # Characters per chunk
|
|
|
29 |
|
30 |
# File storage setup
|
31 |
UPLOAD_FOLDER = 'uploads'
|
@@ -56,15 +57,9 @@ VOICE_TYPES = {
|
|
56 |
|
57 |
GEMINI_PROMPTS = {
|
58 |
"api": """
|
59 |
-
You are an AI scriptwriter.
|
60 |
-
|
61 |
-
|
62 |
-
1. **Single Script:** Combine all dialogue into one continuous script.
|
63 |
-
2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
|
64 |
-
3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
|
65 |
-
|
66 |
-
**EXAMPLE OUTPUT:**
|
67 |
-
Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்
|
68 |
""",
|
69 |
"gtts": """
|
70 |
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
|
@@ -150,25 +145,49 @@ def generate_tts_audio(text, language_code, voice_type, tts_provider):
|
|
150 |
combined_audio.seek(0)
|
151 |
return combined_audio
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
def generate_transcription(video_path, prompt):
|
154 |
-
"""Generate transcript using Gemini with
|
155 |
max_retries = 3
|
156 |
for attempt in range(max_retries):
|
157 |
try:
|
|
|
158 |
video_file = genai.upload_file(video_path, mime_type="video/mp4")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
model = genai.GenerativeModel("models/gemini-2.5-flash")
|
160 |
response = model.generate_content([prompt, video_file])
|
161 |
-
genai.delete_file(video_file.name)
|
162 |
|
163 |
if hasattr(response, 'text'):
|
164 |
return response.text.strip()
|
165 |
raise Exception("No valid transcription generated")
|
166 |
|
167 |
except Exception as e:
|
|
|
168 |
if attempt == max_retries - 1:
|
169 |
raise
|
170 |
-
logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
|
171 |
time.sleep(5 * (attempt + 1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
def dub_video(video_path, audio_buffer):
|
174 |
"""Dub video with new audio"""
|
@@ -215,10 +234,13 @@ def dub_video(video_path, audio_buffer):
|
|
215 |
if audio:
|
216 |
audio.close()
|
217 |
if temp_audio_path and os.path.exists(temp_audio_path):
|
218 |
-
|
|
|
|
|
|
|
219 |
|
220 |
def process_video_background(task_id, video_path, language, voice_type, tts_provider):
|
221 |
-
"""Background video processing"""
|
222 |
try:
|
223 |
processing_status[task_id] = {
|
224 |
'status': 'processing',
|
@@ -251,11 +273,14 @@ def process_video_background(task_id, video_path, language, voice_type, tts_prov
|
|
251 |
processing_status[task_id]['status'] = 'error'
|
252 |
processing_status[task_id]['message'] = str(e)
|
253 |
logger.error(f"Processing failed: {str(e)}")
|
254 |
-
|
255 |
finally:
|
256 |
-
# Cleanup
|
257 |
if os.path.exists(video_path):
|
258 |
-
|
|
|
|
|
|
|
|
|
259 |
|
260 |
@app.route('/')
|
261 |
def index():
|
|
|
26 |
MAX_CONTENT_LENGTH = 500 * 1024 * 1024 # 500MB
|
27 |
MAX_TTS_RETRIES = 3
|
28 |
TTS_CHUNK_SIZE = 2000 # Characters per chunk
|
29 |
+
MAX_WAIT_TIME = 300 # 5 minutes max wait for file processing
|
30 |
|
31 |
# File storage setup
|
32 |
UPLOAD_FOLDER = 'uploads'
|
|
|
57 |
|
58 |
GEMINI_PROMPTS = {
|
59 |
"api": """
|
60 |
+
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
|
61 |
+
CONTINUOUS block of modern {language}. Include natural speech patterns and
|
62 |
+
performance directions (e.g., [pause], [laugh]) where appropriate.
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
""",
|
64 |
"gtts": """
|
65 |
You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
|
|
|
145 |
combined_audio.seek(0)
|
146 |
return combined_audio
|
147 |
|
148 |
+
def wait_for_file_processing(file, timeout=None, poll_interval=5):
    """Poll an uploaded file until it leaves the ``PROCESSING`` state.

    Args:
        file: Uploaded-file handle exposing ``name`` and ``state.name``.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            means ``MAX_WAIT_TIME``.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The most recently fetched file handle, whose state is no longer
        ``PROCESSING``. The state is not validated here; the caller still
        has to check that it is the one it needs.

    Raises:
        TimeoutError: If the file is still ``PROCESSING`` once ``timeout``
            seconds have elapsed.
    """
    # Nothing to wait for: return the handle untouched.
    if file.state.name != "PROCESSING":
        return file

    if timeout is None:
        timeout = MAX_WAIT_TIME

    # A monotonic clock keeps the timeout correct even if the system
    # wall clock is adjusted while we are waiting.
    started = time.monotonic()
    while file.state.name == "PROCESSING":
        elapsed = time.monotonic() - started
        if elapsed > timeout:
            raise TimeoutError("File processing timed out")
        # Clamp the sleep so the last poll does not overshoot the deadline.
        time.sleep(min(poll_interval, max(timeout - elapsed, 0)))
        file = genai.get_file(file.name)
    return file
|
157 |
+
|
158 |
def generate_transcription(video_path, prompt):
    """Generate a transcript for a video using Gemini, with retries.

    Each attempt uploads the video, waits for the upload to finish
    processing, asks the model for a transcription and finally deletes the
    uploaded file again, whether the attempt succeeded or not.

    Args:
        video_path: Path of the local video file to upload.
        prompt: Prompt text sent to the model together with the video.

    Returns:
        The model's response text with surrounding whitespace stripped.

    Raises:
        Exception: Re-raises the error of the last attempt when all
            ``max_retries`` attempts fail.
    """
    max_retries = 3
    for attempt in range(max_retries):
        # Reset the handle on every attempt. Otherwise a failed upload would
        # leave the previous attempt's (already deleted) file bound to the
        # name and the cleanup below would try to delete it a second time.
        video_file = None
        try:
            video_file = genai.upload_file(video_path, mime_type="video/mp4")

            # Block until the upload has left the PROCESSING state.
            video_file = wait_for_file_processing(video_file)

            if video_file.state.name != "ACTIVE":
                raise Exception(f"File processing failed: {video_file.state.name}")

            model = genai.GenerativeModel("models/gemini-2.5-flash")
            response = model.generate_content([prompt, video_file])

            if hasattr(response, 'text'):
                return response.text.strip()
            raise Exception("No valid transcription generated")

        except Exception as e:
            logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
            if attempt == max_retries - 1:
                raise
        finally:
            # Best-effort removal of the uploaded file; a cleanup failure is
            # logged but must not mask the result or error of the attempt.
            if video_file is not None and hasattr(video_file, 'name'):
                try:
                    genai.delete_file(video_file.name)
                except Exception as delete_error:
                    logger.error(f"Failed to delete file: {str(delete_error)}")

        # Only reached after a failed, non-final attempt. Backing off here,
        # after the cleanup, means the uploaded file does not linger for the
        # whole delay. The delay grows linearly with the attempt number.
        time.sleep(5 * (attempt + 1))
|
191 |
|
192 |
def dub_video(video_path, audio_buffer):
|
193 |
"""Dub video with new audio"""
|
|
|
234 |
if audio:
|
235 |
audio.close()
|
236 |
if temp_audio_path and os.path.exists(temp_audio_path):
|
237 |
+
try:
|
238 |
+
os.unlink(temp_audio_path)
|
239 |
+
except Exception as e:
|
240 |
+
logger.error(f"Failed to delete temp audio: {str(e)}")
|
241 |
|
242 |
def process_video_background(task_id, video_path, language, voice_type, tts_provider):
|
243 |
+
"""Background video processing with enhanced error handling"""
|
244 |
try:
|
245 |
processing_status[task_id] = {
|
246 |
'status': 'processing',
|
|
|
273 |
processing_status[task_id]['status'] = 'error'
|
274 |
processing_status[task_id]['message'] = str(e)
|
275 |
logger.error(f"Processing failed: {str(e)}")
|
|
|
276 |
finally:
|
277 |
+
# Cleanup original video
|
278 |
if os.path.exists(video_path):
|
279 |
+
try:
|
280 |
+
os.unlink(video_path)
|
281 |
+
except Exception as e:
|
282 |
+
logger.error(f"Failed to delete video: {str(e)}")
|
283 |
+
|
284 |
|
285 |
@app.route('/')
|
286 |
def index():
|