Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed 3 days ago

Commit

be0d1aa

verified ·

1 Parent(s): 655ec8f

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -17

app.py CHANGED Viewed

@@ -26,6 +26,7 @@ TTS_API_URL = os.getenv("TTS_API_URL", "")  # Optional
 MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB
 MAX_TTS_RETRIES = 3
 TTS_CHUNK_SIZE = 2000  # Characters per chunk
 # File storage setup
 UPLOAD_FOLDER = 'uploads'
@@ -56,15 +57,9 @@ VOICE_TYPES = {
 GEMINI_PROMPTS = {
     "api": """
-    You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
-**CRITICAL INSTRUCTIONS:**
-1.  **Single Script:** Combine all dialogue into one continuous script.
-2.  **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
-3.  **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
-**EXAMPLE OUTPUT:**
-Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்
     """,
     "gtts": """
     You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
@@ -150,25 +145,49 @@ def generate_tts_audio(text, language_code, voice_type, tts_provider):
     combined_audio.seek(0)
     return combined_audio
 def generate_transcription(video_path, prompt):
-    """Generate transcript using Gemini with retry logic"""
     max_retries = 3
     for attempt in range(max_retries):
         try:
             video_file = genai.upload_file(video_path, mime_type="video/mp4")
             model = genai.GenerativeModel("models/gemini-2.5-flash")
             response = model.generate_content([prompt, video_file])
-            genai.delete_file(video_file.name)
             if hasattr(response, 'text'):
                 return response.text.strip()
             raise Exception("No valid transcription generated")
         except Exception as e:
             if attempt == max_retries - 1:
                 raise
-            logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
             time.sleep(5 * (attempt + 1))
 def dub_video(video_path, audio_buffer):
     """Dub video with new audio"""
@@ -215,10 +234,13 @@ def dub_video(video_path, audio_buffer):
         if audio:
             audio.close()
         if temp_audio_path and os.path.exists(temp_audio_path):
-            os.unlink(temp_audio_path)
 def process_video_background(task_id, video_path, language, voice_type, tts_provider):
-    """Background video processing"""
     try:
         processing_status[task_id] = {
             'status': 'processing',
@@ -251,11 +273,14 @@ def process_video_background(task_id, video_path, language, voice_type, tts_prov
         processing_status[task_id]['status'] = 'error'
         processing_status[task_id]['message'] = str(e)
         logger.error(f"Processing failed: {str(e)}")
     finally:
-        # Cleanup
         if os.path.exists(video_path):
-            os.unlink(video_path)
 @app.route('/')
 def index():

 MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB
 MAX_TTS_RETRIES = 3
 TTS_CHUNK_SIZE = 2000  # Characters per chunk
+MAX_WAIT_TIME = 300  # 5 minutes max wait for file processing
 # File storage setup
 UPLOAD_FOLDER = 'uploads'
 GEMINI_PROMPTS = {
     "api": """
+    You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
+    CONTINUOUS block of modern {language}. Include natural speech patterns and
+    performance directions (e.g., [pause], [laugh]) where appropriate.
     """,
     "gtts": """
     You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
     combined_audio.seek(0)
     return combined_audio
def wait_for_file_processing(file, timeout=None, poll_interval=5):
    """Poll an uploaded file until it leaves the "PROCESSING" state.

    Args:
        file: Uploaded file handle exposing ``state.name`` and ``name``.
        timeout: Maximum number of seconds to wait. ``None`` (the default)
            uses the module-level ``MAX_WAIT_TIME``.
        poll_interval: Seconds to sleep between status refreshes.

    Returns:
        The most recently fetched file handle, whose state is no longer
        "PROCESSING". The caller must still check which state it reached.

    Raises:
        TimeoutError: If the file is still "PROCESSING" once the timeout
            has elapsed.
    """
    # Nothing to wait for: hand the handle straight back.
    if file.state.name != "PROCESSING":
        return file

    limit = MAX_WAIT_TIME if timeout is None else timeout
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + limit
    while file.state.name == "PROCESSING":
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("File processing timed out")
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
        file = genai.get_file(file.name)
    return file
 def generate_transcription(video_path, prompt):
     """Generate a transcript of a video with Gemini, retrying on failure.

     Each attempt uploads the video as "video/mp4", waits for the upload to
     finish processing, and asks the model to generate content from the
     prompt plus the uploaded file. The uploaded file is deleted after every
     attempt, whether it succeeded or failed.

     Args:
         video_path: Path of the video file to upload.
         prompt: Prompt text sent to the model alongside the video.

     Returns:
         The stripped text of the model response.

     Raises:
         Exception: The error from the final attempt, re-raised once all
             three attempts have failed.
     """
     max_retries = 3
     for attempt in range(max_retries):
         # Reset per attempt so cleanup never targets a handle left over
         # from an earlier attempt whose file was already deleted.
         video_file = None
         try:
             video_file = genai.upload_file(video_path, mime_type="video/mp4")
             # Block until processing ends (bounded by the helper's timeout).
             video_file = wait_for_file_processing(video_file)
             if video_file.state.name != "ACTIVE":
                 raise Exception(f"File processing failed: {video_file.state.name}")
             model = genai.GenerativeModel("models/gemini-2.5-flash")
             response = model.generate_content([prompt, video_file])
             if hasattr(response, 'text'):
                 return response.text.strip()
             raise Exception("No valid transcription generated")
         except Exception as e:
             logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
             if attempt == max_retries - 1:
                 raise
             # Linear backoff: 5s after the first failure, 10s after the second.
             time.sleep(5 * (attempt + 1))
         finally:
             # Best-effort removal of this attempt's upload; a failure here is
             # logged and must not mask the transcription result or error.
             if video_file is not None and hasattr(video_file, 'name'):
                 try:
                     genai.delete_file(video_file.name)
                 except Exception as delete_error:
                     logger.error(f"Failed to delete file: {str(delete_error)}")
 def dub_video(video_path, audio_buffer):
     """Dub video with new audio"""
         if audio:
             audio.close()
         if temp_audio_path and os.path.exists(temp_audio_path):
+            try:
+                os.unlink(temp_audio_path)
+            except Exception as e:
+                logger.error(f"Failed to delete temp audio: {str(e)}")
 def process_video_background(task_id, video_path, language, voice_type, tts_provider):
+    """Background video processing with enhanced error handling"""
     try:
         processing_status[task_id] = {
             'status': 'processing',
         processing_status[task_id]['status'] = 'error'
         processing_status[task_id]['message'] = str(e)
         logger.error(f"Processing failed: {str(e)}")
     finally:
+        # Cleanup original video
         if os.path.exists(video_path):
+            try:
+                os.unlink(video_path)
+            except Exception as e:
+                logger.error(f"Failed to delete video: {str(e)}")
 @app.route('/')
 def index():