Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed about 6 hours ago

Commit

8bc1a84

verified ·

1 Parent(s): 25df726

Update app.py

Browse files

Files changed (1) hide show

app.py +331 -181

app.py CHANGED Viewed

@@ -4,256 +4,406 @@ import tempfile
 import uuid
 import google.generativeai as genai
 import requests
-from flask import Flask, request, render_template, send_from_directory, url_for, flash
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.audio.AudioClip import concatenate_audioclips
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
-# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
 load_dotenv()
 app = Flask(__name__)
-# Load secrets from environment variables
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
-TTS_MAX_TOKENS = 30000  # Conservative limit below 32k token threshold
-# Validate required configurations
-if not GEMINI_API_KEY:
-    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
-if not TTS_API_URL:
-    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")
-# Configure Gemini AI
 genai.configure(api_key=GEMINI_API_KEY)
-# Configure directories
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
-app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit
-app.secret_key = os.urandom(24)  # Secure key for flash messages
-# --- 2. APPLICATION CONFIGURATION ---
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 }
 GEMINI_PROMPT = """
-You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 **CRITICAL INSTRUCTIONS:**
-1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
-2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
-3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
 **EXAMPLE OUTPUT:**
 Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
 """
-# --- 3. CORE APPLICATION FUNCTIONS ---
-def generate_tamil_script(video_file_path):
-    """Generates a Tamil script from the video using Gemini AI."""
-    print("Uploading video to Gemini for transcription...")
-    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")
-    # Wait for file processing
-    while video_file.state.name == "PROCESSING":
-        time.sleep(5)
-        video_file = genai.get_file(video_file.name)
-    if video_file.state.name != "ACTIVE":
-        raise Exception(f"Gemini file processing failed: {video_file.state.name}")
-    print("Generating script...")
-    model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
-    response = model.generate_content([GEMINI_PROMPT, video_file])
-    genai.delete_file(video_file.name)
-    if hasattr(response, 'text') and response.text:
-        return " ".join(response.text.strip().splitlines())
-    raise Exception("No valid script was generated by Gemini.")
-def split_text_for_tts(text, max_tokens=TTS_MAX_TOKENS):
-    """Splits text into chunks that fit within TTS token limits."""
-    words = text.split()
-    chunks = []
-    current_chunk = []
-    current_length = 0
-    for word in words:
-        word_length = len(word) + 1  # +1 for space
-        if current_length + word_length > max_tokens:
-            chunks.append(" ".join(current_chunk))
-            current_chunk = [word]
-            current_length = word_length
-        else:
-            current_chunk.append(word)
-            current_length += word_length
-    if current_chunk:
-        chunks.append(" ".join(current_chunk))
-    return chunks
-def generate_audio_with_retry(text_chunk, voice_name, is_cheerful, max_retries=3, retry_delay=2):
-    """Generates audio with retry logic for API failures."""
     for attempt in range(max_retries):
         try:
-            payload = {
-                "text": text_chunk,
-                "voice_name": voice_name,
-                "cheerful": is_cheerful
-            }
-            response = requests.post(TTS_API_URL, json=payload, timeout=300)
-            response.raise_for_status()
-            if response.status_code == 200:
-                return response.content
-        except requests.exceptions.RequestException as e:
-            print(f"TTS API attempt {attempt + 1} failed: {str(e)}")
             if attempt < max_retries - 1:
-                time.sleep(retry_delay * (attempt + 1))  # Exponential backoff
             else:
-                raise Exception(f"TTS API failed after {max_retries} attempts: {str(e)}")
-def generate_long_audio(script_text, voice_name, is_cheerful, output_path):
-    """Handles long audio generation by splitting text and combining results."""
-    print("Processing long audio generation...")
-    text_chunks = split_text_for_tts(script_text)
-    audio_clips = []
-    temp_files = []
-    try:
-        for i, chunk in enumerate(text_chunks):
-            print(f"Processing chunk {i+1}/{len(text_chunks)}")
-            chunk_audio = generate_audio_with_retry(chunk, voice_name, is_cheerful)
-            # Save chunk to temporary file
-            temp_file = f"temp_chunk_{i}.wav"
-            with open(temp_file, "wb") as f:
-                f.write(chunk_audio)
-            temp_files.append(temp_file)
-            # Load audio clip
-            audio_clip = AudioFileClip(temp_file)
-            audio_clips.append(audio_clip)
-        # Combine all audio clips
-        print("Combining audio chunks...")
-        final_audio = concatenate_audioclips(audio_clips)
-        final_audio.write_audiofile(output_path)
-    finally:
-        # Clean up temporary files
-        for temp_file in temp_files:
-            if os.path.exists(temp_file):
-                os.remove(temp_file)
-        # Close audio clips
-        for clip in audio_clips:
-            clip.close()
-def replace_video_audio(video_path, new_audio_path, output_path):
-    """Replaces the audio track of a video file."""
-    print("Replacing video audio...")
-    video_clip = None
-    audio_clip = None
     try:
-        video_clip = VideoFileClip(video_path)
-        audio_clip = AudioFileClip(new_audio_path)
-        video_clip.audio = audio_clip
-        video_clip.write_videofile(
             output_path,
             codec="libx264",
             audio_codec="aac",
-            logger='bar'
         )
     finally:
-        if audio_clip:
-            audio_clip.close()
-        if video_clip:
-            video_clip.close()
-# --- 4. FLASK ROUTES ---
-@app.route('/', methods=['GET'])
 def index():
-    """Render the main upload page."""
     return render_template('index.html', voices=VOICE_CHOICES)
-@app.route('/process', methods=['POST'])
-def process_video():
-    """Handle video upload and processing."""
-    input_video_path = None
-    temp_audio_path = None
     try:
-        # Validate file upload
-        if 'video' not in request.files or request.files['video'].filename == '':
-            flash("Please upload a video file.", "error")
-            return render_template('index.html', voices=VOICE_CHOICES)
-        # Save uploaded file
-        file = request.files['video']
-        filename = secure_filename(file.filename)
-        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-        file.save(input_video_path)
-        # Get processing options
-        voice_choice = request.form.get('voice', 'Charon')
-        is_cheerful = request.form.get('tone') == 'on'
-        # Generate script
-        script = generate_tamil_script(input_video_path)
-        # Create temporary audio file
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-            temp_audio_path = temp_audio.name
-        # Generate audio with retry and chunking
-        generate_long_audio(script, voice_choice, is_cheerful, temp_audio_path)
-        # Create dubbed video
-        final_video_name = f"dubbed_{filename}"
-        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
-        replace_video_audio(input_video_path, temp_audio_path, final_video_path)
-        flash("Video processing complete!", "success")
-        return render_template(
-            'index.html',
-            voices=VOICE_CHOICES,
-            result_video=url_for('serve_video', filename=final_video_name),
-            script=script
-        )
     except Exception as e:
-        print(f"Processing error: {str(e)}")
-        flash(f"An error occurred: {str(e)}", "error")
-        return render_template('index.html', voices=VOICE_CHOICES)
-    finally:
-        # Clean up temporary files
-        if input_video_path and os.path.exists(input_video_path):
-            os.remove(input_video_path)
-        if temp_audio_path and os.path.exists(temp_audio_path):
-            os.remove(temp_audio_path)
-@app.route('/downloads/<filename>')
-def serve_video(filename):
-    """Serve the processed video file."""
-    return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
-# --- 5. APPLICATION ENTRY POINT ---
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860)

 import uuid
 import google.generativeai as genai
 import requests
+from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
+import threading
+from datetime import datetime, timedelta
+import logging
+# Initialize Flask app and load secrets (load_dotenv() is called before the values are read)
 load_dotenv()
 app = Flask(__name__)
+# Configuration: both values are mandatory. Fail fast at import time and name
+# exactly which variables are absent so a misconfigured deployment is easy to fix.
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
+_missing_env = [
+    name
+    for name, value in (("GEMINI_API_KEY", GEMINI_API_KEY), ("TTS_API_URL", TTS_API_URL))
+    if not value
+]
+if _missing_env:
+    raise ValueError(f"Missing required environment variables: {', '.join(_missing_env)}")
 genai.configure(api_key=GEMINI_API_KEY)
+# File storage setup: both folders are created at import time if they do not exist.
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB
+# The secret key is random bytes generated anew on every start of the process.
+app.secret_key = os.urandom(24)
+# Processing status tracking: task_id -> status dict, held in this module's memory.
+processing_status = {}
+# Most recently recorded duration per stage; written by track_processing_time
+# for every task, so the values are not specific to one task.
+processing_times = {
+    'upload': 0,
+    'transcription': 0,
+    'tts': 0,
+    'dubbing': 0
+}
+# Voice options: display label -> voice name (upload_video defaults to 'Charon').
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 }
+# Prompt sent together with the uploaded video in generate_tamil_script.
 GEMINI_PROMPT = """
+You are an AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 **CRITICAL INSTRUCTIONS:**
+1.  **Single Script:** Combine all dialogue into one continuous script.
+2.  **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
+3.  **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
 **EXAMPLE OUTPUT:**
 Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
 """
+# Configure logging: INFO level, "timestamp - level - message" format.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+def track_processing_time(task_id, stage, duration):
+    """Record how long a stage took.
+
+    The duration is always written to the module-level ``processing_times``
+    table. It is also written to the task's ``'timings'`` entry when
+    ``task_id`` is present in ``processing_status``.
+    """
+    processing_times[stage] = duration
+    task_entry = processing_status.get(task_id)
+    if task_entry is None:
+        return
+    task_entry['timings'][stage] = duration
+def estimate_remaining_time(task_id):
+    """Estimate the remaining processing time for a task.
+
+    Returns:
+        A placeholder string ("Calculating..." for an unknown task,
+        "Starting soon..." when no stage has finished yet), otherwise the
+        estimated number of seconds left as a number. Callers distinguish
+        the two cases by type.
+    """
+    status = processing_status.get(task_id)
+    if status is None:
+        return "Calculating..."
+    # Weights for the stages the background worker actually times. 'upload' is
+    # deliberately excluded: the worker never records a duration for it, so
+    # counting it would leave one stage permanently "remaining" and inflate
+    # every estimate.
+    weights = {
+        'transcription': 2.0,
+        'tts': 1.5,
+        'dubbing': 1.0
+    }
+    timings = status['timings']
+    completed = {
+        stage: timings[stage]
+        for stage in weights
+        if timings.get(stage) is not None
+    }
+    if not completed:
+        return "Starting soon..."
+    # Weighted average of the finished stages, projected onto the unfinished ones.
+    total_weight = sum(weights[stage] for stage in completed)
+    weighted_time = sum(duration * weights[stage] for stage, duration in completed.items())
+    avg_time = weighted_time / total_weight
+    remaining_stages = len(weights) - len(completed)
+    return remaining_stages * avg_time
+def process_video_background(task_id, video_path, voice, cheerful):
+    """Run the dubbing pipeline for one uploaded video.
+
+    Stages: transcription (generate_tamil_script), TTS (generate_audio_track)
+    and dubbing (replace_video_audio). Progress, messages and per-stage
+    timings are published through ``processing_status[task_id]``.
+
+    Failures are not raised to the caller: they are logged and recorded in
+    the task's status as ``'error'``. The uploaded video and the temporary
+    audio file are removed whether the task succeeds or fails.
+    """
+    # Defined up front so the cleanup in `finally` can tell whether the
+    # temporary audio file was ever created.
+    audio_path = None
+    try:
+        start_time = time.time()
+        processing_status[task_id] = {
+            'status': 'processing',
+            'progress': 0,
+            'message': 'Starting transcription',
+            'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
+            'start_time': start_time,
+            'video_duration': get_video_duration(video_path)
+        }
+        # Stage 1: Transcription
+        processing_status[task_id]['message'] = 'Transcribing video content'
+        logger.info(f"Task {task_id}: Starting transcription")
+        script_start = time.time()
+        script = generate_tamil_script(video_path)
+        transcription_time = time.time() - script_start
+        track_processing_time(task_id, 'transcription', transcription_time)
+        processing_status[task_id]['progress'] = 25
+        processing_status[task_id]['script'] = script
+        logger.info(f"Task {task_id}: Transcription completed in {transcription_time:.1f}s")
+        # Stage 2: TTS Generation
+        processing_status[task_id]['message'] = 'Generating audio narration'
+        logger.info(f"Task {task_id}: Starting TTS generation")
+        # delete=False: only the path is needed here; the file is written by
+        # generate_audio_track and removed in `finally`.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            audio_path = temp_audio.name
+        tts_start = time.time()
+        generate_audio_track(script, voice, cheerful, audio_path)
+        tts_time = time.time() - tts_start
+        track_processing_time(task_id, 'tts', tts_time)
+        processing_status[task_id]['progress'] = 50
+        logger.info(f"Task {task_id}: TTS completed in {tts_time:.1f}s")
+        # Stage 3: Dubbing
+        processing_status[task_id]['message'] = 'Creating dubbed video'
+        logger.info(f"Task {task_id}: Starting dubbing")
+        final_filename = f"dubbed_{task_id}.mp4"
+        final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
+        dubbing_start = time.time()
+        replace_video_audio(video_path, audio_path, final_path)
+        dubbing_time = time.time() - dubbing_start
+        track_processing_time(task_id, 'dubbing', dubbing_time)
+        processing_status[task_id]['progress'] = 75
+        logger.info(f"Task {task_id}: Dubbing completed in {dubbing_time:.1f}s")
+        # Finalize
+        processing_status[task_id].update({
+            'status': 'complete',
+            'progress': 100,
+            'message': 'Processing complete',
+            'result_path': final_path,
+            'end_time': time.time()
+        })
+        logger.info(f"Task {task_id}: Processing completed successfully")
+    except Exception as e:
+        # logger.exception keeps the traceback, which the status message does not carry.
+        logger.exception(f"Task {task_id} failed: {str(e)}")
+        processing_status.setdefault(task_id, {}).update({
+            'status': 'error',
+            'message': f'Error: {str(e)}'
+        })
+    finally:
+        # Cleanup temporary files on every outcome. A failed delete is only
+        # logged so it cannot turn a finished task into an error.
+        for temp_path in (video_path, audio_path):
+            if temp_path and os.path.exists(temp_path):
+                try:
+                    os.unlink(temp_path)
+                except OSError as cleanup_error:
+                    logger.warning(f"Task {task_id}: could not remove {temp_path}: {cleanup_error}")
+def get_video_duration(video_path):
+    """Return the duration of the video at ``video_path`` in seconds.
+
+    Returns 0 when the file cannot be opened or read as a video; the failure
+    is logged instead of being silently discarded.
+    """
+    try:
+        with VideoFileClip(video_path) as video:
+            return video.duration
+    except Exception as e:
+        # `except Exception` rather than a bare `except:` so KeyboardInterrupt
+        # and SystemExit are not swallowed.
+        logger.warning(f"Could not read duration of {video_path}: {str(e)}")
+        return 0
+def generate_tamil_script(video_path):
+    """Generate the Tamil script for a video using Gemini, with retries.
+
+    Each attempt uploads the video, waits (up to 5 minutes) for Gemini to
+    finish processing it, then requests the script with GEMINI_PROMPT. The
+    uploaded file is deleted after every attempt, successful or not.
+
+    Returns:
+        The generated text with its lines joined by single spaces.
+
+    Raises:
+        The exception from the final attempt once all retries are used up.
+    """
+    max_retries = 3
+    retry_delay = 10  # seconds
+    for attempt in range(max_retries):
+        try:
+            video_file = genai.upload_file(video_path, mime_type="video/mp4")
+            try:
+                # Wait for file processing with timeout
+                start_wait = time.time()
+                while video_file.state.name == "PROCESSING":
+                    if time.time() - start_wait > 300:  # 5 minutes timeout
+                        raise TimeoutError("Gemini processing timed out")
+                    time.sleep(5)
+                    video_file = genai.get_file(video_file.name)
+                if video_file.state.name != "ACTIVE":
+                    raise Exception(f"Gemini processing failed: {video_file.state.name}")
+                model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
+                response = model.generate_content([GEMINI_PROMPT, video_file])
+                if hasattr(response, 'text') and response.text:
+                    return " ".join(response.text.strip().splitlines())
+                raise Exception("No valid script generated")
+            finally:
+                # Remove the uploaded file on failure as well, so failed
+                # attempts do not leave uploads behind. Deletion is best
+                # effort: a failure here must not discard a valid script or
+                # mask the original error.
+                try:
+                    genai.delete_file(video_file.name)
+                except Exception as cleanup_error:
+                    logger.warning(f"Could not delete Gemini file {video_file.name}: {cleanup_error}")
+        except Exception as e:
+            if attempt < max_retries - 1:
+                logger.warning(f"Gemini error (attempt {attempt+1}/{max_retries}): {str(e)}")
+                # Delay grows linearly with the attempt number.
+                time.sleep(retry_delay * (attempt + 1))
+            else:
+                raise
+def generate_audio_track(text, voice, cheerful, output_path):
+    """Request narration audio from the TTS API and write it to ``output_path``.
+
+    Up to three attempts are made. Any failure (request error, non-200
+    response, write error) is logged and retried after a delay that grows
+    with the attempt number; the last failure is re-raised.
+    """
+    max_retries = 3
+    retry_delay = 5  # seconds
+    # The request body is the same for every attempt, so build it once.
+    payload = {
+        "text": text,
+        "voice_name": voice,
+        "cheerful": cheerful
+    }
+    for attempt in range(max_retries):
+        out_of_retries = attempt >= max_retries - 1
+        try:
+            response = requests.post(TTS_API_URL, json=payload, timeout=300)
+            if response.status_code != 200:
+                raise Exception(f"TTS API error: {response.status_code} - {response.text}")
+            with open(output_path, "wb") as audio_file:
+                audio_file.write(response.content)
+            return
+        except Exception as e:
+            if out_of_retries:
+                raise
+            logger.warning(f"TTS error (attempt {attempt+1}/{max_retries}): {str(e)}")
+            time.sleep(retry_delay * (attempt + 1))
+def replace_video_audio(video_path, audio_path, output_path):
+    """Write a copy of the video to ``output_path`` with its audio track replaced.
+
+    On any failure the error is logged, a partially written output file is
+    removed, and the exception is re-raised. Both clips are closed afterwards
+    if they were opened.
+    """
+    video_clip = audio_clip = None
+    # Encoder settings passed to write_videofile.
+    encode_options = {
+        'codec': "libx264",
+        'audio_codec': "aac",
+        'logger': None,
+        'threads': 4,
+        'preset': 'medium',
+        'ffmpeg_params': ['-crf', '23', '-movflags', '+faststart'],
+    }
+    try:
+        video_clip = VideoFileClip(video_path)
+        audio_clip = AudioFileClip(audio_path)
+        # Attach the new audio track before encoding.
+        video_clip.audio = audio_clip
+        video_clip.write_videofile(output_path, **encode_options)
+    except Exception as e:
+        logger.error(f"Video processing error: {str(e)}")
+        # Do not leave a half-written result behind.
+        if os.path.exists(output_path):
+            os.unlink(output_path)
+        raise
+    finally:
+        # Close the video first, then the audio.
+        for clip in (video_clip, audio_clip):
+            if clip:
+                clip.close()
+@app.route('/')
 def index():
+    """Render the main page template, passing VOICE_CHOICES as ``voices``."""
     return render_template('index.html', voices=VOICE_CHOICES)
+@app.route('/upload', methods=['POST'])
+def upload_video():
+    """Accept a video upload and start processing it in a background thread.
+
+    Expects a multipart form with a ``video`` file and optional ``voice``
+    (default ``'Charon'``) and ``cheerful`` (``'true'`` enables it) fields.
+
+    Returns:
+        JSON with the new ``task_id`` and the detected ``video_duration``,
+        or a JSON error with status 400 when no file was supplied.
+    """
+    if 'video' not in request.files:
+        return jsonify({'error': 'No file uploaded'}), 400
+    file = request.files['video']
+    if file.filename == '':
+        return jsonify({'error': 'No file selected'}), 400
+    # Generate unique task ID; prefixing it keeps stored file names unique
+    # even when two users upload files with the same name.
+    task_id = str(uuid.uuid4())
+    filename = secure_filename(f"{task_id}_{file.filename}")
+    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    # Time the save so 'upload' holds a duration in seconds like every other
+    # stage timing, rather than an absolute timestamp.
+    save_start = time.time()
+    file.save(video_path)
+    upload_duration = time.time() - save_start
+    # Get processing options
+    voice = request.form.get('voice', 'Charon')
+    cheerful = request.form.get('cheerful', 'false') == 'true'
+    video_duration = get_video_duration(video_path)
+    # Publish the initial status before the worker starts.
+    processing_status[task_id] = {
+        'status': 'uploaded',
+        'progress': 0,
+        'message': 'Starting processing',
+        'timings': {'upload': upload_duration, 'transcription': None, 'tts': None, 'dubbing': None},
+        'start_time': time.time(),
+        'video_duration': video_duration
+    }
+    # Start background processing
+    thread = threading.Thread(
+        target=process_video_background,
+        args=(task_id, video_path, voice, cheerful)
+    )
+    thread.start()
+    # Answer from the local value: the worker replaces the status entry as
+    # soon as it starts, so it is not re-read here.
+    return jsonify({
+        'task_id': task_id,
+        'video_duration': video_duration
+    })
+@app.route('/status/<task_id>')
+def get_status(task_id):
+    """Report the processing status of a task as JSON.
+
+    The response always carries ``status``, ``progress``, ``message`` and
+    ``eta`` (``None`` unless a numeric estimate is available while
+    processing). Completed tasks additionally carry ``result_url`` and
+    ``script``. Unknown task IDs yield a 404 JSON error.
+    """
+    # One lookup, so the rest of the handler works on a single status dict.
+    status = processing_status.get(task_id)
+    if status is None:
+        return jsonify({'error': 'Invalid task ID'}), 404
+    # Calculate ETA if processing
+    eta = None
+    if status['status'] == 'processing':
+        remaining = estimate_remaining_time(task_id)
+        # The estimator returns placeholder strings until it has data;
+        # only numeric results are formatted as an ETA.
+        if isinstance(remaining, (int, float)):
+            eta = str(timedelta(seconds=int(remaining)))
+    response = {
+        'status': status['status'],
+        'progress': status.get('progress', 0),
+        'message': status.get('message', ''),
+        'eta': eta
+    }
+    if status['status'] == 'complete':
+        response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
+        response['script'] = status.get('script', '')
+    return jsonify(response)
+@app.route('/download/<filename>')
+def download(filename):
+    """Serve the file named ``filename`` from the configured DOWNLOAD_FOLDER."""
+    return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
+@app.route('/cleanup', methods=['POST'])
+def cleanup():
+    """Delete stale files: uploads older than 1 hour, downloads older than 24 hours.
+
+    Age is judged by modification time. Entries that are not regular files
+    are skipped, and a file that cannot be removed is logged and skipped so
+    one bad entry does not abort the rest of the sweep.
+
+    Returns:
+        JSON ``{'status': 'success', ...}``, or ``{'status': 'error', ...}``
+        with status 500 if the sweep itself fails (e.g. a folder cannot be
+        listed).
+    """
+    # (folder, maximum age in seconds)
+    retention = (
+        (UPLOAD_FOLDER, 3600),
+        (DOWNLOAD_FOLDER, 86400),
+    )
+    try:
+        now = time.time()
+        for folder, max_age in retention:
+            for filename in os.listdir(folder):
+                file_path = os.path.join(folder, filename)
+                try:
+                    if os.path.isfile(file_path) and os.path.getmtime(file_path) < now - max_age:
+                        os.unlink(file_path)
+                except OSError as e:
+                    logger.warning(f"Cleanup could not remove {file_path}: {str(e)}")
+        return jsonify({'status': 'success', 'message': 'Cleanup completed'})
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)}), 500
 if __name__ == '__main__':
+    # Schedule cleanup thread
+    # `schedule` is imported here, so it is only needed when this file is run as a script.
+    import schedule
+    import time as t
+    def cleanup_job():
+        # Calls the /cleanup route through Flask's test client instead of a real HTTP request.
+        with app.app_context():
+            app.test_client().post('/cleanup')
+    schedule.every().hour.do(cleanup_job)
+    # Start scheduler in background thread
+    def scheduler_thread():
+        # Check once per second whether a scheduled job is due.
+        while True:
+            schedule.run_pending()
+            t.sleep(1)
+    # daemon=True so this endless loop does not keep the process alive at exit.
+    threading.Thread(target=scheduler_thread, daemon=True).start()
+    # Start Flask app
+    app.run(host="0.0.0.0", port=7860, threaded=True)