Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed about 9 hours ago

Commit

2094e3e

verified ·

1 Parent(s): be0d1aa

Update app.py

Browse files

Files changed (1) hide show

app.py +199 -336

app.py CHANGED Viewed

@@ -4,393 +4,256 @@ import tempfile
 import uuid
 import google.generativeai as genai
 import requests
-import re
-from flask import Flask, request, render_template, send_from_directory, jsonify
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
-import threading
-import logging
-from gtts import gTTS
-import io
-from pathlib import Path
-# Initialize Flask app
 load_dotenv()
 app = Flask(__name__)
-# Configuration
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-TTS_API_URL = os.getenv("TTS_API_URL", "")  # Optional
-MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB
-MAX_TTS_RETRIES = 3
-TTS_CHUNK_SIZE = 2000  # Characters per chunk
-MAX_WAIT_TIME = 300  # 5 minutes max wait for file processing
-# File storage setup
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
-Path(UPLOAD_FOLDER).mkdir(exist_ok=True)
-Path(DOWNLOAD_FOLDER).mkdir(exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
-app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
-app.secret_key = os.urandom(24)
-# Processing status tracking
-processing_status = {}
-# Language and voice options
-LANGUAGE_MAPPING = {
-    "Arabic (Egyptian)": "ar-EG",
-    "English (US)": "en-US",
-    "Hindi (India)": "hi-IN",
-    "Tamil (India)": "ta-IN",
-    "Telugu (India)": "te-IN"
 }
-VOICE_TYPES = {
-    "Male": "male",
-    "Female": "female"
-}
-GEMINI_PROMPTS = {
-    "api": """
-    You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
-    CONTINUOUS block of modern {language}. Include natural speech patterns and
-    performance directions (e.g., [pause], [laugh]) where appropriate.
-    """,
-    "gtts": """
-    You are an expert AI scriptwriter. Transcribe ALL spoken dialogue into a SINGLE,
-    CONTINUOUS block of modern {language}. Return ONLY the clean transcribed text.
-    """
-}
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-def split_text_into_chunks(text, chunk_size=TTS_CHUNK_SIZE):
-    """Split text into chunks respecting sentence boundaries"""
-    sentences = re.split(r'(?<=[.!?])\s+', text)
     chunks = []
-    current_chunk = ""
-    for sentence in sentences:
-        if len(current_chunk) + len(sentence) < chunk_size:
-            current_chunk += sentence + " "
         else:
-            chunks.append(current_chunk.strip())
-            current_chunk = sentence + " "
     if current_chunk:
-        chunks.append(current_chunk.strip())
     return chunks
-def generate_tts_audio(text, language_code, voice_type, tts_provider):
-    """Generate TTS audio using selected provider with retry logic"""
-    chunks = split_text_into_chunks(text)
-    audio_segments = []
-    for chunk in chunks:
-        for attempt in range(MAX_TTS_RETRIES):
-            try:
-                if tts_provider == "api":
-                    # Use custom TTS API
-                    payload = {
-                        "text": chunk,
-                        "language": language_code,
-                        "voice_type": voice_type
-                    }
-                    response = requests.post(TTS_API_URL, json=payload, timeout=300)
-                    if response.status_code == 200:
-                        audio_segments.append(io.BytesIO(response.content))
-                        break
-                    elif response.status_code == 429:  # Rate limit
-                        retry_after = int(response.headers.get('Retry-After', 5))
-                        logger.warning(f"TTS API rate limited. Retrying after {retry_after}s")
-                        time.sleep(retry_after)
-                        continue
-                    else:
-                        raise Exception(f"TTS API error: {response.status_code}")
-                else:
-                    # Use gTTS
-                    tts = gTTS(
-                        text=chunk,
-                        lang=language_code.split('-')[0],
-                        slow=False
-                    )
-                    buffer = io.BytesIO()
-                    tts.write_to_fp(buffer)
-                    buffer.seek(0)
-                    audio_segments.append(buffer)
-                    break
-            except Exception as e:
-                logger.warning(f"TTS attempt {attempt + 1} failed: {str(e)}")
-                if attempt == MAX_TTS_RETRIES - 1:
-                    raise Exception(f"Failed to generate TTS after {MAX_TTS_RETRIES} attempts")
-                time.sleep(2 ** attempt)  # Exponential backoff
-    # Combine audio segments
-    combined_audio = io.BytesIO()
-    for segment in audio_segments:
-        combined_audio.write(segment.getvalue())
-    combined_audio.seek(0)
-    return combined_audio
-def wait_for_file_processing(file):
-    """Wait for file to be processed with timeout handling"""
-    start_time = time.time()
-    while file.state.name == "PROCESSING":
-        if time.time() - start_time > MAX_WAIT_TIME:
-            raise TimeoutError("File processing timed out")
-        time.sleep(5)
-        file = genai.get_file(file.name)
-    return file
-def generate_transcription(video_path, prompt):
-    """Generate transcript using Gemini with enhanced file handling"""
-    max_retries = 3
     for attempt in range(max_retries):
         try:
-            # Upload file with explicit timeout
-            video_file = genai.upload_file(video_path, mime_type="video/mp4")
-            # Wait for processing with timeout
-            video_file = wait_for_file_processing(video_file)
-            if video_file.state.name != "ACTIVE":
-                raise Exception(f"File processing failed: {video_file.state.name}")
-            model = genai.GenerativeModel("models/gemini-2.5-flash")
-            response = model.generate_content([prompt, video_file])
-            if hasattr(response, 'text'):
-                return response.text.strip()
-            raise Exception("No valid transcription generated")
-        except Exception as e:
-            logger.warning(f"Transcription attempt {attempt + 1} failed: {str(e)}")
-            if attempt == max_retries - 1:
-                raise
-            time.sleep(5 * (attempt + 1))
-        finally:
-            # Always delete the file if it was created
-            if 'video_file' in locals() and hasattr(video_file, 'name'):
-                try:
-                    genai.delete_file(video_file.name)
-                except Exception as delete_error:
-                    logger.error(f"Failed to delete file: {str(delete_error)}")
-def dub_video(video_path, audio_buffer):
-    """Dub video with new audio"""
-    video = None
-    audio = None
-    temp_audio_path = None
     try:
-        # Save audio buffer to temp file
-        temp_audio_path = f"temp_audio_{uuid.uuid4().hex}.mp3"
-        with open(temp_audio_path, 'wb') as f:
-            f.write(audio_buffer.read())
-        # Process video
-        video = VideoFileClip(video_path)
-        audio = AudioFileClip(temp_audio_path)
-        # Ensure audio length matches video
-        if audio.duration > video.duration:
-            audio = audio.subclip(0, video.duration)
-        video = video.set_audio(audio)
-        # Save output
-        output_filename = f"dubbed_{uuid.uuid4().hex}.mp4"
-        output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], output_filename)
-        video.write_videofile(
             output_path,
             codec="libx264",
             audio_codec="aac",
-            threads=4,
-            verbose=False,
-            preset='medium',
-            ffmpeg_params=['-crf', '23', '-movflags', '+faststart']
         )
-        return output_path
-    finally:
-        # Cleanup resources
-        if video:
-            video.close()
-        if audio:
-            audio.close()
-        if temp_audio_path and os.path.exists(temp_audio_path):
-            try:
-                os.unlink(temp_audio_path)
-            except Exception as e:
-                logger.error(f"Failed to delete temp audio: {str(e)}")
-def process_video_background(task_id, video_path, language, voice_type, tts_provider):
-    """Background video processing with enhanced error handling"""
-    try:
-        processing_status[task_id] = {
-            'status': 'processing',
-            'progress': 0,
-            'message': 'Starting transcription',
-            'start_time': time.time()
-        }
-        # Stage 1: Transcription
-        processing_status[task_id]['message'] = 'Transcribing video content'
-        prompt = GEMINI_PROMPTS[tts_provider].format(language=language)
-        script = generate_transcription(video_path, prompt)
-        processing_status[task_id]['progress'] = 33
-        processing_status[task_id]['script'] = script
-        # Stage 2: Audio Generation
-        processing_status[task_id]['message'] = 'Generating audio narration'
-        language_code = LANGUAGE_MAPPING.get(language, "en-US")
-        audio_buffer = generate_tts_audio(script, language_code, voice_type, tts_provider)
-        processing_status[task_id]['progress'] = 66
-        # Stage 3: Video Dubbing
-        processing_status[task_id]['message'] = 'Creating dubbed video'
-        output_path = dub_video(video_path, audio_buffer)
-        processing_status[task_id]['progress'] = 100
-        processing_status[task_id]['status'] = 'complete'
-        processing_status[task_id]['result_path'] = output_path
-    except Exception as e:
-        processing_status[task_id]['status'] = 'error'
-        processing_status[task_id]['message'] = str(e)
-        logger.error(f"Processing failed: {str(e)}")
     finally:
-        # Cleanup original video
-        if os.path.exists(video_path):
-            try:
-                os.unlink(video_path)
-            except Exception as e:
-                logger.error(f"Failed to delete video: {str(e)}")
-@app.route('/')
 def index():
-    """Render main page"""
-    return render_template(
-        'index.html',
-        languages=list(LANGUAGE_MAPPING.keys()),
-        voice_types=list(VOICE_TYPES.keys()),
-        default_language="English (US)",
-        tts_api_available=bool(TTS_API_URL)
-    )
-@app.route('/upload', methods=['POST'])
-def upload_video():
-    """Handle video upload"""
-    if 'video' not in request.files:
-        return jsonify({'error': 'No file uploaded'}), 400
-    file = request.files['video']
-    if file.filename == '':
-        return jsonify({'error': 'No file selected'}), 400
-    # Validate file extension
-    allowed_extensions = {'mp4', 'mov', 'webm', 'avi'}
-    if '.' not in file.filename or file.filename.rsplit('.', 1)[1].lower() not in allowed_extensions:
-        return jsonify({'error': 'Invalid file type'}), 400
-    # Save file with unique name
-    task_id = uuid.uuid4().hex
-    filename = secure_filename(f"{task_id}_{file.filename}")
-    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-    try:
-        file.save(video_path)
-    except Exception as e:
-        return jsonify({'error': f'Failed to save file: {str(e)}'}), 500
-    # Get processing options
-    language = request.form.get('language', 'English (US)')
-    voice_type = request.form.get('voice_type', 'Male')
-    tts_provider = request.form.get('tts_provider', 'gtts')
-    # Validate TTS provider selection
-    if tts_provider == "api" and not TTS_API_URL:
-        return jsonify({'error': 'TTS API is not configured'}), 400
-    # Start background processing
-    processing_status[task_id] = {
-        'status': 'uploaded',
-        'progress': 0,
-        'message': 'Starting processing',
-        'start_time': time.time()
-    }
-    thread = threading.Thread(
-        target=process_video_background,
-        args=(task_id, video_path, language, voice_type, tts_provider)
-    )
-    thread.start()
-    return jsonify({'task_id': task_id})
-@app.route('/status/<task_id>')
-def get_status(task_id):
-    """Check processing status"""
-    if task_id not in processing_status:
-        return jsonify({'error': 'Invalid task ID'}), 404
-    status = processing_status[task_id]
-    response = {
-        'status': status['status'],
-        'progress': status.get('progress', 0),
-        'message': status.get('message', ''),
-    }
-    if status['status'] == 'complete':
-        response['result_url'] = url_for(
-            'download',
-            filename=os.path.basename(status['result_path'])
-        )
-        response['script'] = status.get('script', '')
-    elif status['status'] == 'error':
-        response['error_details'] = status.get('message', 'Unknown error')
-    return jsonify(response)
-@app.route('/download/<filename>')
-def download(filename):
-    """Serve processed video with security checks"""
     try:
-        # Security check
-        if not filename.startswith('dubbed_') or not filename.endswith('.mp4'):
-            return "Invalid file", 400
-        # Validate path
-        download_path = Path(app.config['DOWNLOAD_FOLDER']) / filename
-        if not download_path.exists():
-            return "File not found", 404
-        return send_from_directory(
-            app.config['DOWNLOAD_FOLDER'],
-            filename,
-            as_attachment=True,
-            mimetype='video/mp4'
         )
     except Exception as e:
-        logger.error(f"Download failed: {str(e)}")
-        return "Download error", 500
 if __name__ == '__main__':
-    if not GEMINI_API_KEY:
-        raise ValueError("GEMINI_API_KEY is required in .env file")
-    app.run(host="0.0.0.0", port=7860, threaded=True)

 import uuid
 import google.generativeai as genai
 import requests
+from flask import Flask, request, render_template, send_from_directory, url_for, flash
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
+from moviepy.audio.AudioClip import concatenate_audioclips
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
load_dotenv()
app = Flask(__name__)

# Load secrets from environment variables (load_dotenv() above is called
# first so that values from a local .env file are visible to os.getenv).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")
# Default chunk budget for split_text_for_tts. NOTE: that function compares
# this value against character counts (len(word) + 1), not model tokens.
TTS_MAX_TOKENS = 30000  # Conservative limit below 32k token threshold

# Validate required configurations: fail fast at import time rather than on
# the first request.
if not GEMINI_API_KEY:
    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
if not TTS_API_URL:
    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")

# Configure Gemini AI
genai.configure(api_key=GEMINI_API_KEY)

# Configure directories
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit

# Key used to sign the session cookie that carries flash messages. A key
# generated per process changes on every restart and differs between worker
# processes, which silently drops pending flash messages; prefer a stable
# key from the environment and fall back to a random one only when unset.
app.secret_key = os.getenv("FLASK_SECRET_KEY") or os.urandom(24)

# --- 2. APPLICATION CONFIGURATION ---
# Maps the label passed to the template (as ``voices``) to the voice name
# sent to the TTS API.
VOICE_CHOICES = {
    "Male (Charon)": "Charon",
    "Female (Zephyr)": "Zephyr"
}

# Instruction prompt sent to Gemini together with the uploaded video.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
**CRITICAL INSTRUCTIONS:**
1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
**EXAMPLE OUTPUT:**
Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
"""
+# --- 3. CORE APPLICATION FUNCTIONS ---
def generate_tamil_script(video_file_path, max_wait_seconds=300, poll_interval=5):
    """Generate a single-line Tamil script for a video using Gemini.

    The video is uploaded to Gemini's file store, polled until it leaves the
    ``PROCESSING`` state, and then passed to the model together with
    ``GEMINI_PROMPT``. The uploaded remote file is deleted afterwards whether
    or not generation succeeded.

    Args:
        video_file_path: Local path of the video to transcribe.
        max_wait_seconds: Upper bound on how long to wait for Gemini to
            finish processing the upload.
        poll_interval: Seconds to sleep between state checks.

    Returns:
        The generated script with all line breaks collapsed to spaces.

    Raises:
        TimeoutError: If the upload is still processing after
            ``max_wait_seconds``.
        Exception: If the file ends in a non-``ACTIVE`` state or the model
            returns no usable text.
    """
    print("Uploading video to Gemini for transcription...")
    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")
    try:
        # Wait for file processing, but never spin forever on a stuck upload.
        deadline = time.monotonic() + max_wait_seconds
        while video_file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Gemini file processing timed out after {max_wait_seconds} seconds"
                )
            time.sleep(poll_interval)
            video_file = genai.get_file(video_file.name)
        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini file processing failed: {video_file.state.name}")

        print("Generating script...")
        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Best-effort cleanup of the remote copy on every path; a cleanup
        # failure must not mask the real error or discard a generated script.
        try:
            genai.delete_file(video_file.name)
        except Exception as delete_error:
            print(f"Failed to delete Gemini file: {str(delete_error)}")

    # The ``text`` accessor can raise instead of returning (e.g. when the
    # response carries no candidate), which hasattr() would not intercept.
    try:
        script_text = response.text
    except (AttributeError, ValueError):
        script_text = None
    if script_text:
        return " ".join(script_text.strip().splitlines())
    raise Exception("No valid script was generated by Gemini.")
def split_text_for_tts(text, max_tokens=TTS_MAX_TOKENS):
    """Split text on whitespace into chunks that stay within a size budget.

    Despite the parameter name, size is measured in characters: every word
    costs ``len(word) + 1`` (the extra one accounts for the joining space).
    Words are never broken, so a single word that is larger than the budget
    is returned as a chunk of its own.

    Args:
        text: The script to split.
        max_tokens: Maximum accumulated cost per chunk.

    Returns:
        A list of non-empty chunk strings in original order; an empty list
        when ``text`` contains no words.
    """
    chunks = []
    pending_words = []
    pending_length = 0
    for word in text.split():
        word_length = len(word) + 1  # +1 for space
        # Flush only when something has been collected; otherwise an
        # oversized first word would produce an empty leading chunk.
        if pending_words and pending_length + word_length > max_tokens:
            chunks.append(" ".join(pending_words))
            pending_words = []
            pending_length = 0
        pending_words.append(word)
        pending_length += word_length
    if pending_words:
        chunks.append(" ".join(pending_words))
    return chunks
def generate_audio_with_retry(text_chunk, voice_name, is_cheerful, max_retries=3, retry_delay=2):
    """Request synthesized audio for one text chunk, retrying on failure.

    Args:
        text_chunk: Text to synthesize.
        voice_name: Voice identifier forwarded to the TTS API.
        is_cheerful: Flag forwarded to the TTS API as ``cheerful``.
        max_retries: Total number of attempts.
        retry_delay: Base delay in seconds; the wait doubles after each
            failed attempt.

    Returns:
        The raw response body (audio bytes) of the first successful attempt.

    Raises:
        Exception: If no attempt produced a successful, non-empty response.
    """
    payload = {
        "text": text_chunk,
        "voice_name": voice_name,
        "cheerful": is_cheerful
    }
    last_error = None
    for attempt in range(max_retries):
        try:
            response = requests.post(TTS_API_URL, json=payload, timeout=300)
            response.raise_for_status()
            if response.content:
                return response.content
            # A success status without a body is unusable as audio; treat it
            # as a failed attempt instead of silently returning nothing.
            last_error = ValueError(
                f"empty response body (HTTP {response.status_code})"
            )
        except requests.exceptions.RequestException as e:
            last_error = e
        print(f"TTS API attempt {attempt + 1} failed: {str(last_error)}")
        if attempt < max_retries - 1:
            time.sleep(retry_delay * (2 ** attempt))  # Exponential backoff
    # Reached only when every attempt failed (or max_retries was not
    # positive), so callers never receive None.
    raise Exception(
        f"TTS API failed after {max_retries} attempts: {str(last_error)}"
    ) from last_error
def generate_long_audio(script_text, voice_name, is_cheerful, output_path):
    """Synthesize a long script in chunks and write the joined audio file.

    The script is split with ``split_text_for_tts``; each chunk is sent to
    the TTS API via ``generate_audio_with_retry``, stored in a private
    temporary directory, and the resulting clips are concatenated into
    ``output_path``.

    Args:
        script_text: Full script to narrate.
        voice_name: Voice identifier forwarded to the TTS API.
        is_cheerful: Tone flag forwarded to the TTS API.
        output_path: Destination path of the combined audio file.

    Raises:
        ValueError: If the script contains no text to synthesize.
        Exception: Propagated from the TTS call or from audio processing.
    """
    print("Processing long audio generation...")
    text_chunks = split_text_for_tts(script_text)
    if not text_chunks:
        raise ValueError("Cannot generate audio: the script is empty.")

    audio_clips = []
    # A per-call directory keeps chunk files of concurrent requests apart;
    # fixed names in the working directory would overwrite each other.
    with tempfile.TemporaryDirectory(prefix="tts_chunks_") as work_dir:
        try:
            for i, chunk in enumerate(text_chunks):
                print(f"Processing chunk {i+1}/{len(text_chunks)}")
                chunk_audio = generate_audio_with_retry(chunk, voice_name, is_cheerful)
                # Save chunk to temporary file
                chunk_path = os.path.join(work_dir, f"temp_chunk_{i}.wav")
                with open(chunk_path, "wb") as f:
                    f.write(chunk_audio)
                # Load audio clip
                audio_clips.append(AudioFileClip(chunk_path))
            # Combine all audio clips
            print("Combining audio chunks...")
            final_audio = concatenate_audioclips(audio_clips)
            final_audio.write_audiofile(output_path)
        finally:
            # Close the readers before the directory (and the files they
            # have open) is removed when the ``with`` block exits.
            for clip in audio_clips:
                clip.close()
def replace_video_audio(video_path, new_audio_path, output_path):
    """Write the video to ``output_path`` with its audio track swapped.

    Args:
        video_path: Path of the source video.
        new_audio_path: Path of the audio file that becomes the new track.
        output_path: Path of the encoded result (libx264 video, aac audio).
    """
    print("Replacing video audio...")
    source_video = None
    dubbed_track = None
    encode_options = {
        "codec": "libx264",
        "audio_codec": "aac",
        "logger": 'bar',
    }
    try:
        source_video = VideoFileClip(video_path)
        dubbed_track = AudioFileClip(new_audio_path)
        source_video.audio = dubbed_track
        source_video.write_videofile(output_path, **encode_options)
    finally:
        # Release whichever clips were opened: audio first, then video.
        if dubbed_track:
            dubbed_track.close()
        if source_video:
            source_video.close()
+# --- 4. FLASK ROUTES ---
@app.route('/', methods=['GET'])
def index():
    """Serve the upload form, passing the available voices to the template."""
    page = render_template('index.html', voices=VOICE_CHOICES)
    return page
@app.route('/process', methods=['POST'])
def process_video():
    """Handle a video upload and run the full dubbing pipeline.

    Steps: save the upload under a unique name, generate the Tamil script,
    synthesize the narration, mux it into the video, and render the page
    with a link to the result. Any failure is reported through a flash
    message and the upload page is rendered again.

    Returns:
        The rendered ``index.html`` page, with ``result_video`` and
        ``script`` set on success.
    """
    input_video_path = None
    temp_audio_path = None
    try:
        # Validate file upload
        file = request.files.get('video')
        if file is None or not file.filename:
            flash("Please upload a video file.", "error")
            return render_template('index.html', voices=VOICE_CHOICES)

        # Save uploaded file. secure_filename() can return an empty string
        # (e.g. for names made only of non-ASCII characters), and two users
        # may upload the same name, so build the path from a per-request id.
        job_id = uuid.uuid4().hex
        stem, extension = os.path.splitext(secure_filename(file.filename))
        if not stem:
            stem = "video"
        filename = f"{job_id}_{stem}{extension}"
        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(input_video_path)

        # Get processing options
        voice_choice = request.form.get('voice', 'Charon')
        is_cheerful = request.form.get('tone') == 'on'

        # Generate script
        script = generate_tamil_script(input_video_path)

        # Reserve a temporary audio path; the file is closed right away so
        # the audio writer can open it by name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_audio_path = temp_audio.name

        # Generate audio with retry and chunking
        generate_long_audio(script, voice_choice, is_cheerful, temp_audio_path)

        # Create dubbed video. The result is encoded as H.264/AAC, so it is
        # always written to an .mp4 container regardless of the upload's
        # extension.
        final_video_name = f"dubbed_{job_id}_{stem}.mp4"
        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
        replace_video_audio(input_video_path, temp_audio_path, final_video_path)

        flash("Video processing complete!", "success")
        return render_template(
            'index.html',
            voices=VOICE_CHOICES,
            result_video=url_for('serve_video', filename=final_video_name),
            script=script
        )
    except Exception as e:
        # Top-level request boundary: report the failure to the user instead
        # of returning a 500 page.
        print(f"Processing error: {str(e)}")
        flash(f"An error occurred: {str(e)}", "error")
        return render_template('index.html', voices=VOICE_CHOICES)
    finally:
        # Clean up temporary files
        if input_video_path and os.path.exists(input_video_path):
            os.remove(input_video_path)
        if temp_audio_path and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Return the requested file from the configured download folder."""
    download_dir = app.config['DOWNLOAD_FOLDER']
    return send_from_directory(download_dir, filename)
# --- 5. APPLICATION ENTRY POINT ---
# Start Flask's built-in server only when this file is executed directly;
# it listens on every interface at port 7860.
if __name__ == '__main__':
    app.run(port=7860, host="0.0.0.0")