Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed 4 days ago

Commit

c3c3d92

verified ·

1 Parent(s): 8dd1a38

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -205

app.py CHANGED Viewed

@@ -4,47 +4,40 @@ import tempfile
 import uuid
 import google.generativeai as genai
 import requests
-from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
-import threading
-from datetime import datetime, timedelta
-# Initialize Flask app and load secrets
 load_dotenv()
 app = Flask(__name__)
-# Configuration
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
-if not GEMINI_API_KEY or not TTS_API_URL:
-    raise ValueError("Missing required environment variables")
 genai.configure(api_key=GEMINI_API_KEY)
-# File storage setup
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
-app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB
-app.secret_key = os.urandom(24)
-# Processing status tracking
-processing_status = {}
-processing_times = {
-    'upload': 0,
-    'transcription': 0,
-    'tts': 0,
-    'dubbing': 0
-}
-# Voice options
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
@@ -54,223 +47,144 @@ GEMINI_PROMPT = """
 You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 **CRITICAL INSTRUCTIONS:**
-1. Combine all dialogue into one continuous script.
-2. NO timestamps or speaker labels.
-3. Add performance directions (e.g., `Say happily:`, `[laugh]`) directly in the text.
-"""
-def track_processing_time(task_id, stage, duration):
-    """Track processing times for each stage"""
-    processing_times[stage] = duration
-    if task_id in processing_status:
-        processing_status[task_id]['timings'][stage] = duration
-def estimate_remaining_time(task_id):
-    """Estimate remaining processing time"""
-    if task_id not in processing_status:
-        return "Calculating..."
-    status = processing_status[task_id]
-    completed_stages = [s for s in status['timings'] if status['timings'][s] is not None]
-    if len(completed_stages) == 0:
-        return "Starting soon..."
-    avg_time = sum(status['timings'][s] for s in completed_stages) / len(completed_stages)
-    remaining_stages = 4 - len(completed_stages)  # Total stages: upload, transcription, tts, dubbing
-    return remaining_stages * avg_time
-def process_video_background(task_id, video_path, voice, cheerful):
-    """Background processing function"""
-    try:
-        start_time = time.time()
-        processing_status[task_id] = {
-            'status': 'processing',
-            'progress': 0,
-            'message': 'Starting transcription',
-            'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
-            'start_time': start_time
-        }
-        # Stage 1: Transcription
-        processing_status[task_id]['message'] = 'Transcribing video content'
-        script_start = time.time()
-        script = generate_tamil_script(video_path)
-        transcription_time = time.time() - script_start
-        track_processing_time(task_id, 'transcription', transcription_time)
-        processing_status[task_id]['progress'] = 25
-        # Stage 2: TTS Generation
-        processing_status[task_id]['message'] = 'Generating audio narration'
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-            audio_path = temp_audio.name
-        tts_start = time.time()
-        generate_audio_track(script, voice, cheerful, audio_path)
-        tts_time = time.time() - tts_start
-        track_processing_time(task_id, 'tts', tts_time)
-        processing_status[task_id]['progress'] = 50
-        # Stage 3: Dubbing
-        processing_status[task_id]['message'] = 'Creating dubbed video'
-        final_filename = f"dubbed_{task_id}.mp4"
-        final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
-        dubbing_start = time.time()
-        replace_video_audio(video_path, audio_path, final_path)
-        dubbing_time = time.time() - dubbing_start
-        track_processing_time(task_id, 'dubbing', dubbing_time)
-        processing_status[task_id]['progress'] = 75
-        # Cleanup
-        os.unlink(audio_path)
-        os.unlink(video_path)
-        # Finalize
-        processing_status[task_id].update({
-            'status': 'complete',
-            'progress': 100,
-            'message': 'Processing complete',
-            'result_path': final_path,
-            'script': script,
-            'end_time': time.time()
-        })
-    except Exception as e:
-        processing_status[task_id].update({
-            'status': 'error',
-            'message': f'Error: {str(e)}'
-        })
-        raise
-def generate_tamil_script(video_path):
-    """Generate Tamil script using Gemini"""
-    video_file = genai.upload_file(video_path, mime_type="video/mp4")
     while video_file.state.name == "PROCESSING":
         time.sleep(5)
         video_file = genai.get_file(video_file.name)
     if video_file.state.name != "ACTIVE":
-        raise Exception(f"Gemini processing failed: {video_file.state.name}")
-    model = genai.GenerativeModel(model_name="models/gemini-2.5-flash")
     response = model.generate_content([GEMINI_PROMPT, video_file])
     genai.delete_file(video_file.name)
     if hasattr(response, 'text') and response.text:
         return " ".join(response.text.strip().splitlines())
-    raise Exception("No valid script generated")
-def generate_audio_track(text, voice, cheerful, output_path):
-    """Generate audio using TTS API"""
     payload = {
-        "text": text,
-        "voice_name": voice,
-        "cheerful": cheerful
     }
     response = requests.post(TTS_API_URL, json=payload, timeout=300)
-    if response.status_code != 200:
-        raise Exception(f"TTS API error: {response.status_code}")
-    with open(output_path, "wb") as f:
-        f.write(response.content)
-def replace_video_audio(video_path, audio_path, output_path):
-    """Replace video audio track"""
-    video = AudioFileClip = None
     try:
-        video = VideoFileClip(video_path)
-        audio = AudioFileClip(audio_path)
-        video.audio = audio
-        video.write_videofile(
             output_path,
             codec="libx264",
             audio_codec="aac",
-            logger=None,
-            threads=4
         )
     finally:
-        if video:
-            video.close()
-        if audio:
-            audio.close()
-@app.route('/')
 def index():
-    """Main page"""
     return render_template('index.html', voices=VOICE_CHOICES)
-@app.route('/upload', methods=['POST'])
-def upload_video():
-    """Handle video upload and start processing"""
-    if 'video' not in request.files:
-        return jsonify({'error': 'No file uploaded'}), 400
-    file = request.files['video']
-    if file.filename == '':
-        return jsonify({'error': 'No file selected'}), 400
-    # Generate unique task ID
-    task_id = str(uuid.uuid4())
-    filename = secure_filename(f"{task_id}_{file.filename}")
-    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-    file.save(video_path)
-    # Get processing options
-    voice = request.form.get('voice', 'Charon')
-    cheerful = request.form.get('cheerful', 'false') == 'true'
-    # Start background processing
-    processing_status[task_id] = {
-        'status': 'uploaded',
-        'progress': 0,
-        'message': 'Starting processing',
-        'timings': {'upload': time.time(), 'transcription': None, 'tts': None, 'dubbing': None},
-        'start_time': time.time()
-    }
-    thread = threading.Thread(
-        target=process_video_background,
-        args=(task_id, video_path, voice, cheerful)
-    thread.start()
-    return jsonify({'task_id': task_id})
-@app.route('/status/<task_id>')
-def get_status(task_id):
-    """Check processing status"""
-    if task_id not in processing_status:
-        return jsonify({'error': 'Invalid task ID'}), 404
-    status = processing_status[task_id]
-    # Calculate ETA if processing
-    eta = None
-    if status['status'] == 'processing':
-        elapsed = time.time() - status['start_time']
-        remaining = estimate_remaining_time(task_id)
-        if isinstance(remaining, (int, float)):
-            eta = str(timedelta(seconds=int(remaining)))
-    response = {
-        'status': status['status'],
-        'progress': status.get('progress', 0),
-        'message': status.get('message', ''),
-        'eta': eta
-    }
-    if status['status'] == 'complete':
-        response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
-        response['script'] = status.get('script', '')
-    return jsonify(response)
-@app.route('/download/<filename>')
-def download(filename):
-    """Serve processed video"""
     return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860, threaded=True)

 import uuid
 import google.generativeai as genai
 import requests
+from flask import Flask, request, render_template, send_from_directory, url_for, flash
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
+# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
 load_dotenv()
 app = Flask(__name__)
+# Load secrets from environment variables
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
+# Validate required configurations
+if not GEMINI_API_KEY:
+    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
+if not TTS_API_URL:
+    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")
+# Configure Gemini AI
 genai.configure(api_key=GEMINI_API_KEY)
+# Configure directories
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit
+app.secret_key = os.urandom(24)  # Secure key for flash messages
+# --- 2. APPLICATION CONFIGURATION ---
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 **CRITICAL INSTRUCTIONS:**
+1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
+2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
+3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
+**EXAMPLE OUTPUT:**
+Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
+"""
+# --- 3. CORE APPLICATION FUNCTIONS ---
+def generate_tamil_script(video_file_path):
+    """Generates a Tamil script from the video using Gemini AI."""
+    print("Uploading video to Gemini for transcription...")
+    video_file = genai.upload_file(video_file_path, mime_type="video/mp4")
+    # Wait for file processing
     while video_file.state.name == "PROCESSING":
         time.sleep(5)
         video_file = genai.get_file(video_file.name)
     if video_file.state.name != "ACTIVE":
+        raise Exception(f"Gemini file processing failed: {video_file.state.name}")
+    print("Generating script...")
+    model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
     response = model.generate_content([GEMINI_PROMPT, video_file])
     genai.delete_file(video_file.name)
     if hasattr(response, 'text') and response.text:
         return " ".join(response.text.strip().splitlines())
+    raise Exception("No valid script was generated by Gemini.")
+def generate_audio_track(script_text, voice_name, is_cheerful, output_path):
+    """Generates audio from script using TTS API."""
+    print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")
     payload = {
+        "text": script_text,
+        "voice_name": voice_name,
+        "cheerful": is_cheerful
     }
     response = requests.post(TTS_API_URL, json=payload, timeout=300)
+    if response.status_code == 200:
+        with open(output_path, "wb") as f:
+            f.write(response.content)
+        return True
+    raise Exception(f"TTS API Error: {response.status_code} - {response.text}")
+def replace_video_audio(video_path, new_audio_path, output_path):
+    """Replaces the audio track of a video file."""
+    print("Replacing video audio...")
+    video_clip = None
+    audio_clip = None
     try:
+        video_clip = VideoFileClip(video_path)
+        audio_clip = AudioFileClip(new_audio_path)
+        video_clip.audio = audio_clip
+        video_clip.write_videofile(
             output_path,
             codec="libx264",
             audio_codec="aac",
+            logger='bar'
         )
     finally:
+        if audio_clip:
+            audio_clip.close()
+        if video_clip:
+            video_clip.close()
+# --- 4. FLASK ROUTES ---
+@app.route('/', methods=['GET'])
 def index():
+    """Render the main upload page."""
     return render_template('index.html', voices=VOICE_CHOICES)
+@app.route('/process', methods=['POST'])
+def process_video():
+    """Handle video upload and processing."""
+    input_video_path = None
+    temp_audio_path = None
+    try:
+        # Validate file upload
+        if 'video' not in request.files or request.files['video'].filename == '':
+            flash("Please upload a video file.", "error")
+            return render_template('index.html', voices=VOICE_CHOICES)
+        # Save uploaded file
+        file = request.files['video']
+        filename = secure_filename(file.filename)
+        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+        file.save(input_video_path)
+        # Get processing options
+        voice_choice = request.form.get('voice', 'Charon')
+        is_cheerful = request.form.get('tone') == 'on'
+        # Generate script and audio
+        script = generate_tamil_script(input_video_path)
+        # Create temporary audio file
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            temp_audio_path = temp_audio.name
+        generate_audio_track(script, voice_choice, is_cheerful, temp_audio_path)
+        # Create dubbed video
+        final_video_name = f"dubbed_{filename}"
+        final_video_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_video_name)
+        replace_video_audio(input_video_path, temp_audio_path, final_video_path)
+        flash("Video processing complete!", "success")
+        return render_template(
+            'index.html',
+            voices=VOICE_CHOICES,
+            result_video=url_for('serve_video', filename=final_video_name),
+            script=script
+        )
+    except Exception as e:
+        print(f"Processing error: {str(e)}")
+        flash(f"An error occurred: {str(e)}", "error")
+        return render_template('index.html', voices=VOICE_CHOICES)
+    finally:
+        # Clean up temporary files
+        if input_video_path and os.path.exists(input_video_path):
+            os.remove(input_video_path)
+        if temp_audio_path and os.path.exists(temp_audio_path):
+            os.remove(temp_audio_path)
+@app.route('/downloads/<filename>')
+def serve_video(filename):
+    """Serve the processed video file."""
     return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
+# --- 5. APPLICATION ENTRY POINT ---
 if __name__ == '__main__':
+    app.run(host="0.0.0.0", port=7860)