Spaces:

Athspi-ai
/

Translate

Running

App Files Files Community

Athspi committed 4 days ago

Commit

b3273f6

verified ·

1 Parent(s): c901468

Update app.py

Browse files

Files changed (1) hide show

app.py +203 -200

app.py CHANGED Viewed

@@ -7,267 +7,270 @@ import requests
 from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
-from moviepy.video.fx.all import resize, speedx
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
-from PIL import Image, ImageDraw, ImageFont
-import numpy as np
-# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
 load_dotenv()
 app = Flask(__name__)
-# Load secrets from environment variables
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
-# Validate required configurations
-if not GEMINI_API_KEY:
-    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
-if not TTS_API_URL:
-    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")
-# Configure Gemini AI
 genai.configure(api_key=GEMINI_API_KEY)
-# Configure directories
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
-app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit
-app.secret_key = os.urandom(24)  # Secure key for flash messages
-# --- 2. APPLICATION CONFIGURATION ---
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 }
-EDITING_PRESETS = {
-    "fast_cuts": {
-        "speed": 1.2,
-        "transition_duration": 0.3,
-        "max_clip_duration": 5
-    },
-    "cinematic": {
-        "speed": 0.95,
-        "transition_duration": 1.0,
-        "black_bars": True
-    },
-    "social_media": {
-        "speed": 1.0,
-        "aspect_ratio": (9, 16),
-        "add_captions": True
-    }
-}
 GEMINI_PROMPT = """
-You are an expert AI scriptwriter. Your task is to watch the provided video and:
-1. Transcribe ALL spoken dialogue into modern, colloquial Tamil
-2. Identify key moments for editing (action, emotion, important points)
-3. Suggest timestamps for cuts/transitions
-**OUTPUT FORMAT:**
-{
-    "script": "Combined Tamil dialogue with performance cues",
-    "editing_notes": [
-        {"timestamp": 12.5, "type": "cut", "reason": "action moment"},
-        {"timestamp": 24.3, "type": "slow_mo", "reason": "emotional highlight"}
-    ]
-}
 """
-# --- 3. CORE APPLICATION FUNCTIONS ---
-def analyze_video(video_path):
-    """Analyze video content and generate script with editing suggestions."""
-    print("Analyzing video with Gemini...")
     video_file = genai.upload_file(video_path, mime_type="video/mp4")
-    # Wait for file processing
     while video_file.state.name == "PROCESSING":
         time.sleep(5)
         video_file = genai.get_file(video_file.name)
     if video_file.state.name != "ACTIVE":
-        raise Exception(f"Gemini file processing failed: {video_file.state.name}")
     model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
     response = model.generate_content([GEMINI_PROMPT, video_file])
     genai.delete_file(video_file.name)
     if hasattr(response, 'text') and response.text:
-        try:
-            return eval(response.text)  # Convert string to dict
-        except:
-            return {"script": response.text, "editing_notes": []}
-    raise Exception("No valid analysis was generated by Gemini.")
-def generate_audio(script_text, voice_name, is_cheerful):
-    """Generate audio from script using TTS API."""
-    print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")
     payload = {
-        "text": script_text,
-        "voice_name": voice_name,
-        "cheerful": is_cheerful
     }
     response = requests.post(TTS_API_URL, json=payload, timeout=300)
-    if response.status_code == 200:
-        return response.content
-    raise Exception(f"TTS API Error: {response.status_code} - {response.text}")
-def apply_editing(video_path, audio_data, editing_notes, preset_name):
-    """Apply editing effects to video based on analysis and preset."""
-    print(f"Applying {preset_name} editing preset...")
-    preset = EDITING_PRESETS[preset_name]
-    # Save audio to temp file
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
-        temp_audio.write(audio_data)
-        temp_audio_path = temp_audio.name
-    # Load video and audio
-    video = VideoFileClip(video_path)
-    audio = AudioFileClip(temp_audio_path)
-    # Apply basic preset effects
-    if preset.get('speed'):
-        video = video.fx(speedx, preset['speed'])
-    # Apply black bars for cinematic
-    if preset.get('black_bars'):
-        def add_black_bars(get_frame, t):
-            frame = get_frame(t)
-            height, width = frame.shape[:2]
-            new_height = int(height * 0.85)
-            bar_size = (height - new_height) // 2
-            # Create black image
-            black_bar = np.zeros((bar_size, width, 3), dtype=np.uint8)
-            processed_frame = np.vstack([black_bar, frame, black_bar])
-            return processed_frame
-        video = video.fl(add_black_bars)
-    # Apply editing notes
-    clips = []
-    current_start = 0
-    for note in editing_notes:
-        if current_start >= note['timestamp']:
-            continue
-        clip = video.subclip(current_start, note['timestamp'])
-        # Apply effect based on note type
-        if note['type'] == 'slow_mo':
-            clip = clip.fx(speedx, 0.5)
-        elif note['type'] == 'fast_cut':
-            clip = clip.fx(speedx, 1.5)
-        clips.append(clip)
-        current_start = note['timestamp']
-    # Add remaining video
-    if current_start < video.duration:
-        clips.append(video.subclip(current_start))
-    # Concatenate all clips
-    final_video = concatenate_videoclips(clips)
-    final_video = final_video.set_audio(audio)
-    # Apply aspect ratio if specified
-    if preset.get('aspect_ratio'):
-        target_ratio = preset['aspect_ratio']
-        final_video = final_video.resize(height=target_ratio[1])
-    # Generate output path
-    output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], f"edited_{os.path.basename(video_path)}")
-    final_video.write_videofile(
-        output_path,
-        codec="libx264",
-        audio_codec="aac",
-        threads=4,
-        preset='fast'
-    )
-    # Cleanup
-    video.close()
-    audio.close()
-    os.unlink(temp_audio_path)
-    return output_path
-# --- 4. FLASK ROUTES ---
-@app.route('/', methods=['GET'])
-def index():
-    """Render the main upload page."""
-    return render_template('index.html', voices=VOICE_CHOICES, presets=EDITING_PRESETS.keys())
-@app.route('/process', methods=['POST'])
-def process_video():
-    """Handle video upload and processing."""
-    input_video_path = None
-    try:
-        # Validate file upload
-        if 'video' not in request.files or request.files['video'].filename == '':
-            flash("Please upload a video file.", "error")
-            return render_template('index.html',
-                                voices=VOICE_CHOICES,
-                                presets=EDITING_PRESETS.keys())
-        # Save uploaded file
-        file = request.files['video']
-        filename = secure_filename(file.filename)
-        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-        file.save(input_video_path)
-        # Get processing options
-        voice_choice = request.form.get('voice', 'Charon')
-        is_cheerful = request.form.get('tone') == 'on'
-        preset_name = request.form.get('preset', 'fast_cuts')
-        # Analyze video
-        analysis = analyze_video(input_video_path)
-        script = analysis.get('script', '')
-        editing_notes = analysis.get('editing_notes', [])
-        # Generate audio
-        audio_data = generate_audio(script, voice_choice, is_cheerful)
-        # Apply editing and generate final video
-        final_video_path = apply_editing(input_video_path, audio_data, editing_notes, preset_name)
-        return jsonify({
-            'status': 'success',
-            'video_url': url_for('serve_video', filename=os.path.basename(final_video_path)),
-            'script': script
-        })
-    except Exception as e:
-        print(f"Processing error: {str(e)}")
-        return jsonify({
-            'status': 'error',
-            'message': str(e)
-        }), 500
-    finally:
-        # Clean up uploaded file
-        if input_video_path and os.path.exists(input_video_path):
-            os.remove(input_video_path)
-@app.route('/downloads/<filename>')
-def serve_video(filename):
-    """Serve the processed video file."""
     return send_from_directory(app.config['DOWNLOAD_FOLDER'], filename)
-# --- 5. APPLICATION ENTRY POINT ---
 if __name__ == '__main__':
     app.run(host="0.0.0.0", port=7860)

 from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
 from moviepy.video.io.VideoFileClip import VideoFileClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 from werkzeug.utils import secure_filename
 from dotenv import load_dotenv
+import threading
+from datetime import datetime, timedelta
+# Initialize Flask app and load secrets
 load_dotenv()
 app = Flask(__name__)
+# Configuration
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 TTS_API_URL = os.getenv("TTS_API_URL")
+if not GEMINI_API_KEY or not TTS_API_URL:
+    raise ValueError("Missing required environment variables")
 genai.configure(api_key=GEMINI_API_KEY)
+# File storage setup
 UPLOAD_FOLDER = 'uploads'
 DOWNLOAD_FOLDER = 'downloads'
 os.makedirs(UPLOAD_FOLDER, exist_ok=True)
 os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100MB
+app.secret_key = os.urandom(24)
+# Processing status tracking
+# processing_status maps a task ID to that task's status dict (status,
+# progress, message, per-stage timings, start time). It is written by the
+# upload route and the background worker, and read by the status route.
+processing_status = {}
+# Most recently recorded duration, in seconds, of each stage across all
+# tasks; track_processing_time() overwrites the entry for a stage.
+processing_times = {
+    'upload': 0,
+    'transcription': 0,
+    'tts': 0,
+    'dubbing': 0
+}
+# Voice options
 VOICE_CHOICES = {
     "Male (Charon)": "Charon",
     "Female (Zephyr)": "Zephyr"
 }
 GEMINI_PROMPT = """
+You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
+**CRITICAL INSTRUCTIONS:**
+1. Combine all dialogue into one continuous script.
+2. NO timestamps or speaker labels.
+3. Add performance directions (e.g., `Say happily:`, `[laugh]`) directly in the text.
 """
+def track_processing_time(task_id, stage, duration):
+    """Record a stage duration globally and on the task's own status entry.
+
+    The global ``processing_times`` entry for ``stage`` is always overwritten;
+    the per-task timing is only written when ``task_id`` is a known task.
+    """
+    processing_times[stage] = duration
+    task_entry = processing_status.get(task_id)
+    if task_entry is not None:
+        task_entry['timings'][stage] = duration
+def estimate_remaining_time(task_id):
+    """Estimate how long a task still needs.
+
+    The estimate is the average duration of the stages timed so far
+    multiplied by the number of stages that have no timing yet.
+
+    Args:
+        task_id: Key of the task in ``processing_status``.
+
+    Returns:
+        A number of seconds, or a placeholder string ("Calculating..." for an
+        unknown task, "Starting soon..." when no stage has been timed yet).
+    """
+    status = processing_status.get(task_id)
+    if status is None:
+        return "Calculating..."
+    timings = status['timings']
+    # The upload is finished before the background worker starts and the
+    # worker never records a duration for it, so counting it as a pending
+    # stage would permanently inflate the estimate by one stage.
+    measured = [
+        duration for stage, duration in timings.items()
+        if stage != 'upload' and duration is not None
+    ]
+    if not measured:
+        return "Starting soon..."
+    pending_stages = sum(
+        1 for stage, duration in timings.items()
+        if stage != 'upload' and duration is None
+    )
+    return pending_stages * (sum(measured) / len(measured))
+def process_video_background(task_id, video_path, voice, cheerful):
+    """Run transcription, TTS generation and dubbing for one uploaded video.
+
+    Progress and results are published through ``processing_status[task_id]``
+    rather than returned, so the function can be used as a thread target.
+
+    Args:
+        task_id: Key of this task in ``processing_status``; also used to name
+            the output file ``dubbed_<task_id>.mp4``.
+        video_path: Path of the uploaded video. The file is deleted when
+            processing ends, whether it succeeded or failed.
+        voice: Voice name forwarded to the TTS request.
+        cheerful: Tone flag forwarded to the TTS request.
+
+    Raises:
+        Exception: Any stage failure is recorded on the task as status
+            ``'error'`` and then re-raised.
+    """
+    audio_path = None
+    try:
+        processing_status[task_id] = {
+            'status': 'processing',
+            'progress': 0,
+            'message': 'Starting transcription',
+            'timings': {'upload': None, 'transcription': None, 'tts': None, 'dubbing': None},
+            'start_time': time.time()
+        }
+        # Stage 1: Transcription
+        processing_status[task_id]['message'] = 'Transcribing video content'
+        script_start = time.time()
+        script = generate_tamil_script(video_path)
+        track_processing_time(task_id, 'transcription', time.time() - script_start)
+        processing_status[task_id]['progress'] = 25
+        # Stage 2: TTS Generation
+        processing_status[task_id]['message'] = 'Generating audio narration'
+        # Only the name is needed here; delete=False keeps the (empty) file so
+        # the TTS step can write to the same path afterwards.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
+            audio_path = temp_audio.name
+        tts_start = time.time()
+        generate_audio_track(script, voice, cheerful, audio_path)
+        track_processing_time(task_id, 'tts', time.time() - tts_start)
+        processing_status[task_id]['progress'] = 50
+        # Stage 3: Dubbing
+        processing_status[task_id]['message'] = 'Creating dubbed video'
+        final_filename = f"dubbed_{task_id}.mp4"
+        final_path = os.path.join(app.config['DOWNLOAD_FOLDER'], final_filename)
+        dubbing_start = time.time()
+        replace_video_audio(video_path, audio_path, final_path)
+        track_processing_time(task_id, 'dubbing', time.time() - dubbing_start)
+        processing_status[task_id]['progress'] = 75
+        # Finalize
+        processing_status[task_id].update({
+            'status': 'complete',
+            'progress': 100,
+            'message': 'Processing complete',
+            'result_path': final_path,
+            'script': script,
+            'end_time': time.time()
+        })
+    except Exception as e:
+        processing_status[task_id].update({
+            'status': 'error',
+            'message': f'Error: {str(e)}'
+        })
+        raise
+    finally:
+        # Cleanup runs on failure as well as success, so a failed stage does
+        # not leave the temporary narration or the uploaded video on disk.
+        for leftover in (audio_path, video_path):
+            if leftover and os.path.exists(leftover):
+                os.remove(leftover)
+def generate_tamil_script(video_path):
+    """Generate a single-line Tamil script for a video using Gemini.
+
+    The video is uploaded, polled every 5 seconds until it leaves the
+    PROCESSING state, passed to the model together with ``GEMINI_PROMPT``,
+    and the uploaded file is deleted afterwards.
+
+    Args:
+        video_path: Path of the local video file to upload.
+
+    Returns:
+        The response text with its lines joined by single spaces.
+
+    Raises:
+        Exception: If the uploaded file does not become ACTIVE, or if the
+            response carries no text.
+    """
     video_file = genai.upload_file(video_path, mime_type="video/mp4")
+    try:
+        while video_file.state.name == "PROCESSING":
+            time.sleep(5)
+            video_file = genai.get_file(video_file.name)
+        if video_file.state.name != "ACTIVE":
+            raise Exception(f"Gemini processing failed: {video_file.state.name}")
+        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
+        response = model.generate_content([GEMINI_PROMPT, video_file])
+    finally:
+        # Delete the uploaded file even when processing or generation fails,
+        # so failed runs do not leave files behind on the service.
+        genai.delete_file(video_file.name)
     if hasattr(response, 'text') and response.text:
+        return " ".join(response.text.strip().splitlines())
+    raise Exception("No valid script generated")
+def generate_audio_track(text, voice, cheerful, output_path):
+    """Request narration from the TTS API and write the returned bytes to output_path.
+
+    Raises:
+        Exception: If the API answers with any status code other than 200.
+    """
+    tts_reply = requests.post(
+        TTS_API_URL,
+        json={"text": text, "voice_name": voice, "cheerful": cheerful},
+        timeout=300,
+    )
+    if tts_reply.status_code != 200:
+        raise Exception(f"TTS API error: {tts_reply.status_code}")
+    with open(output_path, "wb") as audio_file:
+        audio_file.write(tts_reply.content)
+def replace_video_audio(video_path, audio_path, output_path):
+    """Write a copy of the video whose audio track is replaced by audio_path.
+
+    Args:
+        video_path: Path of the source video.
+        audio_path: Path of the audio file to use as the new track.
+        output_path: Path the resulting video is written to (libx264 / aac).
+    """
+    # Both handles start as None so the finally block can tell which clips
+    # were actually opened. Assigning to AudioFileClip here instead of audio
+    # would make the class name a local None and break the call below.
+    video = audio = None
+    try:
+        video = VideoFileClip(video_path)
+        audio = AudioFileClip(audio_path)
+        video.audio = audio
+        video.write_videofile(
+            output_path,
+            codec="libx264",
+            audio_codec="aac",
+            logger=None,
+            threads=4
+        )
+    finally:
+        if video:
+            video.close()
+        if audio:
+            audio.close()
+@app.route('/')
+def index():
+    """Render the upload page, passing the available voice choices to the template."""
+    voice_options = VOICE_CHOICES
+    return render_template('index.html', voices=voice_options)
+@app.route('/upload', methods=['POST'])
+def upload_video():
+    """Accept a video upload, register a task and start background processing.
+
+    Reads the ``video`` file plus the ``voice`` and ``cheerful`` form fields.
+
+    Returns:
+        JSON ``{'task_id': ...}`` on success, or JSON ``{'error': ...}`` with
+        status 400 when no file was sent or no file was selected.
+    """
+    if 'video' not in request.files:
+        return jsonify({'error': 'No file uploaded'}), 400
+    file = request.files['video']
+    if file.filename == '':
+        return jsonify({'error': 'No file selected'}), 400
+    # Generate unique task ID; prefixing it keeps uploads with the same
+    # original filename from overwriting each other.
+    task_id = str(uuid.uuid4())
+    filename = secure_filename(f"{task_id}_{file.filename}")
+    video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    upload_start = time.time()
+    file.save(video_path)
+    upload_duration = time.time() - upload_start
+    # Get processing options
+    voice = request.form.get('voice', 'Charon')
+    cheerful = request.form.get('cheerful', 'false') == 'true'
+    # Register the task before the thread starts so a status request made
+    # immediately after this response already finds the task ID.
+    processing_status[task_id] = {
+        'status': 'uploaded',
+        'progress': 0,
+        'message': 'Starting processing',
+        # Timings hold durations in seconds, so store how long the save took
+        # rather than a wall-clock timestamp.
+        'timings': {'upload': upload_duration, 'transcription': None, 'tts': None, 'dubbing': None},
+        'start_time': time.time()
+    }
+    thread = threading.Thread(
+        target=process_video_background,
+        args=(task_id, video_path, voice, cheerful)
+    )
+    thread.start()
+    return jsonify({'task_id': task_id})
+@app.route('/status/<task_id>')
+def get_status(task_id):
+    """Report the current state of a processing task as JSON.
+
+    Returns:
+        JSON with ``status``, ``progress``, ``message`` and ``eta``; completed
+        tasks additionally carry ``result_url`` and ``script``. Unknown task
+        IDs yield JSON ``{'error': 'Invalid task ID'}`` with status 404.
+    """
+    if task_id not in processing_status:
+        return jsonify({'error': 'Invalid task ID'}), 404
+    status = processing_status[task_id]
+    # Calculate ETA if processing
+    eta = None
+    if status['status'] == 'processing':
+        remaining = estimate_remaining_time(task_id)
+        # The estimator returns a placeholder string until a stage has been
+        # timed; only numeric estimates are formatted as H:MM:SS.
+        if isinstance(remaining, (int, float)):
+            eta = str(timedelta(seconds=int(remaining)))
+    response = {
+        'status': status['status'],
+        'progress': status.get('progress', 0),
+        'message': status.get('message', ''),
+        'eta': eta
+    }
+    if status['status'] == 'complete':
+        response['result_url'] = url_for('download', filename=os.path.basename(status['result_path']))
+        response['script'] = status.get('script', '')
+    return jsonify(response)
+@app.route('/download/<filename>')
+def download(filename):
+    """Send the requested file from the configured download folder."""
+    download_dir = app.config['DOWNLOAD_FOLDER']
+    return send_from_directory(download_dir, filename)
+# When run as a script, start Flask's built-in server on all interfaces, port 7860.
 if __name__ == '__main__':
     app.run(host="0.0.0.0", port=7860)