Athspi committed on
Commit
0a39518
·
verified ·
1 Parent(s): 42d1756

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +237 -121
app.py CHANGED
@@ -1,155 +1,271 @@
1
  import os
 
2
  import tempfile
3
  import uuid
4
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
5
- from fastapi.responses import FileResponse
6
- from fastapi.staticfiles import StaticFiles
7
- from moviepy.video.io.VideoFileClip import VideoFileClip
8
- from moviepy.audio.io.AudioFileClip import AudioFileClip
9
  import google.generativeai as genai
10
  import requests
 
 
 
 
11
  from dotenv import load_dotenv
12
- from pathlib import Path
 
13
 
14
- # Load environment variables
15
  load_dotenv()
 
16
 
17
- app = FastAPI()
18
-
19
- # Configure directories
20
- UPLOAD_DIR = "uploads"
21
- DOWNLOAD_DIR = "downloads"
22
- Path(UPLOAD_DIR).mkdir(exist_ok=True)
23
- Path(DOWNLOAD_DIR).mkdir(exist_ok=True)
24
-
25
- # Mount static files
26
- app.mount("/downloads", StaticFiles(directory="downloads"), name="downloads")
27
-
28
- # Configuration
29
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
30
  TTS_API_URL = os.getenv("TTS_API_URL")
 
 
 
 
 
 
 
 
31
  genai.configure(api_key=GEMINI_API_KEY)
32
 
 
 
 
 
 
 
 
 
 
 
 
33
  VOICE_CHOICES = {
34
- "male": "Charon",
35
- "female": "Zephyr"
36
  }
37
 
38
- GEMINI_PROMPT = """
39
- You are an expert AI scriptwriter. Your task is to watch the provided video and transcribe ALL spoken dialogue into a SINGLE, CONTINUOUS block of modern, colloquial Tamil.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- **CRITICAL INSTRUCTIONS:**
42
- 1. **Single Script:** Combine all dialogue from all speakers into one continuous script.
43
- 2. **NO Timestamps or Speaker Labels:** Do NOT include any timestamps or speaker identifiers.
44
- 3. **Incorporate Performance:** Add English style prompts (e.g., `Say happily:`, `Whisper mysteriously:`) and performance tags (e.g., `[laugh]`, `[sigh]`) directly into the text for an expressive narration.
 
45
 
46
- **EXAMPLE OUTPUT:**
47
- Say happily: வணக்கம்! [laugh] எப்படி இருக்கீங்க? Whisper mysteriously: அந்த ரகசியம் எனக்கு மட்டும் தான் தெரியும்.
 
 
 
 
 
 
48
  """
49
 
50
- @app.post("/process")
51
- async def process_video(
52
- file: UploadFile = File(...),
53
- voice: str = Form("male"),
54
- cheerful: bool = Form(False)
55
- ):
56
- try:
57
- # Save uploaded file
58
- file_ext = Path(file.filename).suffix
59
- file_name = f"{uuid.uuid4()}{file_ext}"
60
- file_path = os.path.join(UPLOAD_DIR, file_name)
61
-
62
- with open(file_path, "wb") as buffer:
63
- buffer.write(await file.read())
64
 
65
- # Generate script using Gemini
66
- script = await generate_script(file_path)
67
-
68
- # Generate audio
69
- audio_path = os.path.join(UPLOAD_DIR, f"audio_{uuid.uuid4()}.wav")
70
- await generate_audio(script, voice, cheerful, audio_path)
71
-
72
- # Create dubbed video
73
- output_name = f"dubbed_{file_name}"
74
- output_path = os.path.join(DOWNLOAD_DIR, output_name)
75
- await create_dubbed_video(file_path, audio_path, output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # Cleanup
78
- os.remove(file_path)
79
- os.remove(audio_path)
 
 
 
 
 
 
 
 
80
 
81
- return {
82
- "video_url": f"/downloads/{output_name}",
83
- "script": script
84
- }
 
85
 
86
- except Exception as e:
87
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- async def generate_script(video_path: str) -> str:
 
 
 
 
 
 
 
 
 
90
  try:
91
- video_file = genai.upload_file(video_path, mime_type="video/mp4")
 
 
 
 
 
92
 
93
- while video_file.state.name == "PROCESSING":
94
- video_file = genai.get_file(video_file.name)
 
 
 
95
 
96
- if video_file.state.name != "ACTIVE":
97
- raise Exception("Gemini processing failed")
 
 
98
 
99
- model = genai.GenerativeModel("models/gemini-2.5-flash")
100
- response = model.generate_content([GEMINI_PROMPT, video_file])
101
- genai.delete_file(video_file.name)
 
102
 
103
- if hasattr(response, 'text'):
104
- return " ".join(response.text.strip().splitlines())
105
- raise Exception("No script generated")
106
- except Exception as e:
107
- raise Exception(f"Script generation failed: {str(e)}")
108
-
109
- async def generate_audio(text: str, voice: str, cheerful: bool, output_path: str):
110
- try:
111
- voice_name = VOICE_CHOICES.get(voice, "Charon")
112
- payload = {
113
- "text": text,
114
- "voice_name": voice_name,
115
- "cheerful": cheerful
116
- }
117
-
118
- response = requests.post(TTS_API_URL, json=payload, timeout=300)
119
- if response.status_code != 200:
120
- raise Exception(f"TTS API error: {response.text}")
121
-
122
- with open(output_path, "wb") as f:
123
- f.write(response.content)
124
- except Exception as e:
125
- raise Exception(f"Audio generation failed: {str(e)}")
126
-
127
- async def create_dubbed_video(video_path: str, audio_path: str, output_path: str):
128
- try:
129
- video = VideoFileClip(video_path)
130
- audio = AudioFileClip(audio_path)
131
 
132
- # Ensure audio matches video duration
133
- if audio.duration > video.duration:
134
- audio = audio.subclip(0, video.duration)
135
 
136
- video = video.set_audio(audio)
137
- video.write_videofile(
138
- output_path,
139
- codec="libx264",
140
- audio_codec="aac",
141
- threads=4,
142
- preset="fast"
143
- )
144
 
145
- video.close()
146
- audio.close()
147
  except Exception as e:
148
- raise Exception(f"Video processing failed: {str(e)}")
149
-
150
- @app.get("/downloads/{file_name}")
151
- async def download_file(file_name: str):
152
- file_path = os.path.join(DOWNLOAD_DIR, file_name)
153
- if not os.path.exists(file_path):
154
- raise HTTPException(status_code=404, detail="File not found")
155
- return FileResponse(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import time
3
  import tempfile
4
  import uuid
 
 
 
 
 
5
  import google.generativeai as genai
6
  import requests
7
+ from flask import Flask, request, render_template, send_from_directory, url_for, flash, jsonify
8
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeVideoClip, TextClip, concatenate_videoclips
9
+ from moviepy.video.fx.all import resize, speedx
10
+ from werkzeug.utils import secure_filename
11
  from dotenv import load_dotenv
12
+ from PIL import Image, ImageDraw, ImageFont
13
+ import numpy as np
14
 
15
# --- 1. INITIALIZE FLASK APP AND LOAD SECRETS ---
# Copy variables from a local .env file (when present) into the process
# environment before anything reads them.
load_dotenv()
app = Flask(__name__)

# Load secrets from environment variables
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TTS_API_URL = os.getenv("TTS_API_URL")

# Validate required configurations: fail at import time instead of on the
# first request that needs them.
if not GEMINI_API_KEY:
    raise ValueError("SECURITY ERROR: GEMINI_API_KEY not found in .env file!")
if not TTS_API_URL:
    raise ValueError("CONFIGURATION ERROR: TTS_API_URL not found in .env file!")

# Configure Gemini AI
genai.configure(api_key=GEMINI_API_KEY)

# Configure directories (created on startup if they do not exist yet)
UPLOAD_FOLDER = 'uploads'
DOWNLOAD_FOLDER = 'downloads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['DOWNLOAD_FOLDER'] = DOWNLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB upload limit

# Key that signs the session cookie used by flash(). A key produced by
# os.urandom() is different in every process and after every restart, which
# invalidates pending flash messages and breaks multi-worker deployments, so
# prefer a stable FLASK_SECRET_KEY from the environment and only fall back to
# a random key when none is configured.
app.secret_key = os.getenv("FLASK_SECRET_KEY") or os.urandom(24)
41
+
42
# --- 2. APPLICATION CONFIGURATION ---
# Label -> voice name pairs handed to the index template as `voices`.
# NOTE(review): presumably the value is the name the TTS service expects --
# confirm against the TTS API.
VOICE_CHOICES = dict([
    ("Male (Charon)", "Charon"),
    ("Female (Zephyr)", "Zephyr"),
])

# Named editing presets selectable from the upload form. Of these options the
# visible editing code reads `speed`, `black_bars` and `aspect_ratio`; the
# remaining keys are carried as configuration only.
EDITING_PRESETS = {
    "fast_cuts": dict(speed=1.2, transition_duration=0.3, max_clip_duration=5),
    "cinematic": dict(speed=0.95, transition_duration=1.0, black_bars=True),
    "social_media": dict(speed=1.0, aspect_ratio=(9, 16), add_captions=True),
}
65
 
66
# Instruction sent to Gemini together with the uploaded video. The reply is
# parsed by the application, so the prompt demands a bare JSON object: without
# that, models tend to wrap the object in Markdown fences or prose, the parse
# fails, and the entire reply ends up being narrated as the script.
GEMINI_PROMPT = """
You are an expert AI scriptwriter. Your task is to watch the provided video and:
1. Transcribe ALL spoken dialogue into modern, colloquial Tamil
2. Identify key moments for editing (action, emotion, important points)
3. Suggest timestamps for cuts/transitions

**OUTPUT FORMAT:**
Respond with ONLY one valid JSON object shaped like the example below: double-quoted keys and strings, no Markdown code fences, no text before or after the object.
"timestamp" is in seconds from the start of the video and the notes must be in ascending timestamp order.
"type" must be one of "cut", "slow_mo" or "fast_cut".
{
  "script": "Combined Tamil dialogue with performance cues",
  "editing_notes": [
    {"timestamp": 12.5, "type": "cut", "reason": "action moment"},
    {"timestamp": 24.3, "type": "slow_mo", "reason": "emotional highlight"}
  ]
}
"""
81
 
82
+ # --- 3. CORE APPLICATION FUNCTIONS ---
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
def _parse_analysis(raw_text):
    """Convert Gemini's textual reply into an analysis dict.

    The reply is treated as untrusted data: it is parsed with ``json.loads``
    and, for Python-literal style replies (single quotes), with
    ``ast.literal_eval``. It is never passed to ``eval()``, which would execute
    arbitrary expressions contained in the reply.

    Args:
        raw_text: The text returned by the model.

    Returns:
        A dict with at least ``"script"`` (str) and ``"editing_notes"`` (list).
        When no dict with a string ``"script"`` can be parsed, the whole reply
        becomes the script and the notes are empty.
    """
    import ast
    import json

    text = raw_text.strip()
    candidates = [text]
    # Replies are frequently wrapped in Markdown fences or surrounded by prose;
    # the outermost {...} span is the best second guess for the payload.
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    for candidate in candidates:
        for loader in (json.loads, ast.literal_eval):
            try:
                parsed = loader(candidate)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, dict) and isinstance(parsed.get("script"), str):
                result = dict(parsed)
                notes = result.get("editing_notes")
                result["editing_notes"] = notes if isinstance(notes, list) else []
                return result

    return {"script": raw_text, "editing_notes": []}


def analyze_video(video_path, poll_interval=5, max_wait=600):
    """Analyze video content and generate script with editing suggestions.

    Uploads the video to Gemini, waits until the file leaves the PROCESSING
    state, asks the model for a script plus editing notes and parses the reply.

    Args:
        video_path: Path of the video file to upload.
        poll_interval: Seconds to sleep between file-state checks.
        max_wait: Maximum number of seconds to wait for file processing.

    Returns:
        A dict with ``"script"`` and ``"editing_notes"`` keys.

    Raises:
        Exception: If file processing times out or fails, or if the model
            returns no usable text.
    """
    import mimetypes

    print("Analyzing video with Gemini...")
    # Use the real container type when it can be guessed from the extension;
    # keep the previous "video/mp4" default otherwise.
    guessed_type = mimetypes.guess_type(video_path)[0]
    mime_type = guessed_type if guessed_type and guessed_type.startswith("video/") else "video/mp4"
    video_file = genai.upload_file(video_path, mime_type=mime_type)

    try:
        # Wait for file processing, but never forever.
        waited = 0
        while video_file.state.name == "PROCESSING":
            if waited >= max_wait:
                raise Exception(f"Gemini file processing timed out after {waited} seconds")
            time.sleep(poll_interval)
            waited += poll_interval
            video_file = genai.get_file(video_file.name)

        if video_file.state.name != "ACTIVE":
            raise Exception(f"Gemini file processing failed: {video_file.state.name}")

        model = genai.GenerativeModel(model_name="models/gemini-1.5-pro-latest")
        response = model.generate_content([GEMINI_PROMPT, video_file])
    finally:
        # Remove the remote copy on every path (success, failure, timeout).
        # Cleanup is best effort so that it cannot mask the original error.
        try:
            genai.delete_file(video_file.name)
        except Exception as cleanup_error:
            print(f"Could not delete uploaded Gemini file: {cleanup_error}")

    # Accessing .text can raise ValueError when the response carries no usable
    # candidate (e.g. a blocked reply), which hasattr() alone would not catch.
    try:
        reply_text = response.text
    except (AttributeError, ValueError):
        reply_text = None

    if reply_text:
        return _parse_analysis(reply_text)
    raise Exception("No valid analysis was generated by Gemini.")
107
+
108
def generate_audio(script_text, voice_name, is_cheerful):
    """Request synthesized speech for the script from the TTS service.

    Posts the text, the voice name and the cheerful flag as JSON to
    ``TTS_API_URL`` (300 second timeout).

    Returns:
        The raw response body when the service answers with HTTP 200.

    Raises:
        Exception: For any other status code; the message carries the status
            code and the response text.
    """
    print(f"Generating audio (Voice: {voice_name}, Cheerful: {is_cheerful})")

    tts_reply = requests.post(
        TTS_API_URL,
        json=dict(text=script_text, voice_name=voice_name, cheerful=is_cheerful),
        timeout=300,
    )

    # Guard clause: anything but 200 is reported to the caller as an error.
    if tts_reply.status_code != 200:
        raise Exception(f"TTS API Error: {tts_reply.status_code} - {tts_reply.text}")
    return tts_reply.content
121
+
122
def _scaled_cut_points(editing_notes, speed, duration):
    """Return sorted ``(timestamp, type)`` cut points on the sped-up timeline.

    Note timestamps refer to the source video; after a global speed change by
    ``speed`` the same moment sits at ``timestamp / speed``. Malformed notes
    and non-positive timestamps are dropped, timestamps past the end are
    clamped to ``duration``.
    """
    points = []
    for note in editing_notes or []:
        if not isinstance(note, dict):
            continue
        try:
            stamp = float(note.get("timestamp")) / speed
        except (TypeError, ValueError):
            continue
        if stamp > 0:
            points.append((min(stamp, duration), note.get("type")))
    points.sort(key=lambda point: point[0])
    return points


def _even_crop_size(width, height, aspect_ratio):
    """Return the largest even ``(w, h)`` of ``aspect_ratio`` inside the frame.

    Dimensions are rounded down to even numbers because libx264 with the usual
    yuv420p pixel format rejects odd widths/heights.
    """
    ratio_w, ratio_h = aspect_ratio
    if width * ratio_h > height * ratio_w:
        # Source is wider than the target: keep the height, trim the sides.
        new_w, new_h = height * ratio_w // ratio_h, height
    else:
        # Source is taller (or equal): keep the width, trim top and bottom.
        new_w, new_h = width, width * ratio_h // ratio_w
    return max(2, new_w - new_w % 2), max(2, new_h - new_h % 2)


def _paint_black_bars(get_frame, t):
    """Frame filter: black out the top and bottom so 85% of the height shows.

    The frame keeps its size; stacking extra rows instead would grow the frame.
    """
    frame = get_frame(t).copy()  # never paint on the reader's own buffer
    height = frame.shape[0]
    bar_size = (height - int(height * 0.85)) // 2
    if bar_size > 0:
        frame[:bar_size] = 0
        frame[-bar_size:] = 0
    return frame


def apply_editing(video_path, audio_data, editing_notes, preset_name):
    """Apply editing effects to video based on analysis and preset.

    Args:
        video_path: Path of the source video.
        audio_data: Bytes of the narration audio (written to a temp .wav file).
        editing_notes: List of dicts with ``timestamp`` (seconds in the source
            video) and ``type``. The segment that ENDS at a note's timestamp
            is slowed down for ``slow_mo`` and sped up for ``fast_cut``; any
            other type only splits the video.
        preset_name: Key into ``EDITING_PRESETS``.

    Returns:
        Path of the rendered video inside the download folder.

    Raises:
        KeyError: If ``preset_name`` is not a known preset.
    """
    from moviepy.video.fx.all import crop

    print(f"Applying {preset_name} editing preset...")
    preset = EDITING_PRESETS[preset_name]
    speed = preset.get('speed') or 1.0

    # Save audio to temp file (moviepy reads audio from a path, not from bytes)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_audio.write(audio_data)
        temp_audio_path = temp_audio.name

    opened = []  # clips that own file readers and must be closed
    try:
        # Load video and audio
        source = VideoFileClip(video_path)
        opened.append(source)
        audio = AudioFileClip(temp_audio_path)
        opened.append(audio)

        # Apply basic preset effects
        video = source
        if speed != 1.0:
            video = video.fx(speedx, speed)

        # Apply black bars for cinematic
        if preset.get('black_bars'):
            video = video.fl(_paint_black_bars)

        # Apply editing notes: cut the video at each note and apply the note's
        # effect to the segment leading up to it.
        clips = []
        current_start = 0
        for stamp, note_type in _scaled_cut_points(editing_notes, speed, video.duration):
            if stamp <= current_start:
                continue

            clip = video.subclip(current_start, stamp)
            if note_type == 'slow_mo':
                clip = clip.fx(speedx, 0.5)
            elif note_type == 'fast_cut':
                clip = clip.fx(speedx, 1.5)

            clips.append(clip)
            current_start = stamp

        # Add remaining video
        if current_start < video.duration:
            clips.append(video.subclip(current_start))

        # concatenate_videoclips() cannot handle an empty list.
        final_video = concatenate_videoclips(clips) if clips else video

        # Apply aspect ratio if specified: centre-crop to the target ratio.
        # (Resizing to height=ratio[1] would shrink the video to a few pixels.)
        if preset.get('aspect_ratio'):
            src_w, src_h = final_video.size
            crop_w, crop_h = _even_crop_size(src_w, src_h, preset['aspect_ratio'])
            final_video = final_video.fx(
                crop,
                x_center=src_w / 2,
                y_center=src_h / 2,
                width=crop_w,
                height=crop_h,
            )

        # Narration longer than the picture would leave an audio track that
        # outlasts the video stream, so trim it to the video's length.
        if audio.duration > final_video.duration:
            audio = audio.subclip(0, final_video.duration)
        final_video = final_video.set_audio(audio)

        # Generate output path. The file is always H.264/AAC, so give it an
        # .mp4 name regardless of the upload's extension.
        stem = os.path.splitext(os.path.basename(video_path))[0]
        output_path = os.path.join(app.config['DOWNLOAD_FOLDER'], f"edited_{stem}.mp4")
        final_video.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            threads=4,
            preset='fast'
        )

        return output_path
    finally:
        # Cleanup runs on success and on failure: close the readers first so
        # the temp file is no longer in use, then delete it.
        try:
            for clip in opened:
                clip.close()
        finally:
            if os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)
203
+
204
+ # --- 4. FLASK ROUTES ---
205
 
206
@app.route('/', methods=['GET'])
def index():
    """Serve the upload form, passing the voice and preset options to it."""
    template_context = {
        'voices': VOICE_CHOICES,
        'presets': EDITING_PRESETS.keys(),
    }
    return render_template('index.html', **template_context)
210
+
211
@app.route('/process', methods=['POST'])
def process_video():
    """Handle video upload and processing.

    Reads the ``video`` file and the ``voice``, ``tone`` and ``preset`` form
    fields, runs analysis, speech generation and editing, and answers with
    JSON: ``status``/``video_url``/``script`` on success, ``status``/``message``
    with an HTTP error code on failure. The uploaded file is removed afterwards
    in every case.
    """
    input_video_path = None

    try:
        # Validate file upload
        # NOTE(review): this branch answers with HTML while every other path
        # answers with JSON; kept as-is because the client is not visible here.
        upload = request.files.get('video')
        if upload is None or upload.filename == '':
            flash("Please upload a video file.", "error")
            return render_template('index.html',
                                   voices=VOICE_CHOICES,
                                   presets=EDITING_PRESETS.keys())

        # Get processing options and reject bad ones before doing any work.
        preset_name = request.form.get('preset', 'fast_cuts')
        if preset_name not in EDITING_PRESETS:
            return jsonify({
                'status': 'error',
                'message': f"Unknown editing preset: {preset_name}"
            }), 400

        # The form may post either the display label or the voice name itself;
        # labels are translated, anything else is passed through unchanged.
        voice_choice = request.form.get('voice', 'Charon')
        voice_name = VOICE_CHOICES.get(voice_choice, voice_choice)
        is_cheerful = request.form.get('tone') == 'on'

        # Save uploaded file under a unique name: client filenames can collide
        # between concurrent requests, and secure_filename() may return ''.
        safe_name = secure_filename(upload.filename) or "upload.mp4"
        filename = f"{uuid.uuid4().hex}_{safe_name}"
        input_video_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        upload.save(input_video_path)

        # Analyze video
        analysis = analyze_video(input_video_path)
        script = analysis.get('script', '')
        editing_notes = analysis.get('editing_notes', [])
        if not isinstance(script, str) or not script.strip():
            raise ValueError("Gemini returned an empty script; nothing to narrate.")

        # Generate audio
        audio_data = generate_audio(script, voice_name, is_cheerful)

        # Apply editing and generate final video
        final_video_path = apply_editing(input_video_path, audio_data, editing_notes, preset_name)

        return jsonify({
            'status': 'success',
            'video_url': url_for('serve_video', filename=os.path.basename(final_video_path)),
            'script': script
        })

    except Exception as e:
        # Request boundary: report any failure to the client as JSON.
        print(f"Processing error: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

    finally:
        # Clean up uploaded file
        if input_video_path and os.path.exists(input_video_path):
            os.remove(input_video_path)
263
+
264
@app.route('/downloads/<filename>')
def serve_video(filename):
    """Send the requested file out of the configured download folder."""
    download_dir = app.config['DOWNLOAD_FOLDER']
    return send_from_directory(download_dir, filename)
268
+
269
+ # --- 5. APPLICATION ENTRY POINT ---
270
if __name__ == '__main__':
    # Start Flask's built-in server on all interfaces, port 7860, when this
    # file is executed directly.
    server_options = dict(host="0.0.0.0", port=7860)
    app.run(**server_options)