mgbam commited on
Commit
08839d3
·
verified ·
1 Parent(s): 76ef43b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +185 -104
app.py CHANGED
@@ -4,9 +4,27 @@ import json
4
  import time
5
  import random
6
  import subprocess
 
 
7
  import google.generativeai as genai
8
  from tavily import TavilyClient
9
  from runwayml import RunwayML, TaskFailedError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # --- 1. CONFIGURE API KEYS ---
12
  try:
@@ -17,16 +35,85 @@ try:
17
  except KeyError as e:
18
  raise ValueError(f"API Key Error: Please set the {e} secret in your environment.")
19
 
20
- # --- 2. CORE VIDEO GENERATION FUNCTION ---
21
- def generate_video_from_topic(topic_prompt, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  job_id = f"{int(time.time())}_{random.randint(1000, 9999)}"
23
- print(f"--- Starting New Job: {job_id} for topic: '{topic_prompt}' ---")
24
 
25
- intermediate_files = []
 
 
 
 
26
 
27
  try:
28
- # STEP 1: RESEARCH (Tavily)
29
- progress(0.1, desc="πŸ” Researching topic with Tavily...")
30
  facts = "No research data available."
31
  try:
32
  research_results = tavily_client.search(
@@ -36,133 +123,127 @@ def generate_video_from_topic(topic_prompt, progress=gr.Progress(track_tqdm=True
36
  if research_results and 'results' in research_results:
37
  facts = "\n".join([res['content'] for res in research_results['results']])
38
  except Exception as e:
39
- print(f"Tavily API failed: {e}. Proceeding without research.")
40
 
41
- # STEP 2: SCRIPT & SCENE PROMPTS (Gemini)
42
- progress(0.2, desc="✍️ Writing script with Gemini...")
43
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
44
- prompt = f"""
45
- You are a creative director for viral short-form videos. Based on the topic '{topic_prompt}' and research, create a script.
46
- Your output MUST be a valid JSON object with "narration_script" (string) and "scene_prompts" (a list of 4 detailed, cinematic prompts).
 
 
 
 
 
47
  """
48
- response = gemini_model.generate_content(prompt)
49
-
50
  try:
51
- cleaned_text = (
52
- response.text
53
- .strip()
54
- .replace("```json", "")
55
- .replace("```", "")
56
- )
57
- script_data = json.loads(cleaned_text)
58
- narration = script_data['narration_script']
59
- scene_prompts = script_data['scene_prompts']
60
- except (json.JSONDecodeError, KeyError) as e:
61
- raise gr.Error(
62
- f"Gemini did not return valid JSON. Error: {e}. Response was: {response.text}"
63
- )
64
-
65
- # STEP 3: MOCK VOICE OVER
66
- progress(0.3, desc="πŸŽ™οΈ MOCKING voiceover to save credits...")
67
- audio_path = f"audio_{job_id}.mp3"
68
- intermediate_files.append(audio_path)
69
- narration_duration = len(narration.split()) / 2.5
70
- subprocess.run([
71
- 'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=mono',
72
- '-t', str(narration_duration), '-q:a', '9', '-acodec', 'libmp3lame',
73
- audio_path, '-y'
74
- ], check=True)
75
- print(f"MOCK audio file saved: {audio_path}")
76
-
77
- # STEP 4: GENERATE VIDEO SCENES (Runway SDK)
78
- video_clip_paths = []
79
- for i, scene_prompt in enumerate(scene_prompts, start=1):
80
- progress(0.4 + (i * 0.12), desc=f"🎬 Generating scene {i}/{len(scene_prompts)}...")
81
  try:
82
- task = (
83
- runway_client.image_to_video.create(
84
- model="gen4_turbo",
85
- prompt_text=scene_prompt,
86
- duration=5,
87
- ratio="1280:720"
88
- )
89
- .wait_for_task_output()
90
  )
 
91
  video_url = task.output[0]
92
  except TaskFailedError as e:
93
- raise gr.Error(f"Runway job failed: {e.task_details}")
94
-
95
- clip_path = f"scene_{i}_{job_id}.mp4"
96
- intermediate_files.append(clip_path)
97
- video_clip_paths.append(clip_path)
98
-
99
- # Download the scene clip
100
- with open(clip_path, "wb") as f:
101
- for chunk in runway_client._session.get(video_url, stream=True).iter_content(chunk_size=1024):
102
- if chunk:
103
- f.write(chunk)
104
- print(f"Video clip saved: {clip_path}")
105
-
106
- # STEP 5: STITCHING (FFmpeg)
107
- progress(0.9, desc="βœ‚οΈ Assembling final video with FFmpeg...")
108
- file_list_path = f"file_list_{job_id}.txt"
109
- intermediate_files.append(file_list_path)
110
- with open(file_list_path, "w") as f:
111
- for clip in video_clip_paths:
112
- f.write(f"file '{clip}'\n")
113
-
114
- combined_video_path = f"combined_video_{job_id}.mp4"
115
- intermediate_files.append(combined_video_path)
116
  subprocess.run([
117
- 'ffmpeg', '-f', 'concat', '-safe', '0',
118
- '-i', file_list_path, '-c', 'copy', combined_video_path, '-y'
119
  ], check=True)
 
120
 
121
- final_video_path = f"final_video_{job_id}.mp4"
 
 
122
  subprocess.run([
123
- 'ffmpeg', '-i', combined_video_path,
124
- '-i', audio_path,
125
- '-c:v', 'copy', '-c:a', 'aac', '-shortest', final_video_path, '-y'
126
  ], check=True)
127
- print(f"Final video created at: {final_video_path}")
128
 
129
- progress(1.0, desc="βœ… Done!")
130
- return final_video_path
 
131
 
132
  except Exception as e:
133
- print(f"--- JOB {job_id} FAILED ---\nError: {e}")
134
  raise gr.Error(f"An error occurred: {e}")
135
-
136
  finally:
137
- print("Cleaning up intermediate files...")
138
- for file_path in intermediate_files:
139
- if os.path.exists(file_path):
140
- os.remove(file_path)
141
- print(f"Removed: {file_path}")
142
 
143
- # --- 3. LAUNCH GRADIO APP ---
144
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
145
- gr.Markdown("# πŸ€– My Personal AI Video Studio")
146
- gr.Markdown("Enter a topic to generate a short-form video. This private tool is used for fulfilling freelance orders.")
147
 
148
  with gr.Row():
149
- topic_input = gr.Textbox(
150
- label="Video Topic",
151
- placeholder="e.g., 'The history of coffee'",
152
- scale=3
153
- )
154
- generate_button = gr.Button("Generate Video", variant="primary", scale=1)
155
-
156
  with gr.Row():
157
  video_output = gr.Video(label="Generated Video")
158
 
159
  generate_button.click(
160
  fn=generate_video_from_topic,
161
- inputs=topic_input,
162
  outputs=video_output
163
  )
164
 
165
- gr.Markdown("--- \n ### Examples of Good Topics:\n - A product: 'The new waterproof Chrono-Watch X1'\n - A concept: 'The science of sleep'")
166
 
167
  if __name__ == "__main__":
168
- demo.launch()
 
4
  import time
5
  import random
6
  import subprocess
7
+ from pathlib import Path
8
+
9
  import google.generativeai as genai
10
  from tavily import TavilyClient
11
  from runwayml import RunwayML, TaskFailedError
12
+ from PIL import Image, ImageDraw, ImageFont
13
+
14
+ # =============================================================
15
+ # AI VIDEO STUDIO (Gen-4 Turbo Image→Video compliant rewrite)
16
+ # =============================================================
17
+ # Key changes:
18
+ # 1. Added *required* prompt_image for Gen-4 / gen4_turbo image_to_video tasks (was missing -> error).
19
+ # 2. Added UI input for an optional user keyframe image; if absent we auto-generate a placeholder.
20
+ # 3. Included prompt_text together with prompt_image for better guidance.
21
+ # 4. Added more robust polling / retry & explicit exception surfaces.
22
+ # 5. Added structured logging + deterministic temp directory per job.
23
+ # 6. Wrapped cleanup in finally; kept mock VO approach.
24
+ # 7. Added basic safety guardrails.
25
+ #
26
+ # Gen-4 requires an input image plus text prompt (cannot be pure text alone) – if you want pure text-to-video, switch to Gen-3 Alpha text mode. See docs.
27
+ # =============================================================
28
 
29
  # --- 1. CONFIGURE API KEYS ---
30
  try:
 
35
  except KeyError as e:
36
  raise ValueError(f"API Key Error: Please set the {e} secret in your environment.")
37
 
38
# --- 2. CONSTANTS / SETTINGS ---
# Tunables for the Runway Gen-4 pipeline and the mock voice-over.
GEN4_MODEL = "gen4_turbo"  # adjust to "gen4" if you prefer (slower / potentially higher fidelity)
SCENE_COUNT = 4  # number of scene prompts requested from Gemini (and clips rendered)
SCENE_DURATION_SECONDS = 5  # Gen-4 supports 5 or 10 seconds
VIDEO_RATIO = "1280:720"  # 16:9
WORDS_PER_SEC = 2.5  # Used for mock narration length
MAX_POLL_SECONDS = 180  # Per scene
POLL_INTERVAL = 5  # seconds between Runway task status refreshes
46
+
47
+ # --- 3. UTILITIES ---
48
+ def _log(msg: str):
49
+ print(f"[AI-STUDIO] {msg}")
50
+
51
+
52
def create_placeholder_image(text: str, path: Path, size=(1280, 720)) -> Path:
    """Create a simple placeholder keyframe if user supplies none.

    Renders *text* centered on a dark canvas (naive greedy word-wrap at
    ~28 characters per line) and saves the result to *path*.
    You can later replace this with a real text-to-image generation step.

    Args:
        text: Caption to draw (usually the topic prompt).
        path: Destination file path for the saved PNG.
        size: (width, height) canvas size; default matches VIDEO_RATIO.

    Returns:
        The same *path*, for convenient chaining.
    """
    img = Image.new("RGB", size, (10, 10, 10))
    draw = ImageDraw.Draw(img)
    try:
        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 60)
    except Exception:
        # Font file may not exist on the host; fall back to PIL's builtin.
        font = ImageFont.load_default()

    # Naive character-count wrap (not pixel-accurate, good enough here).
    wrapped = []
    line = ""
    for word in text.split():
        test = f"{line} {word}".strip()
        if len(test) > 28:  # naive wrap
            wrapped.append(line)
            line = word
        else:
            line = test
    if line:
        wrapped.append(line)

    # BUGFIX: ImageDraw.textsize() was deprecated in Pillow 9.2 and removed
    # in Pillow 10.0 (would raise AttributeError); measure with textbbox().
    y = size[1] // 2 - (len(wrapped) * 35) // 2
    for w in wrapped:
        left, top, right, bottom = draw.textbbox((0, 0), w, font=font)
        w_width, w_height = right - left, bottom - top
        draw.text(((size[0] - w_width) // 2, y), w, fill=(240, 240, 240), font=font)
        y += w_height + 10
    img.save(path)
    return path
79
+
80
+
81
def generate_mock_voiceover(narration: str, out_path: Path):
    """Write a silent MP3 to *out_path* approximating the narration length.

    Duration is estimated as word count divided by WORDS_PER_SEC; the file
    is produced with ffmpeg's anullsrc silence source. Returns the
    estimated duration in seconds.
    """
    word_count = len(narration.split())
    duration = word_count / WORDS_PER_SEC
    cmd = [
        'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=mono',
        '-t', str(duration), '-q:a', '9', '-acodec', 'libmp3lame',
        str(out_path), '-y',
    ]
    subprocess.run(cmd, check=True)
    return duration
88
+
89
+
90
def poll_runway_task(task_obj, max_seconds=MAX_POLL_SECONDS, interval=POLL_INTERVAL):
    """Block until a Runway task finishes, raising on failure or timeout.

    Refreshes *task_obj* every *interval* seconds and inspects its status.
    Returns the task on 'SUCCEEDED'; raises TaskFailedError on 'FAILED';
    raises TimeoutError once *max_seconds* have elapsed.

    NOTE(review): assumes the SDK task object exposes .refresh() and
    .status, and that TaskFailedError accepts a task_details kwarg —
    confirm against the installed runwayml SDK version.
    """
    deadline = time.time() + max_seconds
    while True:
        task_obj.refresh()
        status = task_obj.status
        if status == 'SUCCEEDED':
            return task_obj
        if status == 'FAILED':
            raise TaskFailedError(task_details=task_obj)
        if time.time() > deadline:
            raise TimeoutError(f"Runway task timed out after {max_seconds}s (status={status})")
        time.sleep(interval)
102
+
103
+ # --- 4. CORE PIPELINE ---
104
+ def generate_video_from_topic(topic_prompt, keyframe_image, progress=gr.Progress(track_tqdm=True)):
105
  job_id = f"{int(time.time())}_{random.randint(1000, 9999)}"
106
+ _log(f"Starting job {job_id} :: topic='{topic_prompt}'")
107
 
108
+ # Working directory for this job
109
+ workdir = Path(f"job_{job_id}")
110
+ workdir.mkdir(exist_ok=True)
111
+
112
+ intermediates = []
113
 
114
  try:
115
+ # STEP 1: Research
116
+ progress(0.05, desc="πŸ” Researching topic ...")
117
  facts = "No research data available."
118
  try:
119
  research_results = tavily_client.search(
 
123
  if research_results and 'results' in research_results:
124
  facts = "\n".join([res['content'] for res in research_results['results']])
125
  except Exception as e:
126
+ _log(f"Tavily failed: {e}")
127
 
128
+ # STEP 2: Script
129
+ progress(0.15, desc="✍️ Writing script ...")
130
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
131
+ script_prompt = f"""
132
+ You are a creative director for viral short-form videos.
133
+ Topic: {topic_prompt}
134
+ Research (may contain noise):\n{facts}\n\n
135
+ Produce JSON with keys:
136
+ narration_script: overall narration (concise, energetic, ~85-110 words per 5 scenes). Maintain coherence.
137
+ scene_prompts: list of {SCENE_COUNT} *visual* prompts. Each should be cinematic, 1-2 sentences, include style / camera / lighting cues and keep characters consistent.
138
+ Return ONLY JSON.
139
  """
140
+ response = gemini_model.generate_content(script_prompt)
 
141
  try:
142
+ cleaned = response.text.strip().replace("```json", "").replace("```", "")
143
+ data = json.loads(cleaned)
144
+ narration = data['narration_script']
145
+ scene_prompts = data['scene_prompts']
146
+ if len(scene_prompts) != SCENE_COUNT:
147
+ raise ValueError(f"Expected {SCENE_COUNT} scene prompts, got {len(scene_prompts)}")
148
+ except Exception as e:
149
+ raise gr.Error(f"Gemini JSON parse error: {e}. Raw: {response.text[:400]}")
150
+
151
+ # STEP 3: Mock VO
152
+ progress(0.25, desc="πŸŽ™οΈ Generating mock VO ...")
153
+ audio_path = workdir / f"narration_{job_id}.mp3"
154
+ generate_mock_voiceover(narration, audio_path)
155
+ intermediates.append(audio_path)
156
+
157
+ # STEP 4: Prepare keyframe image (required for Gen-4 image_to_video)
158
+ progress(0.30, desc="πŸ–ΌοΈ Preparing keyframe image ...")
159
+ if keyframe_image is not None:
160
+ keyframe_path = Path(keyframe_image)
161
+ else:
162
+ keyframe_path = workdir / "auto_keyframe.png"
163
+ create_placeholder_image(topic_prompt, keyframe_path)
164
+ intermediates.append(keyframe_path)
165
+
166
+ # STEP 5: Generate scenes
167
+ clip_paths = []
168
+ for idx, scene_prompt in enumerate(scene_prompts, start=1):
169
+ base_progress = 0.30 + (idx * 0.12)
170
+ progress(min(base_progress, 0.85), desc=f"🎬 Scene {idx}/{len(scene_prompts)} ...")
171
+ _log(f"Submitting scene {idx}: {scene_prompt[:90]}...")
172
  try:
173
+ task = runway_client.image_to_video.create(
174
+ model=GEN4_MODEL,
175
+ prompt_image=str(keyframe_path), # required param
176
+ prompt_text=scene_prompt,
177
+ duration=SCENE_DURATION_SECONDS,
178
+ ratio=VIDEO_RATIO,
 
 
179
  )
180
+ task = poll_runway_task(task)
181
  video_url = task.output[0]
182
  except TaskFailedError as e:
183
+ raise gr.Error(f"Runway failed scene {idx}: {getattr(e, 'task_details', 'No details')}")
184
+
185
+ # Download clip
186
+ clip_path = workdir / f"scene_{idx}.mp4"
187
+ r = runway_client._session.get(video_url, stream=True)
188
+ with open(clip_path, 'wb') as f:
189
+ for chunk in r.iter_content(chunk_size=8192):
190
+ if chunk: f.write(chunk)
191
+ clip_paths.append(clip_path)
192
+ intermediates.append(clip_path)
193
+ _log(f"Downloaded scene {idx} -> {clip_path}")
194
+
195
+ # STEP 6: Concatenate video
196
+ progress(0.90, desc="βœ‚οΈ Concatenating scenes ...")
197
+ list_file = workdir / "clips.txt"
198
+ with open(list_file, 'w') as lf:
199
+ for p in clip_paths:
200
+ lf.write(f"file '{p}'\n")
201
+ intermediates.append(list_file)
202
+
203
+ concat_path = workdir / f"concat_{job_id}.mp4"
 
 
204
  subprocess.run([
205
+ 'ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(list_file), '-c', 'copy', str(concat_path), '-y'
 
206
  ], check=True)
207
+ intermediates.append(concat_path)
208
 
209
+ # STEP 7: Mux audio
210
+ final_path = workdir / f"final_{job_id}.mp4"
211
+ progress(0.95, desc="πŸ”Š Merging audio ...")
212
  subprocess.run([
213
+ 'ffmpeg', '-i', str(concat_path), '-i', str(audio_path), '-c:v', 'copy', '-c:a', 'aac', '-shortest', str(final_path), '-y'
 
 
214
  ], check=True)
 
215
 
216
+ progress(1.0, desc="βœ… Done")
217
+ _log(f"FINAL VIDEO: {final_path}")
218
+ return str(final_path)
219
 
220
  except Exception as e:
221
+ _log(f"JOB {job_id} FAILED: {e}")
222
  raise gr.Error(f"An error occurred: {e}")
 
223
  finally:
224
+ # Keep workdir for debugging; comment out next block to remove entire directory
225
+ pass
 
 
 
226
 
227
# --- 5. GRADIO UI ---
# Top-level UI wiring: a topic textbox, an optional keyframe image upload,
# and a button that runs the generation pipeline and shows the final video.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ€– My Personal AI Video Studio (Gen-4 Turbo)")
    gr.Markdown("Enter a topic and (optionally) upload a keyframe image. Without an image, a simple placeholder is generated.")

    with gr.Row():
        # type="filepath" makes Gradio hand the pipeline a local file path
        # (matching the Path(keyframe_image) usage in the pipeline).
        topic_input = gr.Textbox(label="Video Topic", placeholder="e.g., 'The history of coffee'", scale=3)
        image_input = gr.Image(label="Keyframe Image (optional)", type="filepath")
    with gr.Row():
        generate_button = gr.Button("Generate Video", variant="primary")

    with gr.Row():
        video_output = gr.Video(label="Generated Video")

    # Wire the button to the core pipeline; progress is streamed by the
    # gr.Progress object declared in the function signature.
    generate_button.click(
        fn=generate_video_from_topic,
        inputs=[topic_input, image_input],
        outputs=video_output
    )

    gr.Markdown("---\n### Tips\n- Supply a consistent character/style image for more coherent scenes.\n- For pure *text-only* generation, switch to a Gen-3 Alpha text-to-video flow (not implemented here).\n- Replace placeholder keyframe logic with a real T2I model for higher quality.")

# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()