Update app.py
app.py (CHANGED)
@@ -4,9 +4,27 @@ import json
 import time
 import random
 import subprocess
+from pathlib import Path
+
 import google.generativeai as genai
 from tavily import TavilyClient
 from runwayml import RunwayML, TaskFailedError
+from PIL import Image, ImageDraw, ImageFont
+
+# =============================================================
+# AI VIDEO STUDIO (Gen-4 Turbo Image→Video compliant rewrite)
+# =============================================================
+# Key changes:
+# 1. Added the *required* prompt_image for Gen-4 / gen4_turbo image_to_video tasks (was missing -> error).
+# 2. Added a UI input for an optional user keyframe image; if absent, a placeholder is auto-generated.
+# 3. Included prompt_text together with prompt_image for better guidance.
+# 4. Added more robust polling / retries and explicit exception surfaces.
+# 5. Added structured logging plus a deterministic temp directory per job.
+# 6. Wrapped cleanup in finally; kept the mock VO approach.
+# 7. Added basic safety guardrails.
+#
+# Gen-4 requires an input image plus a text prompt (it cannot run from text alone); for pure text-to-video, switch to Gen-3 Alpha's text mode. See the Runway docs.
+# =============================================================
 
 # --- 1. CONFIGURE API KEYS ---
 try:
@@ -17,16 +35,85 @@ try:
 except KeyError as e:
     raise ValueError(f"API Key Error: Please set the {e} secret in your environment.")
 
-# --- 2.
- …
+# --- 2. CONSTANTS / SETTINGS ---
+GEN4_MODEL = "gen4_turbo"   # adjust to "gen4" if you prefer (slower / potentially higher fidelity)
+SCENE_COUNT = 4
+SCENE_DURATION_SECONDS = 5  # Gen-4 supports 5 or 10 seconds
+VIDEO_RATIO = "1280:720"    # 16:9
+WORDS_PER_SEC = 2.5         # Used for mock narration length
+MAX_POLL_SECONDS = 180      # Per scene
+POLL_INTERVAL = 5
+
+# --- 3. UTILITIES ---
+def _log(msg: str):
+    print(f"[AI-STUDIO] {msg}")
+
+
+def create_placeholder_image(text: str, path: Path, size=(1280, 720)) -> Path:
+    """Create a simple placeholder keyframe if the user supplies none.
+    You can later replace this with a real text-to-image generation step."""
+    img = Image.new("RGB", size, (10, 10, 10))
+    draw = ImageDraw.Draw(img)
+    try:
+        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 60)
+    except Exception:
+        font = ImageFont.load_default()
+    wrapped = []
+    line = ""
+    for word in text.split():
+        test = f"{line} {word}".strip()
+        if len(test) > 28:  # naive wrap
+            wrapped.append(line)
+            line = word
+        else:
+            line = test
+    if line:
+        wrapped.append(line)
+    y = size[1] // 2 - (len(wrapped) * 35) // 2
+    for w in wrapped:
+        # textbbox replaces ImageDraw.textsize, which was removed in Pillow 10
+        left, top, right, bottom = draw.textbbox((0, 0), w, font=font)
+        w_width, w_height = right - left, bottom - top
+        draw.text(((size[0] - w_width) // 2, y), w, fill=(240, 240, 240), font=font)
+        y += w_height + 10
+    img.save(path)
+    return path
+
+
+def generate_mock_voiceover(narration: str, out_path: Path):
+    duration = len(narration.split()) / WORDS_PER_SEC
+    subprocess.run([
+        'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=mono',
+        '-t', str(duration), '-q:a', '9', '-acodec', 'libmp3lame', str(out_path), '-y'
+    ], check=True)
+    return duration
+
+
+def poll_runway_task(task_obj, max_seconds=MAX_POLL_SECONDS, interval=POLL_INTERVAL):
+    start = time.time()
+    while True:
+        task_obj.refresh()
+        status = task_obj.status
+        if status == 'SUCCEEDED':
+            return task_obj
+        if status == 'FAILED':
+            raise TaskFailedError(task_details=task_obj)
+        if time.time() - start > max_seconds:
+            raise TimeoutError(f"Runway task timed out after {max_seconds}s (status={status})")
+        time.sleep(interval)
+
+# --- 4. CORE PIPELINE ---
+def generate_video_from_topic(topic_prompt, keyframe_image, progress=gr.Progress(track_tqdm=True)):
     job_id = f"{int(time.time())}_{random.randint(1000, 9999)}"
- …
+    _log(f"Starting job {job_id} :: topic='{topic_prompt}'")
 
- …
+    # Working directory for this job
+    workdir = Path(f"job_{job_id}")
+    workdir.mkdir(exist_ok=True)
+
+    intermediates = []
 
     try:
-        # STEP 1:
-        progress(0.
+        # STEP 1: Research
+        progress(0.05, desc="🔍 Researching topic ...")
         facts = "No research data available."
         try:
            research_results = tavily_client.search(
@@ -36,133 +123,127 @@ def generate_video_from_topic(topic_prompt, progress=gr.Progress(track_tqdm=True
            if research_results and 'results' in research_results:
                facts = "\n".join([res['content'] for res in research_results['results']])
         except Exception as e:
- …
+            _log(f"Tavily failed: {e}")
 
-        # STEP 2:
-        progress(0.
+        # STEP 2: Script
+        progress(0.15, desc="✍️ Writing script ...")
         gemini_model = genai.GenerativeModel('gemini-1.5-flash')
- …
-        You are a creative director for viral short-form videos.
- …
+        script_prompt = f"""
+        You are a creative director for viral short-form videos.
+        Topic: {topic_prompt}
+        Research (may contain noise):\n{facts}\n\n
+        Produce JSON with keys:
+        narration_script: overall narration (concise, energetic, ~85-110 words per 5 scenes). Maintain coherence.
+        scene_prompts: list of {SCENE_COUNT} *visual* prompts. Each should be cinematic, 1-2 sentences, include style / camera / lighting cues and keep characters consistent.
+        Return ONLY JSON.
         """
-        response = gemini_model.generate_content(
- …
+        response = gemini_model.generate_content(script_prompt)
         try:
- …
+            cleaned = response.text.strip().replace("```json", "").replace("```", "")
+            data = json.loads(cleaned)
+            narration = data['narration_script']
+            scene_prompts = data['scene_prompts']
+            if len(scene_prompts) != SCENE_COUNT:
+                raise ValueError(f"Expected {SCENE_COUNT} scene prompts, got {len(scene_prompts)}")
+        except Exception as e:
+            raise gr.Error(f"Gemini JSON parse error: {e}. Raw: {response.text[:400]}")
+
+        # STEP 3: Mock VO
+        progress(0.25, desc="🎙️ Generating mock VO ...")
+        audio_path = workdir / f"narration_{job_id}.mp3"
+        generate_mock_voiceover(narration, audio_path)
+        intermediates.append(audio_path)
+
+        # STEP 4: Prepare keyframe image (required for Gen-4 image_to_video)
+        progress(0.30, desc="🖼️ Preparing keyframe image ...")
+        if keyframe_image is not None:
+            keyframe_path = Path(keyframe_image)
+        else:
+            keyframe_path = workdir / "auto_keyframe.png"
+            create_placeholder_image(topic_prompt, keyframe_path)
+            intermediates.append(keyframe_path)
+
+        # STEP 5: Generate scenes
+        clip_paths = []
+        for idx, scene_prompt in enumerate(scene_prompts, start=1):
+            base_progress = 0.30 + (idx * 0.12)
+            progress(min(base_progress, 0.85), desc=f"🎬 Scene {idx}/{len(scene_prompts)} ...")
+            _log(f"Submitting scene {idx}: {scene_prompt[:90]}...")
             try:
-                task = (
- …
-                )
-                .wait_for_task_output()
+                task = runway_client.image_to_video.create(
+                    model=GEN4_MODEL,
+                    prompt_image=str(keyframe_path),  # required param
+                    prompt_text=scene_prompt,
+                    duration=SCENE_DURATION_SECONDS,
+                    ratio=VIDEO_RATIO,
                 )
+                task = poll_runway_task(task)
                 video_url = task.output[0]
             except TaskFailedError as e:
-                raise gr.Error(f"Runway
- …
-        combined_video_path = f"combined_video_{job_id}.mp4"
-        intermediate_files.append(combined_video_path)
+                raise gr.Error(f"Runway failed scene {idx}: {getattr(e, 'task_details', 'No details')}")
+
+            # Download clip
+            clip_path = workdir / f"scene_{idx}.mp4"
+            # NOTE: reaches into the SDK's private HTTP session; plain requests.get(video_url, stream=True) would also work
+            r = runway_client._session.get(video_url, stream=True)
+            with open(clip_path, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+            clip_paths.append(clip_path)
+            intermediates.append(clip_path)
+            _log(f"Downloaded scene {idx} -> {clip_path}")
+
+        # STEP 6: Concatenate video
+        progress(0.90, desc="✂️ Concatenating scenes ...")
+        list_file = workdir / "clips.txt"
+        with open(list_file, 'w') as lf:
+            for p in clip_paths:
+                lf.write(f"file '{p}'\n")
+        intermediates.append(list_file)
+
+        concat_path = workdir / f"concat_{job_id}.mp4"
         subprocess.run([
-            'ffmpeg', '-f', 'concat', '-safe', '0',
-            '-i', file_list_path, '-c', 'copy', combined_video_path, '-y'
+            'ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(list_file), '-c', 'copy', str(concat_path), '-y'
         ], check=True)
+        intermediates.append(concat_path)
 
- …
+        # STEP 7: Mux audio
+        final_path = workdir / f"final_{job_id}.mp4"
+        progress(0.95, desc="🔊 Merging audio ...")
         subprocess.run([
-            'ffmpeg', '-i',
-            '-i', audio_path,
-            '-c:v', 'copy', '-c:a', 'aac', '-shortest', final_video_path, '-y'
+            'ffmpeg', '-i', str(concat_path), '-i', str(audio_path), '-c:v', 'copy', '-c:a', 'aac', '-shortest', str(final_path), '-y'
         ], check=True)
-        print(f"Final video created at: {final_video_path}")
 
-        progress(1.0, desc="✅ Done
- …
+        progress(1.0, desc="✅ Done")
+        _log(f"FINAL VIDEO: {final_path}")
+        return str(final_path)
 
     except Exception as e:
- …
+        _log(f"JOB {job_id} FAILED: {e}")
         raise gr.Error(f"An error occurred: {e}")
- …
     finally:
- …
-        if os.path.exists(file_path):
-            os.remove(file_path)
-            print(f"Removed: {file_path}")
+        # Keep workdir for debugging; replace `pass` with shutil.rmtree(workdir) to remove the entire directory
+        pass
 
-# ---
+# --- 5. GRADIO UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 My Personal AI Video Studio")
-    gr.Markdown("Enter a topic
+    gr.Markdown("# 🤖 My Personal AI Video Studio (Gen-4 Turbo)")
+    gr.Markdown("Enter a topic and (optionally) upload a keyframe image. Without an image, a simple placeholder is generated.")
 
     with gr.Row():
-        topic_input = gr.Textbox(
- …
-        )
-        generate_button = gr.Button("Generate Video", variant="primary", scale=1)
- …
+        topic_input = gr.Textbox(label="Video Topic", placeholder="e.g., 'The history of coffee'", scale=3)
+        image_input = gr.Image(label="Keyframe Image (optional)", type="filepath")
+    with gr.Row():
+        generate_button = gr.Button("Generate Video", variant="primary")
     with gr.Row():
         video_output = gr.Video(label="Generated Video")
 
     generate_button.click(
         fn=generate_video_from_topic,
-        inputs=topic_input,
+        inputs=[topic_input, image_input],
         outputs=video_output
     )
 
-    gr.Markdown("
+    gr.Markdown("---\n### Tips\n- Supply a consistent character/style image for more coherent scenes.\n- For pure *text-only* generation, switch to a Gen-3 Alpha text-to-video flow (not implemented here).\n- Replace the placeholder keyframe logic with a real T2I model for higher quality.")
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
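The script step above prompts Gemini for JSON and then strips the ```json fences by hand before parsing. Gemini 1.5 also offers a native JSON mode via `response_mime_type`, which makes the fence-stripping unnecessary. A minimal sketch, assuming the same `google-generativeai` package and model name as the diff (the helper name `request_script_json` is illustrative, not part of the commit):

```python
import json
import google.generativeai as genai

def request_script_json(prompt: str) -> dict:
    # response_mime_type asks the model to emit raw JSON (no ``` fences)
    model = genai.GenerativeModel(
        "gemini-1.5-flash",
        generation_config={"response_mime_type": "application/json"},
    )
    response = model.generate_content(prompt)
    data = json.loads(response.text)  # raises ValueError on malformed output
    # Validate the two keys the pipeline depends on
    missing = {"narration_script", "scene_prompts"} - data.keys()
    if missing:
        raise ValueError(f"Model output missing keys: {sorted(missing)}")
    return data
```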
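`poll_runway_task` polls at a fixed 5-second interval under a hard deadline. The same bounded-polling idea, written against a plain status callback so it assumes nothing about the Runway SDK surface, with gentle exponential backoff added (`poll_until_done` and its parameters are illustrative):

```python
import time

def poll_until_done(fetch_status, max_seconds=180, interval=5.0,
                    backoff=1.5, max_interval=30.0) -> None:
    """fetch_status() -> str, e.g. 'PENDING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED'."""
    deadline = time.monotonic() + max_seconds
    while True:
        status = fetch_status()
        if status == 'SUCCEEDED':
            return
        if status == 'FAILED':
            raise RuntimeError("task failed")
        if time.monotonic() > deadline:
            raise TimeoutError(f"timed out after {max_seconds}s (last status={status})")
        time.sleep(interval)
        interval = min(interval * backoff, max_interval)  # back off between polls
```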
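STEP 6 relies on ffmpeg's concat demuxer with stream copy, which only works because every scene clip comes from the same model and ratio (identical codec, resolution, and framerate). The pattern in isolation, as a sketch (`concat_clips` is an illustrative helper, not part of the commit):

```python
import subprocess
from pathlib import Path

def concat_clips(clips: list[Path], out_path: Path) -> Path:
    # concat-demuxer list file: one "file '<path>'" line per clip
    list_file = out_path.with_suffix(".txt")
    list_file.write_text("".join(f"file '{c.as_posix()}'\n" for c in clips))
    subprocess.run([
        "ffmpeg",
        "-f", "concat",
        "-safe", "0",        # permit absolute / unsanitized paths in the list file
        "-i", str(list_file),
        "-c", "copy",        # no re-encode; requires uniform streams across clips
        str(out_path), "-y",
    ], check=True)
    return out_path
```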
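The new `finally:` block deliberately keeps the per-job directory (it is just `pass`), whereas the old code removed each intermediate file. If you want the old behaviour back, a sketch of what the cleanup could look like (`cleanup_job` is illustrative; the final video would be passed as `keep` so the returned path survives):

```python
import shutil
from pathlib import Path

def cleanup_job(workdir: Path, keep: Path | None = None) -> None:
    for p in workdir.iterdir():
        if keep is not None and p == keep:
            continue  # keep the deliverable, drop intermediates
        if p.is_file():
            p.unlink(missing_ok=True)
            print(f"Removed: {p}")
    if keep is None:
        shutil.rmtree(workdir, ignore_errors=True)  # nothing to keep; drop the directory
```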
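On the UI side, the change that matters is the second input: `inputs=[topic_input, image_input]` must match the new two-argument signature of `generate_video_from_topic`. A minimal standalone sketch of that wiring with a stub pipeline (`fake_pipeline` is illustrative):

```python
import gradio as gr

def fake_pipeline(topic: str, keyframe_path: str | None) -> str:
    # stand-in for generate_video_from_topic; must return a path to an mp4
    return "sample.mp4"

with gr.Blocks() as demo:
    topic = gr.Textbox(label="Video Topic")
    # type="filepath" hands the function a path string, or None when left empty
    image = gr.Image(label="Keyframe Image (optional)", type="filepath")
    out = gr.Video(label="Generated Video")
    gr.Button("Generate").click(fn=fake_pipeline, inputs=[topic, image], outputs=out)

if __name__ == "__main__":
    demo.launch()
```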