Update app.py
app.py (CHANGED)
@@ -4,9 +4,27 @@ import json
 import time
 import random
 import subprocess
+from pathlib import Path
+
 import google.generativeai as genai
 from tavily import TavilyClient
 from runwayml import RunwayML, TaskFailedError
+from PIL import Image, ImageDraw, ImageFont
+
+# =============================================================
+# AI VIDEO STUDIO (Gen-4 Turbo Image→Video compliant rewrite)
+# =============================================================
+# Key changes:
+# 1. Added the *required* prompt_image for Gen-4 / gen4_turbo image_to_video tasks (was missing -> error).
+# 2. Added a UI input for an optional user keyframe image; if absent, a placeholder is auto-generated.
+# 3. Included prompt_text together with prompt_image for better guidance.
+# 4. Added more robust polling / retries and explicit exception surfaces.
+# 5. Added structured logging plus a deterministic temp directory per job.
+# 6. Wrapped cleanup in finally; kept the mock VO approach.
+# 7. Added basic safety guardrails.
+#
+# Gen-4 requires an input image plus a text prompt (it cannot run from text alone); for pure text-to-video, switch to Gen-3 Alpha's text mode. See the Runway docs.
+# =============================================================
 
 # --- 1. CONFIGURE API KEYS ---
 try:
@@ -17,16 +35,85 @@ try:
 except KeyError as e:
     raise ValueError(f"API Key Error: Please set the {e} secret in your environment.")
 
-# --- 2.
- …
+# --- 2. CONSTANTS / SETTINGS ---
+GEN4_MODEL = "gen4_turbo"   # adjust to "gen4" if you prefer (slower / potentially higher fidelity)
+SCENE_COUNT = 4
+SCENE_DURATION_SECONDS = 5  # Gen-4 supports 5 or 10 seconds
+VIDEO_RATIO = "1280:720"    # 16:9
+WORDS_PER_SEC = 2.5         # Used for mock narration length
+MAX_POLL_SECONDS = 180      # Per scene
+POLL_INTERVAL = 5
+
+# --- 3. UTILITIES ---
+def _log(msg: str):
+    print(f"[AI-STUDIO] {msg}")
+
+
+def create_placeholder_image(text: str, path: Path, size=(1280, 720)) -> Path:
+    """Create a simple placeholder keyframe if the user supplies none.
+    You can later replace this with a real text-to-image generation step."""
+    img = Image.new("RGB", size, (10, 10, 10))
+    draw = ImageDraw.Draw(img)
+    try:
+        font = ImageFont.truetype("DejaVuSans-Bold.ttf", 60)
+    except Exception:
+        font = ImageFont.load_default()
+    wrapped = []
+    line = ""
+    for word in text.split():
+        test = f"{line} {word}".strip()
+        if len(test) > 28:  # naive wrap
+            wrapped.append(line)
+            line = word
+        else:
+            line = test
+    if line:
+        wrapped.append(line)
+    y = size[1] // 2 - (len(wrapped) * 35) // 2
+    for w in wrapped:
+        # textbbox replaces ImageDraw.textsize, which was removed in Pillow 10
+        left, top, right, bottom = draw.textbbox((0, 0), w, font=font)
+        w_width, w_height = right - left, bottom - top
+        draw.text(((size[0] - w_width) // 2, y), w, fill=(240, 240, 240), font=font)
+        y += w_height + 10
+    img.save(path)
+    return path
+
+
+def generate_mock_voiceover(narration: str, out_path: Path):
+    duration = len(narration.split()) / WORDS_PER_SEC
+    subprocess.run([
+        'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=mono',
+        '-t', str(duration), '-q:a', '9', '-acodec', 'libmp3lame', str(out_path), '-y'
+    ], check=True)
+    return duration
+
+
+def poll_runway_task(task_obj, max_seconds=MAX_POLL_SECONDS, interval=POLL_INTERVAL):
+    start = time.time()
+    while True:
+        task_obj.refresh()
+        status = task_obj.status
+        if status == 'SUCCEEDED':
+            return task_obj
+        if status == 'FAILED':
+            raise TaskFailedError(task_details=task_obj)
+        if time.time() - start > max_seconds:
+            raise TimeoutError(f"Runway task timed out after {max_seconds}s (status={status})")
+        time.sleep(interval)
+
+# --- 4. CORE PIPELINE ---
+def generate_video_from_topic(topic_prompt, keyframe_image, progress=gr.Progress(track_tqdm=True)):
     job_id = f"{int(time.time())}_{random.randint(1000, 9999)}"
- …
+    _log(f"Starting job {job_id} :: topic='{topic_prompt}'")
 
- …
+    # Working directory for this job
+    workdir = Path(f"job_{job_id}")
+    workdir.mkdir(exist_ok=True)
+
+    intermediates = []
 
     try:
-        # STEP 1:
-        progress(0.
+        # STEP 1: Research
+        progress(0.05, desc="🔍 Researching topic ...")
         facts = "No research data available."
         try:
            research_results = tavily_client.search(
@@ -36,133 +123,127 @@ def generate_video_from_topic(topic_prompt, progress=gr.Progress(track_tqdm=True
            if research_results and 'results' in research_results:
                facts = "\n".join([res['content'] for res in research_results['results']])
         except Exception as e:
- …
+            _log(f"Tavily failed: {e}")
 
-        # STEP 2:
-        progress(0.
+        # STEP 2: Script
+        progress(0.15, desc="✍️ Writing script ...")
         gemini_model = genai.GenerativeModel('gemini-1.5-flash')
- …
-        You are a creative director for viral short-form videos.
- …
+        script_prompt = f"""
+        You are a creative director for viral short-form videos.
+        Topic: {topic_prompt}
+        Research (may contain noise):\n{facts}\n\n
+        Produce JSON with keys:
+        narration_script: overall narration (concise, energetic, ~85-110 words per 5 scenes). Maintain coherence.
+        scene_prompts: list of {SCENE_COUNT} *visual* prompts. Each should be cinematic, 1-2 sentences, include style / camera / lighting cues and keep characters consistent.
+        Return ONLY JSON.
         """
-        response = gemini_model.generate_content(
- …
+        response = gemini_model.generate_content(script_prompt)
         try:
- …
+            cleaned = response.text.strip().replace("```json", "").replace("```", "")
+            data = json.loads(cleaned)
+            narration = data['narration_script']
+            scene_prompts = data['scene_prompts']
+            if len(scene_prompts) != SCENE_COUNT:
+                raise ValueError(f"Expected {SCENE_COUNT} scene prompts, got {len(scene_prompts)}")
+        except Exception as e:
+            raise gr.Error(f"Gemini JSON parse error: {e}. Raw: {response.text[:400]}")
+
+        # STEP 3: Mock VO
+        progress(0.25, desc="🎙️ Generating mock VO ...")
+        audio_path = workdir / f"narration_{job_id}.mp3"
+        generate_mock_voiceover(narration, audio_path)
+        intermediates.append(audio_path)
+
+        # STEP 4: Prepare keyframe image (required for Gen-4 image_to_video)
+        progress(0.30, desc="🖼️ Preparing keyframe image ...")
+        if keyframe_image is not None:
+            keyframe_path = Path(keyframe_image)
+        else:
+            keyframe_path = workdir / "auto_keyframe.png"
+            create_placeholder_image(topic_prompt, keyframe_path)
+            intermediates.append(keyframe_path)
+
+        # STEP 5: Generate scenes
+        clip_paths = []
+        for idx, scene_prompt in enumerate(scene_prompts, start=1):
+            base_progress = 0.30 + (idx * 0.12)
+            progress(min(base_progress, 0.85), desc=f"🎬 Scene {idx}/{len(scene_prompts)} ...")
+            _log(f"Submitting scene {idx}: {scene_prompt[:90]}...")
             try:
-                task = (
- …
-                )
-                .wait_for_task_output()
+                task = runway_client.image_to_video.create(
+                    model=GEN4_MODEL,
+                    prompt_image=str(keyframe_path),  # required param
+                    prompt_text=scene_prompt,
+                    duration=SCENE_DURATION_SECONDS,
+                    ratio=VIDEO_RATIO,
                 )
+                task = poll_runway_task(task)
                 video_url = task.output[0]
             except TaskFailedError as e:
-                raise gr.Error(f"Runway
- …
-        combined_video_path = f"combined_video_{job_id}.mp4"
-        intermediate_files.append(combined_video_path)
+                raise gr.Error(f"Runway failed scene {idx}: {getattr(e, 'task_details', 'No details')}")
+
+            # Download clip
+            clip_path = workdir / f"scene_{idx}.mp4"
+            # NOTE: reaches into the SDK's private HTTP session; plain requests.get(video_url, stream=True) would also work
+            r = runway_client._session.get(video_url, stream=True)
+            with open(clip_path, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+            clip_paths.append(clip_path)
+            intermediates.append(clip_path)
+            _log(f"Downloaded scene {idx} -> {clip_path}")
+
+        # STEP 6: Concatenate video
+        progress(0.90, desc="✂️ Concatenating scenes ...")
+        list_file = workdir / "clips.txt"
+        with open(list_file, 'w') as lf:
+            for p in clip_paths:
+                lf.write(f"file '{p}'\n")
+        intermediates.append(list_file)
+
+        concat_path = workdir / f"concat_{job_id}.mp4"
         subprocess.run([
-            'ffmpeg', '-f', 'concat', '-safe', '0',
-            '-i', file_list_path, '-c', 'copy', combined_video_path, '-y'
+            'ffmpeg', '-f', 'concat', '-safe', '0', '-i', str(list_file), '-c', 'copy', str(concat_path), '-y'
         ], check=True)
+        intermediates.append(concat_path)
 
- …
+        # STEP 7: Mux audio
+        final_path = workdir / f"final_{job_id}.mp4"
+        progress(0.95, desc="🔊 Merging audio ...")
         subprocess.run([
-            'ffmpeg', '-i',
-            '-i', audio_path,
-            '-c:v', 'copy', '-c:a', 'aac', '-shortest', final_video_path, '-y'
+            'ffmpeg', '-i', str(concat_path), '-i', str(audio_path), '-c:v', 'copy', '-c:a', 'aac', '-shortest', str(final_path), '-y'
         ], check=True)
-        print(f"Final video created at: {final_video_path}")
 
-        progress(1.0, desc="✅ Done
- …
+        progress(1.0, desc="✅ Done")
+        _log(f"FINAL VIDEO: {final_path}")
+        return str(final_path)
 
     except Exception as e:
- …
+        _log(f"JOB {job_id} FAILED: {e}")
         raise gr.Error(f"An error occurred: {e}")
- …
     finally:
- …
-        if os.path.exists(file_path):
-            os.remove(file_path)
-            print(f"Removed: {file_path}")
+        # Keep workdir for debugging; replace `pass` with shutil.rmtree(workdir) to remove the entire directory
+        pass
 
-# ---
+# --- 5. GRADIO UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 My Personal AI Video Studio")
-    gr.Markdown("Enter a topic
+    gr.Markdown("# 🤖 My Personal AI Video Studio (Gen-4 Turbo)")
+    gr.Markdown("Enter a topic and (optionally) upload a keyframe image. Without an image, a simple placeholder is generated.")
 
     with gr.Row():
-        topic_input = gr.Textbox(
- …
-        )
-        generate_button = gr.Button("Generate Video", variant="primary", scale=1)
- …
+        topic_input = gr.Textbox(label="Video Topic", placeholder="e.g., 'The history of coffee'", scale=3)
+        image_input = gr.Image(label="Keyframe Image (optional)", type="filepath")
+    with gr.Row():
+        generate_button = gr.Button("Generate Video", variant="primary")
     with gr.Row():
         video_output = gr.Video(label="Generated Video")
 
     generate_button.click(
         fn=generate_video_from_topic,
-        inputs=topic_input,
+        inputs=[topic_input, image_input],
         outputs=video_output
     )
 
-    gr.Markdown("
+    gr.Markdown("---\n### Tips\n- Supply a consistent character/style image for more coherent scenes.\n- For pure *text-only* generation, switch to a Gen-3 Alpha text-to-video flow (not implemented here).\n- Replace the placeholder keyframe logic with a real T2I model for higher quality.")
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
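The script step above prompts Gemini for JSON and then strips the ```json fences by hand before parsing. Gemini 1.5 also offers a native JSON mode via `response_mime_type`, which makes the fence-stripping unnecessary. A minimal sketch, assuming the same `google-generativeai` package and model name as the diff (the helper name `request_script_json` is illustrative, not part of the commit):

```python
import json
import google.generativeai as genai

def request_script_json(prompt: str) -> dict:
    # response_mime_type asks the model to emit raw JSON (no ``` fences)
    model = genai.GenerativeModel(
        "gemini-1.5-flash",
        generation_config={"response_mime_type": "application/json"},
    )
    response = model.generate_content(prompt)
    data = json.loads(response.text)  # raises ValueError on malformed output
    # Validate the two keys the pipeline depends on
    missing = {"narration_script", "scene_prompts"} - data.keys()
    if missing:
        raise ValueError(f"Model output missing keys: {sorted(missing)}")
    return data
```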
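`poll_runway_task` polls at a fixed 5-second interval under a hard deadline. The same bounded-polling idea, written against a plain status callback so it assumes nothing about the Runway SDK surface, with gentle exponential backoff added (`poll_until_done` and its parameters are illustrative):

```python
import time

def poll_until_done(fetch_status, max_seconds=180, interval=5.0,
                    backoff=1.5, max_interval=30.0) -> None:
    """fetch_status() -> str, e.g. 'PENDING' | 'RUNNING' | 'SUCCEEDED' | 'FAILED'."""
    deadline = time.monotonic() + max_seconds
    while True:
        status = fetch_status()
        if status == 'SUCCEEDED':
            return
        if status == 'FAILED':
            raise RuntimeError("task failed")
        if time.monotonic() > deadline:
            raise TimeoutError(f"timed out after {max_seconds}s (last status={status})")
        time.sleep(interval)
        interval = min(interval * backoff, max_interval)  # back off between polls
```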
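STEP 6 relies on ffmpeg's concat demuxer with stream copy, which only works because every scene clip comes from the same model and ratio (identical codec, resolution, and framerate). The pattern in isolation, as a sketch (`concat_clips` is an illustrative helper, not part of the commit):

```python
import subprocess
from pathlib import Path

def concat_clips(clips: list[Path], out_path: Path) -> Path:
    # concat-demuxer list file: one "file '<path>'" line per clip
    list_file = out_path.with_suffix(".txt")
    list_file.write_text("".join(f"file '{c.as_posix()}'\n" for c in clips))
    subprocess.run([
        "ffmpeg",
        "-f", "concat",
        "-safe", "0",        # permit absolute / unsanitized paths in the list file
        "-i", str(list_file),
        "-c", "copy",        # no re-encode; requires uniform streams across clips
        str(out_path), "-y",
    ], check=True)
    return out_path
```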
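The new `finally:` block deliberately keeps the per-job directory (it is just `pass`), whereas the old code removed each intermediate file. If you want the old behaviour back, a sketch of what the cleanup could look like (`cleanup_job` is illustrative; the final video would be passed as `keep` so the returned path survives):

```python
import shutil
from pathlib import Path

def cleanup_job(workdir: Path, keep: Path | None = None) -> None:
    for p in workdir.iterdir():
        if keep is not None and p == keep:
            continue  # keep the deliverable, drop intermediates
        if p.is_file():
            p.unlink(missing_ok=True)
            print(f"Removed: {p}")
    if keep is None:
        shutil.rmtree(workdir, ignore_errors=True)  # nothing to keep; drop the directory
```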
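On the UI side, the change that matters is the second input: `inputs=[topic_input, image_input]` must match the new two-argument signature of `generate_video_from_topic`. A minimal standalone sketch of that wiring with a stub pipeline (`fake_pipeline` is illustrative):

```python
import gradio as gr

def fake_pipeline(topic: str, keyframe_path: str | None) -> str:
    # stand-in for generate_video_from_topic; must return a path to an mp4
    return "sample.mp4"

with gr.Blocks() as demo:
    topic = gr.Textbox(label="Video Topic")
    # type="filepath" hands the function a path string, or None when left empty
    image = gr.Image(label="Keyframe Image (optional)", type="filepath")
    out = gr.Video(label="Generated Video")
    gr.Button("Generate").click(fn=fake_pipeline, inputs=[topic, image], outputs=out)

if __name__ == "__main__":
    demo.launch()
```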