Spaces:

mgbam
/

ChronoWeave

Sleeping

App Files Files Community

mgbam committed on Apr 14

Commit

62f88b4

verified ·

1 Parent(s): 4ed1d3d

Create app.py

Browse files

Files changed (1) hide show

app.py +493 -0

app.py ADDED Viewed

	@@ -0,0 +1,493 @@

+# Copyright 2025 Google LLC. Based on work by Yousif Ahmed.
+# Concept: ChronoWeave - Branching Narrative Generation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
+import streamlit as st
+import google.generativeai as genai
+import os
+import json
+import numpy as np
+from io import BytesIO
+import time
+import wave
+import contextlib
+import asyncio
+import uuid # For unique filenames
+import shutil # For cleaning up temp dirs
+# Image handling
+from PIL import Image
+# Video and audio processing
+from moviepy.editor import ImageClip, AudioFileClip, CompositeVideoClip, concatenate_videoclips
+# Type hints
+import typing_extensions as typing
+# Async support for Streamlit/Google API
+import nest_asyncio
+nest_asyncio.apply() # Apply patch for asyncio in environments like Streamlit/Jupyter
+# --- Configuration ---
+# Page setup: this is the first Streamlit call in the script. It sets the browser tab
+# title and a wide layout, then renders the page title and a short attribution blurb.
+st.set_page_config(page_title="ChronoWeave", layout="wide")
+st.title("🌀 ChronoWeave: Branching Narrative Generator")
+st.markdown("""
+Generate multiple, branching story timelines from a single theme using AI.
+Based on the work of Yousif Ahmed. Copyright 2025 Google LLC.
+""")
+# --- Constants ---
+# MODEL is the text model passed to genai.GenerativeModel during client initialization.
+MODEL = "models/gemini-1.5-flash" # Or other suitable text model supporting JSON
+# Using v1alpha for the Live API for audio output.
+# AUDIO_MODEL_VERSION is interpolated into the api_endpoint host name of the live client.
+AUDIO_MODEL_VERSION = 'v1alpha' # Must be alpha for audio modality
+# NOTE(review): IMAGE_MODEL_ID is only referenced from a commented-out line in
+# generate_image_imagen in the visible source, so it currently has no runtime effect.
+IMAGE_MODEL_ID = "imagen-3" # Or your preferred Imagen model "imagen-3.0-generate-002"
+# --- API Key Handling ---
+try:
+    # Preferred way to handle secrets in Streamlit sharing/HF Spaces
+    GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
+    os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
+except KeyError:
+    st.error("🚨 Google API Key not found! Please add it as a Secret named 'GOOGLE_API_KEY' in your Hugging Face Space settings.", icon="🚨")
+    st.stop() # Halt execution if no key
+# --- Initialize Google Client ---
+# Builds two module-level handles used by the helper functions below:
+#   client_standard - GenerativeModel for MODEL, used for story JSON and image requests
+#   client_live     - client pointed at the AUDIO_MODEL_VERSION endpoint, used for audio
+# Any exception raised here is shown in the UI and stops the script.
+try:
+    # Initialize the client with the API key
+    genai.configure(api_key=GOOGLE_API_KEY)
+    # Create separate clients or configure one for different API versions if needed
+    # Client for Text/Imagen (standard API)
+    client_standard = genai.GenerativeModel(MODEL)
+    # Client for Live Audio (v1alpha) - requires different client init
+    # NOTE(review): `genai` here is `google.generativeai`; `Client` looks like an API of the
+    # separate `google-genai` SDK. If the attribute does not exist, the AttributeError is
+    # caught below and the app stops at start-up - confirm against the installed package.
+    client_live = genai.Client(
+         client_options={'api_endpoint': f'{AUDIO_MODEL_VERSION}.generativelanguage.googleapis.com'}
+    )
+    # Note: As of recent updates, genai.configure might handle this better,
+    # but separating clients or explicitly setting endpoints can be more robust.
+    # Adjust based on the library version and observed behavior.
+except Exception as e:
+    # Top-level boundary: report the failure to the user and halt, since nothing below can work.
+    st.error(f"🚨 Failed to initialize Google AI Client: {e}", icon="🚨")
+    st.stop()
+# --- Define Structured Output Schemas ---
+class StorySegment(typing.TypedDict):
+    """One scene of a timeline, mirroring the per-scene fields requested in the story prompt."""
+    scene_id: int  # scene number within its timeline; the prompt asks for numbering from 0
+    image_prompt: str  # visual description used to build the image generation request
+    audio_text: str  # narration sentence that is turned into audio for the scene
+    character_description: str  # appended to image_prompt when the image request is built
+    timeline_visual_modifier: typing.Optional[str]  # optional style hint; None when no modifier applies
+class Timeline(typing.TypedDict):
+    """One branch of the story: an identifier, why it diverges, and its ordered scenes."""
+    timeline_id: int  # used in headings, temp file names and as the key of the final video map
+    divergence_reason: str  # shown next to the timeline id in the UI headings
+    segments: list[StorySegment]  # scenes processed in order to build the timeline video
+class ChronoWeaveResponse(typing.TypedDict):
+    """Top-level shape of the JSON object requested from the model by the story prompt."""
+    core_theme: str  # theme string echoed back by the model
+    timelines: list[Timeline]  # the only key the main script checks for before processing
+    total_scenes_per_timeline: int  # requested by the schema; not read by the visible code
+# --- Helper Functions ---
+@contextlib.contextmanager
+def wave_file(filename, channels=1, rate=24000, sample_width=2):
+    """Context manager to write WAV files."""
+    with wave.open(filename, "wb") as wf:
+        wf.setnchannels(channels)
+        wf.setsampwidth(sample_width)
+        wf.setframerate(rate)
+        yield wf
+async def generate_audio_live_async(api_text, output_filename):
+    """Generates audio using Gemini Live API (async version)."""
+    collected_audio = bytearray()
+    st.write(f"🎙️ Generating audio for: '{api_text[:50]}...'") # Log start
+    try:
+        # Use the 'client_live' specifically configured for v1alpha
+        live_model = client_live.get_model(f"models/gemini-1.5-flash") # Specify model within the live client context
+        config = {
+            "response_modalities": ["AUDIO"]
+        }
+        # Connect to the Live API using the live client.
+        async with live_model.connect(config=config) as session:
+            await session.send_request([api_text]) # Simpler send for single prompt
+            async for response in session.stream_content():
+                 if response.audio_chunk:
+                    collected_audio.extend(response.audio_chunk.data)
+        if not collected_audio:
+             st.warning(f"⚠️ No audio data received for: '{api_text[:50]}...'")
+             return None # Indicate failure
+        audio_bytes = bytes(collected_audio)
+        # Write the collected audio bytes into a WAV file.
+        with wave_file(output_filename) as wf:
+            wf.writeframes(audio_bytes)
+        st.write(f"   ✅ Audio saved: {os.path.basename(output_filename)}")
+        return output_filename
+    except Exception as e:
+        st.error(f"   ❌ Audio generation failed for '{api_text[:50]}...': {e}", icon="🚨")
+        return None
+def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> ChronoWeaveResponse | None:
+    """Generates branching story sequences using Gemini structured output."""
+    st.write(f"📚 Generating {num_timelines} timeline(s) for theme: '{theme}'...")
+    divergence_instruction = f"Introduce divergence between timelines. {divergence_prompt}" if divergence_prompt else "Introduce natural points of divergence between timelines after the first scene or two."
+    prompt = f'''
+    As an expert narrative designer, create a branching story based on the theme: "{theme}".
+    Generate exactly {num_timelines} distinct timelines, each containing exactly {num_scenes} scenes.
+    Each scene should be approximately 5-10 seconds long when narrated.
+    {divergence_instruction} Clearly state the reason for divergence for each timeline after the first.
+    For each scene in each timeline, provide:
+    - scene_id: An integer starting from 0 for the scene number within its timeline.
+    - image_prompt: A concise (15-25 words) description for an image generation model. Focus on visual details, characters (animals/objects only, NO PEOPLE), background, and action. Maintain a consistent 'kids animation style' (e.g., simple, rounded shapes, bright colors) across all scenes and timelines unless specified by a timeline_visual_modifier.
+    - audio_text: A single, engaging sentence of narration or dialogue for the scene (max 25 words).
+    - character_description: Brief description of recurring characters (names, key features) mentioned in *this specific scene's image prompt*. Keep consistent within a timeline. (Max 30 words).
+    - timeline_visual_modifier: (Optional, string or null) A *brief* hint if this timeline should have a slightly different visual feel from this scene onwards (e.g., "slightly darker lighting", "more cluttered background", "character looks worried"). Keep it subtle. Use null if no specific modifier.
+    Constraint: Ensure the output strictly adheres to the following JSON schema. Do not include preamble or explanations outside the JSON structure. Respond ONLY with the JSON object.
+    JSON Schema:
+    {{
+      "type": "object",
+      "properties": {{
+        "core_theme": {{"type": "string"}},
+        "timelines": {{
+          "type": "array",
+          "items": {{
+            "type": "object",
+            "properties": {{
+              "timeline_id": {{"type": "integer"}},
+              "divergence_reason": {{"type": "string"}},
+              "segments": {{
+                "type": "array",
+                "items": {{
+                  "type": "object",
+                  "properties": {{
+                    "scene_id": {{"type": "integer"}},
+                    "image_prompt": {{"type": "string"}},
+                    "audio_text": {{"type": "string"}},
+                    "character_description": {{"type": "string"}},
+                    "timeline_visual_modifier": {{"type": ["string", "null"]}}
+                  }},
+                  "required": ["scene_id", "image_prompt", "audio_text", "character_description", "timeline_visual_modifier"]
+                }}
+              }}
+            }},
+            "required": ["timeline_id", "divergence_reason", "segments"]
+          }}
+        }},
+        "total_scenes_per_timeline": {{"type": "integer"}}
+      }},
+      "required": ["core_theme", "timelines", "total_scenes_per_timeline"]
+    }}
+    '''
+    try:
+        response = client_standard.generate_content(
+            contents=prompt,
+            generation_config=genai.types.GenerationConfig(
+                 response_mime_type="application/json",
+                 # Optional: Add temperature, etc. if needed
+            )
+            # The schema can also be passed via generation_config in some versions/models
+            # config={
+            #     'response_mime_type': 'application/json',
+            #     'response_schema': ChronoWeaveResponse # Pass the TypedDict directly
+            # }
+        )
+        # Debugging: Print raw response text
+        # st.text_area("Raw Gemini Response:", response.text, height=200)
+        story_data = json.loads(response.text) # Assuming response.text contains the JSON string
+        st.success("✅ Story structure generated successfully!")
+        # Basic validation (can be more thorough)
+        if 'timelines' in story_data and isinstance(story_data['timelines'], list):
+             # Further validation could check segment structure, etc.
+             return story_data # Return the parsed dictionary
+        else:
+             st.error("🚨 Generated story data is missing the 'timelines' list.", icon="🚨")
+             return None
+    except json.JSONDecodeError as e:
+        st.error(f"🚨 Failed to decode JSON response from Gemini: {e}", icon="🚨")
+        st.text_area("Problematic Response Text:", response.text if 'response' in locals() else "No response object.", height=150)
+        return None
+    except Exception as e:
+        st.error(f"🚨 Error generating story sequence: {e}", icon="🚨")
+        # Log the prompt potentially? Be careful with sensitive data if applicable.
+        # st.text_area("Failed Prompt:", prompt, height=200)
+        return None
+def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1") -> Image.Image | None:
+    """Generates an image using Imagen."""
+    st.write(f"🖼️ Generating image for: '{prompt[:60]}...'")
+    try:
+        # Use the standard client's dedicated image generation method
+        response = client_standard.generate_content(
+            f"Generate an image with the following prompt, ensuring a child-friendly animation style and NO human figures: {prompt}",
+             generation_config=genai.types.GenerationConfig(
+                 candidate_count=1, # Generate one image
+                 # Imagen specific parameters are often passed differently or rely on model defaults
+                 # Check documentation for precise Imagen control via the unified API
+             ),
+             # If the model/API version requires specific image parameters:
+             # tools=[genai.ImageParams(model=IMAGE_MODEL_ID, number_of_images=1, aspect_ratio=aspect_ratio, person_generation="DONT_ALLOW")]
+        )
+        # Accessing image data might vary slightly depending on API response structure
+        # This assumes response.parts contains the image data if successful
+        if response.parts and response.parts[0].inline_data:
+            image_bytes = response.parts[0].inline_data.data
+            image = Image.open(BytesIO(image_bytes))
+            st.write("   ✅ Image generated.")
+            return image
+        else:
+            # Check for safety blocks or other reasons for failure
+            if response.prompt_feedback.block_reason:
+                 st.warning(f"   ⚠️ Image generation blocked for prompt '{prompt[:60]}...'. Reason: {response.prompt_feedback.block_reason}", icon="⚠️")
+            else:
+                 st.warning(f"   ⚠️ No image data received for prompt '{prompt[:60]}...'.", icon="⚠️")
+            # Debugging: st.write(response)
+            return None
+    except Exception as e:
+        st.error(f"   ❌ Image generation failed for '{prompt[:60]}...': {e}", icon="🚨")
+        return None
+# --- Streamlit UI Elements ---
+# Sidebar controls. The values read here (theme, num_scenes, num_timelines,
+# divergence_prompt, aspect_ratio, generate_button) drive the main logic below.
+st.sidebar.header("Configuration")
+# API Key display/check (already handled above, but sidebar is a good place)
+if GOOGLE_API_KEY:
+    st.sidebar.success("Google API Key Loaded!", icon="✅")
+else:
+    st.sidebar.error("Google API Key Missing!", icon="🚨")
+# Free-text theme with a default example; an empty value is rejected when generating.
+theme = st.sidebar.text_input("Story Theme:", "A curious squirrel finds a shiny object")
+# Scene and timeline counts are bounded by the sliders (2-7 scenes, 1-4 timelines).
+num_scenes = st.sidebar.slider("Scenes per Timeline:", min_value=2, max_value=7, value=3)
+num_timelines = st.sidebar.slider("Number of Timelines:", min_value=1, max_value=4, value=2)
+# Optional hint that is folded into the divergence instruction of the story prompt.
+divergence_prompt = st.sidebar.text_input("Divergence Hint (Optional):", placeholder="e.g., What if it started raining?")
+# Passed to generate_image_imagen for every scene.
+aspect_ratio = st.sidebar.selectbox("Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0)
+# The button is disabled when no API key is available.
+generate_button = st.sidebar.button("✨ Generate ChronoWeave ✨", type="primary", disabled=(not GOOGLE_API_KEY))
+st.sidebar.markdown("---")
+st.sidebar.info("Note: Generation can take several minutes depending on settings.")
+# --- Main Logic ---
+if generate_button:
+    if not theme:
+        st.error("Please enter a story theme.", icon="👈")
+    else:
+        # Create a unique temporary directory for this run
+        run_id = str(uuid.uuid4())
+        temp_dir = os.path.join(".", f"chrono_temp_{run_id}") # Create in current dir
+        os.makedirs(temp_dir, exist_ok=True)
+        st.write(f"Working directory: {temp_dir}")
+        final_video_paths = {} # To store {timeline_id: video_path}
+        with st.spinner("Generating narrative structure..."):
+            chrono_data = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)
+        if chrono_data and 'timelines' in chrono_data:
+            st.success(f"Found {len(chrono_data['timelines'])} timelines. Processing each...")
+            all_timelines_successful = True # Flag to track if all timelines worked
+            # Use st.status for detailed progress
+            with st.status("Generating assets and composing videos...", expanded=True) as status:
+                for timeline in chrono_data['timelines']:
+                    timeline_id = timeline['timeline_id']
+                    divergence = timeline['divergence_reason']
+                    segments = timeline['segments']
+                    st.subheader(f"Timeline {timeline_id}: {divergence}")
+                    temp_image_files = []
+                    temp_audio_files = []
+                    video_clips = []
+                    timeline_successful = True # Flag for this specific timeline
+                    for i, segment in enumerate(segments):
+                        status.update(label=f"Processing Timeline {timeline_id}, Scene {i+1}/{num_scenes}...")
+                        scene_id = segment['scene_id']
+                        image_prompt = segment['image_prompt']
+                        audio_text = segment['audio_text']
+                        char_desc = segment['character_description']
+                        vis_mod = segment['timeline_visual_modifier']
+                        st.write(f"--- Scene {i+1} (T{timeline_id}) ---")
+                        st.write(f"*   **Image Prompt:** {image_prompt}" + (f" (Modifier: {vis_mod})" if vis_mod else ""))
+                        st.write(f"*   **Audio Text:** {audio_text}")
+                        # st.write(f"*   Character Desc: {char_desc}") # Can be verbose
+                        # --- Image Generation ---
+                        combined_prompt = f"{image_prompt} {char_desc}"
+                        if vis_mod:
+                            combined_prompt += f" Style hint: {vis_mod}"
+                        generated_image = generate_image_imagen(combined_prompt, aspect_ratio)
+                        if generated_image:
+                            image_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_image.png")
+                            generated_image.save(image_path)
+                            temp_image_files.append(image_path)
+                            st.image(generated_image, width=200) # Show thumbnail
+                        else:
+                            st.warning(f"Skipping scene {i+1} in timeline {timeline_id} due to image generation failure.")
+                            timeline_successful = False
+                            continue # Skip to next segment if image fails
+                        # --- Audio Generation ---
+                        # Add negative prompt to prevent conversational filler
+                        audio_negative_prompt = "Narrate the following sentence directly, with expression, without any introduction or closing remarks like 'Okay' or 'Here is the narration'. Just read the sentence:"
+                        full_audio_prompt = f"{audio_negative_prompt}\n{audio_text}"
+                        audio_path = os.path.join(temp_dir, f"t{timeline_id}_s{i}_audio.wav")
+                        # Run the async audio generation function
+                        try:
+                           generated_audio_path = asyncio.run(generate_audio_live_async(full_audio_prompt, audio_path))
+                        except Exception as e:
+                            st.error(f"Asyncio error during audio gen: {e}")
+                            generated_audio_path = None
+                        if generated_audio_path:
+                            temp_audio_files.append(generated_audio_path)
+                            # st.audio(generated_audio_path) # Optional: Preview audio
+                        else:
+                            st.warning(f"Skipping video clip for scene {i+1} in timeline {timeline_id} due to audio generation failure.")
+                            # Clean up the image file for this failed scene segment
+                            if os.path.exists(image_path):
+                                os.remove(image_path)
+                                temp_image_files.remove(image_path)
+                            timeline_successful = False
+                            continue # Skip making video clip if audio fails
+                        # --- Create Video Clip ---
+                        try:
+                            st.write("   🎬 Creating video clip...")
+                            audio_clip = AudioFileClip(generated_audio_path)
+                            # Ensure PIL Image is used if needed, or numpy array directly
+                            np_image = np.array(Image.open(image_path))
+                            # Create ImageClip, ensure duration matches audio
+                            image_clip = ImageClip(np_image).set_duration(audio_clip.duration)
+                            # Handle potential size mismatch if needed (resize image_clip or set size explicitly)
+                            # image_clip = image_clip.resize(width=...)
+                            composite_clip = image_clip.set_audio(audio_clip) # Simpler composition
+                            video_clips.append(composite_clip)
+                            st.write("      ✅ Clip created.")
+                        except Exception as e:
+                             st.error(f"      ❌ Failed to create video clip for scene {i+1} (T{timeline_id}): {e}", icon="🚨")
+                             timeline_successful = False
+                             # Don't break the whole timeline, just skip this clip maybe? Or mark timeline as failed.
+                    # --- Assemble Timeline Video ---
+                    if video_clips and timeline_successful: # Only assemble if clips were made and no major errors
+                        status.update(label=f"Composing final video for Timeline {timeline_id}...")
+                        st.write(f"🎞️ Assembling final video for Timeline {timeline_id}...")
+                        try:
+                            final_timeline_video = concatenate_videoclips(video_clips, method="compose")
+                            output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final_video.mp4")
+                            # Use 'libx264' for broader compatibility, specify audio codec
+                            final_timeline_video.write_videofile(output_filename, fps=24, codec='libx264', audio_codec='aac')
+                            final_video_paths[timeline_id] = output_filename
+                            st.success(f"   ✅ Video for Timeline {timeline_id} saved: {os.path.basename(output_filename)}")
+                            # Close clips to release resources
+                            for clip in video_clips:
+                                if hasattr(clip, 'close'): clip.close()
+                                if hasattr(clip, 'audio') and hasattr(clip.audio, 'close'): clip.audio.close()
+                            if hasattr(final_timeline_video, 'close'): final_timeline_video.close()
+                        except Exception as e:
+                            st.error(f"   ❌ Failed to write final video for Timeline {timeline_id}: {e}", icon="🚨")
+                            all_timelines_successful = False
+                    elif not video_clips:
+                         st.warning(f"No video clips were successfully generated for Timeline {timeline_id}. Skipping final video assembly.")
+                         all_timelines_successful = False
+                    else:
+                         st.warning(f"Timeline {timeline_id} encountered errors. Skipping final video assembly.")
+                         all_timelines_successful = False
+                    # Intermediate cleanup for the timeline (optional, helps manage files)
+                    # for file in temp_audio_files:
+                    #     if os.path.exists(file): os.remove(file)
+                    # for file in temp_image_files:
+                    #     if os.path.exists(file): os.remove(file)
+                # Final status update
+                if all_timelines_successful and final_video_paths:
+                    status.update(label="ChronoWeave Generation Complete!", state="complete", expanded=False)
+                elif final_video_paths:
+                     status.update(label="ChronoWeave Generation Partially Complete (some errors occurred).", state="warning", expanded=False)
+                else:
+                    status.update(label="ChronoWeave Generation Failed.", state="error", expanded=False)
+            # --- Display Results ---
+            st.header("Generated Timelines")
+            if final_video_paths:
+                sorted_timeline_ids = sorted(final_video_paths.keys())
+                for timeline_id in sorted_timeline_ids:
+                    video_path = final_video_paths[timeline_id]
+                    # Find matching timeline divergence reason
+                    reason = "Unknown"
+                    for t in chrono_data.get('timelines', []):
+                         if t.get('timeline_id') == timeline_id:
+                             reason = t.get('divergence_reason', 'N/A')
+                             break
+                    st.subheader(f"Timeline {timeline_id}: {reason}")
+                    try:
+                        video_file = open(video_path, 'rb')
+                        video_bytes = video_file.read()
+                        st.video(video_bytes)
+                        video_file.close()
+                    except FileNotFoundError:
+                         st.error(f"Could not find video file: {video_path}", icon="🚨")
+                    except Exception as e:
+                         st.error(f"Could not display video {video_path}: {e}", icon="🚨")
+            else:
+                st.warning("No final videos were successfully generated.")
+            # --- Cleanup ---
+            st.write("Cleaning up temporary files...")
+            try:
+                shutil.rmtree(temp_dir)
+                st.write("   ✅ Temporary files removed.")
+            except Exception as e:
+                st.warning(f"   ⚠️ Could not remove temporary directory {temp_dir}: {e}", icon="⚠️")
+        elif not chrono_data:
+            st.error("Story generation failed. Cannot proceed.", icon="🛑")
+        else:
+            # This case might happen if chrono_data is returned but is malformed (e.g., no 'timelines' key)
+            st.error("Story data seems malformed. Cannot proceed.", icon="🛑")
+            # st.json(chrono_data) # Display the problematic data
+else:
+    st.info("Configure settings in the sidebar and click 'Generate ChronoWeave'")