Spaces:

mgbam
/

ChronoWeave

Running

App Files Community

mgbam committed on Apr 15

Commit

a66ce42

verified ·

1 Parent(s): 463d8c7

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -29

app.py CHANGED Viewed

@@ -23,11 +23,11 @@ from PIL import Image
 # Pydantic for data validation
 from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
-from typing import List, Optional, Literal, Dict, Any
 # Video and audio processing
 from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
-# from moviepy.config import change_settings  # Potential for setting ImageMagick path if needed
 # Type hints
 import typing_extensions as typing
@@ -36,6 +36,9 @@ import typing_extensions as typing
 import nest_asyncio
 nest_asyncio.apply()
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
@@ -55,14 +58,14 @@ Generate multiple, branching story timelines from a single theme using AI, compl
 TEXT_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_SAMPLING_RATE = 24000
-IMAGE_MODEL_ID = "imagen-3"  # NOTE: Requires Vertex AI SDK access
 DEFAULT_ASPECT_RATIO = "1:1"
 VIDEO_FPS = 24
 VIDEO_CODEC = "libx264"
 AUDIO_CODEC = "aac"
 TEMP_DIR_BASE = ".chrono_temp"
-# --- API Key Handling ---
 GOOGLE_API_KEY = None
 try:
     GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
@@ -75,24 +78,25 @@ except KeyError:
         st.error("🚨 **Google API Key Not Found!** Please configure it.", icon="🚨")
         st.stop()
-# --- Initialize Google Clients ---
 try:
     genai.configure(api_key=GOOGLE_API_KEY)
     logger.info("Configured google-generativeai with API key.")
-    # Initialize text/JSON model
     client_standard = genai.GenerativeModel(TEXT_MODEL_ID)
     logger.info(f"Initialized text/JSON model handle: {TEXT_MODEL_ID}.")
-    # Initialize audio model
     live_model = genai.GenerativeModel(AUDIO_MODEL_ID)
     logger.info(f"Initialized audio model handle: {AUDIO_MODEL_ID}.")
-    # Initialize image model (placeholder for future Vertex AI SDK integration)
-    image_model_genai = genai.GenerativeModel(IMAGE_MODEL_ID)
-    logger.info(f"Initialized google-generativeai handle for image model: {IMAGE_MODEL_ID} (May require Vertex AI SDK).")
-    # ---> TODO: Initialize Vertex AI client here if switching SDK <---
 except AttributeError as ae:
     logger.exception("AttributeError during Client Init.")
     st.error(f"🚨 Init Error: {ae}. Update library?", icon="🚨")
@@ -158,14 +162,11 @@ def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLIN
                 logger.error(f"Error closing wave file {filename}: {e_close}")
 async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
-    """
-    Generates audio using Gemini Live API (async version) via the GenerativeModel.
-    """
     collected_audio = bytearray()
     task_id = os.path.basename(output_filename).split('.')[0]
     logger.info(f"🎙️ [{task_id}] Requesting audio: '{api_text[:60]}...'")
     try:
-        # Corrected config structure for audio generation
         config = {
             "response_modalities": ["AUDIO"],
             "audio_encoding": "LINEAR16",
@@ -203,16 +204,24 @@ async def generate_audio_live_async(api_text: str, output_filename: str, voice:
         return None
 def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> Optional[ChronoWeaveResponse]:
-    """
-    Generates branching story sequences using Gemini structured output and validates with Pydantic.
-    """
     st.info(f"📚 Generating {num_timelines} timeline(s) x {num_scenes} scenes for: '{theme}'...")
     logger.info(f"Requesting story structure: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")
     divergence_instruction = (
         f"Introduce clear points of divergence between timelines, after first scene if possible. "
         f"Hint: '{divergence_prompt}'. State divergence reason clearly. **For timeline_id 0, use 'Initial path' or 'Baseline scenario'.**"
     )
-    prompt = f"""Act as narrative designer. Create story for theme: "{theme}". Instructions: 1. Exactly **{num_timelines}** timelines. 2. Each timeline exactly **{num_scenes}** scenes. 3. **NO humans/humanoids**. Focus: animals, fantasy creatures, animated objects, nature. 4. {divergence_instruction}. 5. Style: **'Simple, friendly kids animation, bright colors, rounded shapes'**, unless `timeline_visual_modifier` alters. 6. `audio_text`: single concise sentence (max 30 words). 7. `image_prompt`: descriptive, concise (target 15-35 words MAX). Focus on scene elements. **AVOID repeating general style**. 8. `character_description`: VERY brief (name, features). Target < 20 words. Output: ONLY valid JSON object adhering to schema. No text before/after. JSON Schema: ```json
 {json.dumps(ChronoWeaveResponse.model_json_schema(), indent=2)}
 ```"""
     try:
@@ -254,14 +263,37 @@ def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: i
 def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str = "IMG") -> Optional[Image.Image]:
     """
-    Generates an image.
-    <<< IMPORTANT: This function needs to be rewritten using the Vertex AI SDK
-    (google-cloud-aiplatform) to correctly call Imagen models. >>>
     """
     logger.info(f"🖼️ [{task_id}] Requesting image: '{prompt[:70]}...' (Aspect: {aspect_ratio})")
-    logger.error(f"❌ [{task_id}] Image generation skipped: Function needs update to use Vertex AI SDK for Imagen.")
-    st.error(f"Image generation for {task_id} skipped: Requires Vertex AI SDK implementation.", icon="🖼️")
-    return None
 # --- Streamlit UI Elements ---
 st.sidebar.header("⚙️ Configuration")

 # Pydantic for data validation
 from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
+from typing import List, Optional, Dict, Any
 # Video and audio processing
 from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
+# from moviepy.config import change_settings  # Uncomment if you need to change settings
 # Type hints
 import typing_extensions as typing
 import nest_asyncio
 nest_asyncio.apply()
+# Import Vertex AI SDK
+from google.cloud import aiplatform
 # --- Logging Setup ---
 logging.basicConfig(
     level=logging.INFO,
 TEXT_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_MODEL_ID = "models/gemini-1.5-flash"
 AUDIO_SAMPLING_RATE = 24000
+IMAGE_MODEL_ID = "imagen-3"  # Now used with Vertex AI
 DEFAULT_ASPECT_RATIO = "1:1"
 VIDEO_FPS = 24
 VIDEO_CODEC = "libx264"
 AUDIO_CODEC = "aac"
 TEMP_DIR_BASE = ".chrono_temp"
+# --- API Key and Vertex AI Config Handling ---
 GOOGLE_API_KEY = None
 try:
     GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
         st.error("🚨 **Google API Key Not Found!** Please configure it.", icon="🚨")
         st.stop()
+# --- Vertex AI Configuration ---
+# Read Vertex AI settings from Streamlit secrets, falling back to environment variables of the same name.
+PROJECT_ID = st.secrets.get("PROJECT_ID") or os.environ.get("PROJECT_ID")
+LOCATION = st.secrets.get("LOCATION") or os.environ.get("LOCATION", "us-central1")
+IMAGE_ENDPOINT_ID = st.secrets.get("IMAGE_ENDPOINT_ID") or os.environ.get("IMAGE_ENDPOINT_ID")
+if not PROJECT_ID or not IMAGE_ENDPOINT_ID:
+    st.error("🚨 **Vertex AI is not configured properly!** "
+             "Please set PROJECT_ID and IMAGE_ENDPOINT_ID in your secrets.", icon="🚨")
+    st.stop()
+# --- Initialize Google Clients for text/audio ---
 try:
     genai.configure(api_key=GOOGLE_API_KEY)
     logger.info("Configured google-generativeai with API key.")
     client_standard = genai.GenerativeModel(TEXT_MODEL_ID)
     logger.info(f"Initialized text/JSON model handle: {TEXT_MODEL_ID}.")
     live_model = genai.GenerativeModel(AUDIO_MODEL_ID)
     logger.info(f"Initialized audio model handle: {AUDIO_MODEL_ID}.")
 except AttributeError as ae:
     logger.exception("AttributeError during Client Init.")
     st.error(f"🚨 Init Error: {ae}. Update library?", icon="🚨")
                 logger.error(f"Error closing wave file {filename}: {e_close}")
 async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
+    """Generates audio using Gemini Live API (async version) via the GenerativeModel."""
     collected_audio = bytearray()
     task_id = os.path.basename(output_filename).split('.')[0]
     logger.info(f"🎙️ [{task_id}] Requesting audio: '{api_text[:60]}...'")
     try:
         config = {
             "response_modalities": ["AUDIO"],
             "audio_encoding": "LINEAR16",
         return None
 def generate_story_sequence_chrono(theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = "") -> Optional[ChronoWeaveResponse]:
+    """Generates branching story sequences using Gemini structured output and validates with Pydantic."""
     st.info(f"📚 Generating {num_timelines} timeline(s) x {num_scenes} scenes for: '{theme}'...")
     logger.info(f"Requesting story structure: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")
     divergence_instruction = (
         f"Introduce clear points of divergence between timelines, after first scene if possible. "
         f"Hint: '{divergence_prompt}'. State divergence reason clearly. **For timeline_id 0, use 'Initial path' or 'Baseline scenario'.**"
     )
+    prompt = f"""Act as narrative designer. Create story for theme: "{theme}". Instructions:
+1. Exactly **{num_timelines}** timelines.
+2. Each timeline exactly **{num_scenes}** scenes.
+3. **NO humans/humanoids**; focus on animals, fantasy creatures, animated objects, nature.
+4. {divergence_instruction}.
+5. Style: **'Simple, friendly kids animation, bright colors, rounded shapes'**, unless `timeline_visual_modifier` alters.
+6. `audio_text`: single concise sentence (max 30 words).
+7. `image_prompt`: descriptive, concise (target 15-35 words MAX). Focus on scene elements. **AVOID repeating general style**.
+8. `character_description`: VERY brief (name, features). Target < 20 words.
+Output: ONLY valid JSON object adhering to schema. No text before/after.
+JSON Schema: ```json
 {json.dumps(ChronoWeaveResponse.model_json_schema(), indent=2)}
 ```"""
     try:
 def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str = "IMG") -> Optional[Image.Image]:
     """
+    Generates an image using Vertex AI's Imagen model via the Google Cloud AI Platform SDK.
+    Ensure that the following environment variables or Streamlit secrets are set:
+      - PROJECT_ID: Your Google Cloud project ID.
+      - LOCATION: The Vertex AI region (e.g., "us-central1").
+      - IMAGE_ENDPOINT_ID: The resource ID of your deployed Imagen endpoint.
     """
     logger.info(f"🖼️ [{task_id}] Requesting image: '{prompt[:70]}...' (Aspect: {aspect_ratio})")
+    try:
+        # Initialize Vertex AI with your project and location.
+        aiplatform.init(project=PROJECT_ID, location=LOCATION)
+        # Retrieve your deployed endpoint.
+        endpoint = aiplatform.Endpoint(IMAGE_ENDPOINT_ID)
+        # Create a prediction instance. (The instance structure depends on your model.)
+        instance = {"prompt": prompt, "aspect_ratio": aspect_ratio}
+        prediction_response = endpoint.predict(instances=[instance])
+        # Assume the prediction returns a base64-encoded image string under the key "image".
+        import base64
+        image_base64 = prediction_response.predictions[0].get("image")
+        if not image_base64:
+            logger.error(f"❌ [{task_id}] No image returned in prediction.")
+            st.error(f"Image prediction failed for {task_id}: No image returned.", icon="🖼️")
+            return None
+        image_data = base64.b64decode(image_base64)
+        image = Image.open(BytesIO(image_data))
+        logger.info(f"✅ [{task_id}] Image generated successfully.")
+        return image
+    except Exception as e:
+        logger.exception(f"❌ [{task_id}] Image generation failed: {e}")
+        st.error(f"Image generation failed for {task_id}: {e}", icon="🖼️")
+        return None
 # --- Streamlit UI Elements ---
 st.sidebar.header("⚙️ Configuration")