Spaces:

mgbam
/

ChronoWeave

Sleeping

App Files Files Community

ChronoWeave / app.py

mgbam

Update app.py

66aa79d verified 4 months ago

raw

history blame

24 kB

	# Copyright 2025 Google LLC.
	# Based on work by Yousif Ahmed.
	# Concept: ChronoWeave – Branching Narrative Generation
	# Licensed under the Apache License, Version 2.0 (the "License").
	# You may not use this file except in compliance with the License.
	# Obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0

	import os
	import json
	import time
	import uuid
	import asyncio
	import logging
	import shutil
	import contextlib
	import wave
	from io import BytesIO
	from typing import List, Optional, Tuple, Dict, Any

	import streamlit as st
	import numpy as np
	from PIL import Image

	# Pydantic for data validation
	from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator

	# Video and audio processing
	from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips

	# Google generative API and async patch
	import google.generativeai as genai
	import nest_asyncio
	nest_asyncio.apply() # Make asyncio work in Streamlit/Jupyter

	# --- Logging Setup ---
	# Logging is configured once at import time: INFO level, each record prefixed with a
	# timestamp and its level name. `logger` is the module-wide logger used by every function below.
	logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
	logger = logging.getLogger(__name__)

	# --- Constants & Configurations ---
	# Model identifiers handed to genai.GenerativeModel(...) in ChronoWeaveGenerator.__init__.
	TEXT_MODEL_ID = "models/gemini-1.5-flash" # Alternatively "gemini-1.5-pro"
	AUDIO_MODEL_ID = "models/gemini-1.5-flash" # Audio generation uses the text model for now
	# Sample rate in Hz: default WAV frame rate in wave_file_writer and the rate requested
	# in the audio generation config.
	AUDIO_SAMPLING_RATE = 24000
	IMAGE_MODEL_ID = "imagen-3" # NOTE: Requires Vertex AI SDK update for production
	# NOTE(review): not referenced elsewhere in the visible code; the sidebar select box
	# lists its own aspect-ratio choices.
	DEFAULT_ASPECT_RATIO = "1:1"
	# Frame rate and codecs passed to write_videofile when a timeline video is assembled.
	VIDEO_FPS = 24
	VIDEO_CODEC = "libx264"
	AUDIO_CODEC = "aac"
	# Parent directory under which each run creates its own "run_<id>" scratch directory.
	TEMP_DIR_BASE = ".chrono_temp"


	# --- Pydantic Schemas ---
	class StorySegment(BaseModel):
	    # One scene of a timeline: the prompt used to draw it, the sentence narrated over it,
	    # a short description of the featured character, and an optional per-timeline style hint.
	    #
	    # Documented with comments rather than a class docstring on purpose: the JSON schema of
	    # these models is embedded verbatim in the story-generation prompt, and a model docstring
	    # would be expected to show up there as a schema description (presumably changing the
	    # prompt — verify against the installed Pydantic version before converting to a docstring).
	    scene_id: int = Field(..., ge=0)
	    image_prompt: str = Field(..., min_length=10, max_length=250)
	    audio_text: str = Field(..., min_length=5, max_length=150)
	    character_description: str = Field(..., max_length=250)
	    timeline_visual_modifier: Optional[str] = Field(None, max_length=50)

	    @field_validator("image_prompt")
	    @classmethod
	    def image_prompt_no_humans(cls, v: str) -> str:
	        """Log a warning when the prompt mentions a human; the value is never rejected or altered.

	        Matching is done on whole words. A plain substring test would flag harmless
	        prompts, e.g. "man" inside "many", "command" or "mango".
	        """
	        human_terms = {
	            "person", "persons", "people",
	            "human", "humans", "humanoid", "humanoids",
	            "man", "men", "woman", "women",
	            "boy", "boys", "girl", "girls",
	            "child", "children",
	        }
	        # Replace every non-letter with a space so punctuation and hyphens act as word breaks.
	        words = "".join(ch if ch.isalpha() else " " for ch in v.lower()).split()
	        if not human_terms.isdisjoint(words):
	            logger.warning(f"Image prompt '{v[:50]}...' may contain human-related descriptors.")
	        return v


	class Timeline(BaseModel):
	    # One branch of the story: an id, the reason it diverges, and its ordered scenes.
	    # (Comments instead of a docstring so the JSON schema sent in the prompt stays unchanged.)
	    timeline_id: int = Field(..., ge=0)
	    divergence_reason: str = Field(..., min_length=5)
	    # `min_length` is the Pydantic v2 name for the list-size constraint; `min_items` is the
	    # deprecated v1 spelling and this file otherwise uses the v2 API (field_validator, ...).
	    segments: List[StorySegment] = Field(..., min_length=1)


	class ChronoWeaveResponse(BaseModel):
	    # Top-level story structure returned by the text model: a theme, one or more timelines,
	    # and the number of scenes every timeline must contain.
	    # (Comments instead of a docstring so the JSON schema sent in the prompt stays unchanged.)
	    core_theme: str = Field(..., min_length=5)
	    # `min_length` is the Pydantic v2 name for the list-size constraint (v1: `min_items`).
	    timelines: List[Timeline] = Field(..., min_length=1)
	    total_scenes_per_timeline: int = Field(..., gt=0)

	    @model_validator(mode="after")
	    def check_timeline_segment_count(self) -> "ChronoWeaveResponse":
	        """Reject the response unless every timeline has exactly `total_scenes_per_timeline` segments.

	        Raises:
	            ValueError: for the first timeline whose segment count differs; the message
	                names the timeline's position and id together with both counts.
	        """
	        expected = self.total_scenes_per_timeline
	        for i, t in enumerate(self.timelines):
	            if len(t.segments) != expected:
	                raise ValueError(f"Timeline {i} (ID: {t.timeline_id}): Expected {expected} segments, got {len(t.segments)}.")
	        return self


	# --- Helper Functions ---
	@contextlib.contextmanager
	def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLING_RATE, sample_width: int = 2):
	    """Open `filename` as a WAV file for writing and yield the configured writer.

	    The writer is set up with the given channel count, sample width in bytes
	    (2 = 16-bit audio) and frame rate before it is yielded. Any exception raised while
	    opening, configuring, or inside the `with` body is logged and re-raised. The file is
	    always closed on exit; an error during close is logged but not propagated.
	    """
	    handle = None
	    try:
	        handle = wave.open(filename, "wb")
	        # Apply the three format settings; they are independent of one another.
	        for apply_setting, value in (
	            (handle.setnchannels, channels),
	            (handle.setsampwidth, sample_width),
	            (handle.setframerate, rate),
	        ):
	            apply_setting(value)
	        yield handle
	    except Exception as write_error:
	        logger.error(f"Error writing wave file {filename}: {write_error}")
	        raise
	    finally:
	        if handle is not None:
	            try:
	                handle.close()
	            except Exception as close_error:
	                logger.error(f"Error closing wave file {filename}: {close_error}")


	# --- ChronoWeave Generator Class ---
	class ChronoWeaveGenerator:
	    """
	    Encapsulates the logic for generating branching narratives, processing assets (audio, image)
	    and assembling final videos.

	    Progress and error reporting goes through both the module logger and Streamlit (`st.*`),
	    so the methods are meant to be called from within the Streamlit script.
	    """

	    def __init__(self, api_key: str):
	        """Configure the Google Generative AI SDK and create one model handle per modality.

	        Raises:
	            Exception: whatever the SDK raises while constructing a model; it is logged
	                with its traceback and re-raised unchanged.
	        """
	        self.api_key = api_key
	        genai.configure(api_key=self.api_key)

	        try:
	            self.client_text = genai.GenerativeModel(TEXT_MODEL_ID)
	            logger.info(f"Initialized text model: {TEXT_MODEL_ID}")
	            self.client_audio = genai.GenerativeModel(AUDIO_MODEL_ID)
	            logger.info(f"Initialized audio model: {AUDIO_MODEL_ID}")
	            self.client_image = genai.GenerativeModel(IMAGE_MODEL_ID)
	            logger.info(f"Initialized image model: {IMAGE_MODEL_ID} (Placeholder: Update to Vertex AI SDK)")
	        except Exception:
	            logger.exception("Failed to initialize Google Clients/Models.")
	            # Bare raise keeps the original traceback intact.
	            raise

	    @staticmethod
	    def _close_clip(clip: Any) -> None:
	        """Best-effort release of a MoviePy clip and of the audio clip attached to it, if any."""
	        # The attached audio is closed explicitly; closing the video clip alone is not
	        # assumed to release the audio reader (NOTE(review): confirm for the installed MoviePy).
	        for resource in (getattr(clip, "audio", None), clip):
	            if resource is None:
	                continue
	            try:
	                resource.close()
	            except Exception as close_err:
	                logger.warning(f"Clip cleanup error: {close_err}")

	    def generate_story_structure(
	        self, theme: str, num_scenes: int, num_timelines: int, divergence_prompt: str = ""
	    ) -> Optional[ChronoWeaveResponse]:
	        """
	        Generates a story structure as JSON using the text model and validates it via Pydantic.

	        Returns the validated `ChronoWeaveResponse`, or None when the request, the JSON
	        parsing or the validation fails; every failure is logged and shown in the UI.
	        """
	        st.info(f"Generating {num_timelines} timeline(s) each with {num_scenes} scene(s) for theme: '{theme}'")
	        logger.info(f"Story generation request: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")

	        divergence_instruction = (
	            f"Introduce clear divergence after the first scene. Hint: '{divergence_prompt}'. "
	            f"For timeline_id 0, use 'Initial path' or 'Baseline scenario'."
	        )

	        # The JSON schema of the response model is embedded so the model's output can be
	        # validated against exactly the same structure afterwards.
	        prompt = f"""Act as a narrative designer. Create a story for the theme: "{theme}".
	Instructions:
	1. Exactly {num_timelines} timelines.
	2. Each timeline must consist of exactly {num_scenes} scenes.
	3. NO humans/humanoids; focus on animals, fantasy creatures, animated objects, and nature.
	4. {divergence_instruction}
	5. Style: 'Simple, friendly kids animation, bright colors, rounded shapes' unless modified by `timeline_visual_modifier`.
	6. `audio_text`: One concise sentence (max 30 words).
	7. `image_prompt`: Descriptive prompt (15–35 words) that emphasizes scene elements. Avoid repeating general style.
	8. `character_description`: Very brief (name and features; < 20 words).

	Output only a valid JSON object conforming exactly to this schema:
	JSON Schema: ```json
	{json.dumps(ChronoWeaveResponse.model_json_schema(), indent=2)}
	```"""

	        try:
	            response = self.client_text.generate_content(
	                contents=prompt,
	                generation_config=genai.types.GenerationConfig(
	                    response_mime_type="application/json", temperature=0.7
	                ),
	            )
	            raw_data = json.loads(response.text)
	            validated_data = ChronoWeaveResponse.model_validate(raw_data)
	            st.success("Story structure validated successfully!")
	            return validated_data

	        except json.JSONDecodeError as json_err:
	            # `response` is bound here: json.loads only runs after response.text was read.
	            logger.error(f"JSON decode failed: {json_err}\nResponse: {response.text}")
	            st.error(f"🚨 JSON Parsing Error: {json_err}", icon="📄")
	            st.text_area("Response", response.text, height=150)
	        except ValidationError as val_err:
	            # `raw_data` is bound here: validation only runs after a successful parse.
	            logger.error(f"Pydantic validation error: {val_err}\nData: {json.dumps(raw_data, indent=2)}")
	            st.error(f"🚨 Invalid story structure: {val_err}", icon="🧬")
	            st.json(raw_data)
	        except Exception as e:
	            logger.exception("Story generation error:")
	            st.error(f"🚨 Error generating story: {e}", icon="💥")
	        return None

	    async def generate_audio(self, text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
	        """
	        Asynchronously generates audio using the Gemini Live API.

	        Streams audio chunks for `text`, writes them to `output_filename` as a WAV file and
	        returns that path. Returns None when the stream reports an error, yields no data,
	        the prompt is blocked, or any other exception occurs (all logged and shown in the UI).
	        `voice` is accepted but not used by the current implementation.

	        NOTE(review): `connect`, `send_request` and `stream_content` are invoked on a
	        `genai.GenerativeModel`; confirm the installed SDK provides them. If it does not,
	        the resulting exception is caught below and every scene reports an audio failure.
	        """
	        task_id = os.path.basename(output_filename).split(".")[0]
	        collected_audio = bytearray()
	        logger.info(f"🎙️ [{task_id}] Generating audio for text: '{text[:60]}...'")
	        try:
	            config = {
	                "response_modalities": ["AUDIO"],
	                "audio_config": {"audio_encoding": "LINEAR16", "sample_rate_hertz": AUDIO_SAMPLING_RATE},
	            }
	            directive = f"Narrate directly: \"{text}\""
	            async with self.client_audio.connect(config=config) as session:
	                await session.send_request([directive])
	                async for response in session.stream_content():
	                    if response.audio_chunk and response.audio_chunk.data:
	                        collected_audio.extend(response.audio_chunk.data)
	                    if hasattr(response, "error") and response.error:
	                        logger.error(f"❌ [{task_id}] Audio error: {response.error}")
	                        st.error(f"Audio stream error {task_id}: {response.error}", icon="🔊")
	                        return None

	            if not collected_audio:
	                logger.warning(f"⚠️ [{task_id}] No audio data received.")
	                st.warning(f"No audio data for {task_id}.", icon="🔊")
	                return None

	            with wave_file_writer(output_filename) as wf:
	                wf.writeframes(bytes(collected_audio))
	            logger.info(f"✅ [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(collected_audio)} bytes)")
	            return output_filename

	        except genai.types.generation_types.BlockedPromptException as bpe:
	            logger.error(f"❌ [{task_id}] Audio blocked: {bpe}")
	            st.error(f"Audio blocked for {task_id}.", icon="🔇")
	        except Exception as e:
	            logger.exception(f"❌ [{task_id}] Audio generation failed: {e}")
	            st.error(f"Audio generation failed for {task_id}: {e}", icon="🔊")
	        return None

	    async def generate_image_async(self, prompt: str, aspect_ratio: str, task_id: str) -> Optional[Image.Image]:
	        """
	        Wraps the synchronous image generation function in a thread pool to allow asynchronous invocation.
	        Currently, this function is a stub pending Vertex AI SDK integration.

	        The stub always yields None, so an error is shown in the UI and None is returned.
	        """
	        # Inside a coroutine the running loop is the right one to schedule on.
	        loop = asyncio.get_running_loop()
	        logger.info(f"🖼️ [{task_id}] Requesting image for prompt: '{prompt[:70]}...' (Aspect Ratio: {aspect_ratio})")

	        # Placeholder: the real implementation would call a Vertex AI SDK function.
	        def gen_image():
	            logger.error(f"❌ [{task_id}] Image generation not implemented. Update required for Vertex AI.")
	            return None

	        image_result = await loop.run_in_executor(None, gen_image)
	        if image_result is None:
	            st.error(f"Image generation for {task_id} skipped: Requires Vertex AI SDK implementation.", icon="🖼️")
	        return image_result

	    async def process_scene(
	        self,
	        timeline_id: int,
	        segment: StorySegment,
	        temp_dir: str,
	        aspect_ratio: str,
	        audio_voice: Optional[str] = None,
	    ) -> Tuple[Optional[str], Optional[str], Optional[Any], List[str]]:
	        """
	        Processes a single scene: generates image and audio concurrently, creates a video clip if both succeed.
	        Returns a tuple of (image_path, audio_path, video_clip, [error messages]).

	        A returned `video_clip` keeps its audio reader open so it can still be rendered
	        later; the caller owns the clip and is responsible for closing it.
	        """
	        errors: List[str] = []
	        task_id = f"T{timeline_id}_S{segment.scene_id}"
	        image_path = os.path.join(temp_dir, f"{task_id}_image.png")
	        audio_path = os.path.join(temp_dir, f"{task_id}_audio.wav")
	        video_clip = None

	        # Launch image and audio generation concurrently.
	        image_future = asyncio.create_task(self.generate_image_async(
	            prompt=f"{segment.image_prompt} Featuring: {segment.character_description} {'Style hint: ' + segment.timeline_visual_modifier if segment.timeline_visual_modifier else ''}",
	            aspect_ratio=aspect_ratio,
	            task_id=task_id,
	        ))
	        audio_future = asyncio.create_task(self.generate_audio(segment.audio_text, audio_path, audio_voice))

	        image_result, audio_result = await asyncio.gather(image_future, audio_future)

	        # Handle image result (if available, save and preview)
	        if image_result:
	            try:
	                image_result.save(image_path)
	                st.image(image_result, width=180, caption=f"Scene {segment.scene_id + 1}")
	            except Exception as e:
	                logger.error(f"❌ [{task_id}] Error saving image: {e}")
	                errors.append(f"Scene {segment.scene_id + 1}: Image save error.")
	        else:
	            errors.append(f"Scene {segment.scene_id + 1}: Image generation failed.")

	        # Handle audio result and preview
	        if audio_result:
	            try:
	                with open(audio_result, "rb") as ap:
	                    st.audio(ap.read(), format="audio/wav")
	            except Exception as e:
	                # A failed preview is not fatal: the audio file itself was produced.
	                logger.warning(f"⚠️ [{task_id}] Audio preview error: {e}")
	        else:
	            errors.append(f"Scene {segment.scene_id + 1}: Audio generation failed.")

	        # Create video clip if both image and audio exist.
	        if not errors and os.path.exists(image_path) and os.path.exists(audio_path):
	            audio_clip = None
	            try:
	                audio_clip = AudioFileClip(audio_path)
	                # Load the pixels into memory and release the image file handle right away.
	                with Image.open(image_path) as scene_image:
	                    np_img = np.array(scene_image)
	                img_clip = ImageClip(np_img).set_duration(audio_clip.duration)
	                video_clip = img_clip.set_audio(audio_clip)
	                logger.info(f"✅ [{task_id}] Video clip created (Duration: {audio_clip.duration:.2f}s).")
	            except Exception as e:
	                logger.exception(f"❌ [{task_id}] Failed to create video clip: {e}")
	                errors.append(f"Scene {segment.scene_id + 1}: Video clip creation failed.")
	                video_clip = None
	                # Only a failed scene releases its audio here. On success the returned clip
	                # still reads from `audio_clip`, so closing it would break video assembly.
	                if audio_clip is not None:
	                    self._close_clip(audio_clip)

	        return (image_path if os.path.exists(image_path) else None,
	                audio_path if os.path.exists(audio_path) else None,
	                video_clip,
	                errors)

	    async def process_timeline(
	        self,
	        timeline: Timeline,
	        temp_dir: str,
	        aspect_ratio: str,
	        audio_voice: Optional[str] = None,
	    ) -> Tuple[Optional[str], List[str]]:
	        """
	        Processes an entire timeline by concurrently processing all scenes,
	        and then assembling a final video if all scenes succeed.
	        Returns the final video path and a list of error messages.

	        The path is None when any scene is missing a clip or when assembly fails. All scene
	        clips are released before returning, whatever the outcome.
	        """
	        timeline_id = timeline.timeline_id
	        scene_tasks = [
	            self.process_scene(timeline_id, segment, temp_dir, aspect_ratio, audio_voice)
	            for segment in timeline.segments
	        ]
	        # gather() returns results in the order of `timeline.segments`, i.e. scene order.
	        results = await asyncio.gather(*scene_tasks)
	        video_clips = []
	        timeline_errors: List[str] = []
	        for _img_path, _aud_path, clip, errs in results:
	            timeline_errors.extend(errs)
	            if clip is not None:
	                video_clips.append(clip)

	        final_video = None
	        try:
	            # Assemble the timeline video only if every scene produced a valid clip.
	            if video_clips and len(video_clips) == len(timeline.segments):
	                output_filename = os.path.join(temp_dir, f"timeline_{timeline_id}_final.mp4")
	                try:
	                    final_video = concatenate_videoclips(video_clips, method="compose")
	                    final_video.write_videofile(
	                        output_filename,
	                        fps=VIDEO_FPS,
	                        codec=VIDEO_CODEC,
	                        audio_codec=AUDIO_CODEC,
	                        logger=None
	                    )
	                    logger.info(f"✅ Timeline {timeline_id} video saved: {output_filename}")
	                    return output_filename, timeline_errors
	                except Exception as e:
	                    logger.exception(f"❌ Timeline {timeline_id} video assembly failed: {e}")
	                    timeline_errors.append(f"Timeline {timeline_id}: Video assembly failed.")
	            else:
	                timeline_errors.append(f"Timeline {timeline_id}: Incomplete scenes; skipping video assembly.")
	            return None, timeline_errors
	        finally:
	            # Cleanup the clips on every path (success, incomplete scenes, assembly failure)
	            # so audio readers are not left open.
	            for clip in video_clips:
	                self._close_clip(clip)
	            if final_video is not None:
	                self._close_clip(final_video)


	# --- Streamlit UI and Main Process ---
	def main():
	    """Streamlit entry point: collect settings, generate the story, render and show the videos.

	    Flow: load the API key, draw the sidebar, and — once the generate button is pressed —
	    create a per-run temp directory, request the story structure, process all timelines
	    concurrently, display the resulting videos or errors, and remove the temp directory.
	    """
	    # --- UI Configuration ---
	    # Page config is issued before anything else: Streamlit requires it to be the first
	    # Streamlit command, and the secrets lookup below may itself emit a UI element when no
	    # secrets file exists (NOTE(review): version dependent — confirm for the deployed Streamlit).
	    st.set_page_config(page_title="ChronoWeave", layout="wide", initial_sidebar_state="expanded")

	    # --- API Key Retrieval ---
	    GOOGLE_API_KEY: Optional[str] = None
	    try:
	        GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
	        logger.info("Google API Key loaded from Streamlit secrets.")
	    except (KeyError, FileNotFoundError):
	        # KeyError: secrets exist but lack the key. FileNotFoundError: no secrets file at
	        # all. Both cases fall back to the environment variable.
	        GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	        if GOOGLE_API_KEY:
	            logger.info("Google API Key loaded from environment variable.")
	        else:
	            st.error("🚨 Google API Key Not Found! Please configure it.", icon="🚨")
	            st.stop()

	    st.title("🌀 ChronoWeave: Advanced Branching Narrative Generator")
	    st.markdown("""
	Generate multiple, branching story timelines from a single theme using AI – complete with images and narration.
	Based on work by Yousif Ahmed. Copyright 2025 Google LLC.
	""")

	    st.sidebar.header("⚙️ Configuration")
	    if GOOGLE_API_KEY:
	        st.sidebar.success("Google API Key Loaded", icon="✅")
	    else:
	        # Reached only when the secret exists but is empty.
	        st.sidebar.error("Google API Key Missing!", icon="🚨")

	    theme = st.sidebar.text_input("📖 Story Theme:", "A curious squirrel finds a mysterious, glowing acorn")
	    num_scenes = st.sidebar.slider("🎬 Scenes per Timeline:", min_value=2, max_value=7, value=3)
	    num_timelines = st.sidebar.slider("🌿 Number of Timelines:", min_value=1, max_value=4, value=2)
	    divergence_prompt = st.sidebar.text_input("↔️ Divergence Hint (Optional):", placeholder="e.g., What if a bird tried to steal it?")
	    st.sidebar.subheader("🎨 Visual & Audio Settings")
	    aspect_ratio = st.sidebar.selectbox("🖼️ Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0)
	    audio_voice = None

	    generate_button = st.sidebar.button("✨ Generate ChronoWeave ✨", type="primary", disabled=(not GOOGLE_API_KEY), use_container_width=True)
	    st.sidebar.markdown("---")
	    st.sidebar.info("⏳ Generation may take several minutes.")
	    st.sidebar.markdown(f"<small>Txt: {TEXT_MODEL_ID}, Img: {IMAGE_MODEL_ID}, Aud: {AUDIO_MODEL_ID}</small>", unsafe_allow_html=True)

	    if not generate_button:
	        st.info("Configure settings and click '✨ Generate ChronoWeave ✨' to start.")
	        return

	    if not theme:
	        st.error("Please enter a story theme.", icon="👈")
	        return

	    # Create a unique temporary directory for this run
	    run_id = str(uuid.uuid4()).split('-')[0]
	    temp_dir = os.path.join(TEMP_DIR_BASE, f"run_{run_id}")
	    try:
	        os.makedirs(temp_dir, exist_ok=True)
	        logger.info(f"Created temporary directory: {temp_dir}")
	    except OSError as e:
	        st.error(f"🚨 Failed to create temporary directory {temp_dir}: {e}", icon="📂")
	        st.stop()

	    # From here on the temp directory exists; `finally` removes it on every exit path,
	    # including the early return after a failed story generation and unexpected errors.
	    try:
	        # Instantiate the ChronoWeave generator
	        generator = ChronoWeaveGenerator(GOOGLE_API_KEY)
	        with st.spinner("Generating narrative structure... 🤔"):
	            chrono_response = generator.generate_story_structure(theme, num_scenes, num_timelines, divergence_prompt)

	        if not chrono_response:
	            logger.error("Story generation or validation failed.")
	            return

	        overall_start_time = time.time()

	        async def process_all_timelines():
	            # One coroutine per timeline, gathered positionally so the results line up
	            # with `chrono_response.timelines` even if two timelines share an id.
	            return await asyncio.gather(*(
	                generator.process_timeline(timeline, temp_dir, aspect_ratio, audio_voice)
	                for timeline in chrono_response.timelines
	            ))

	        with st.spinner("Processing scenes and assembling videos..."):
	            timeline_results = asyncio.run(process_all_timelines())

	        # Collect results per timeline.
	        final_video_paths: Dict[int, str] = {}
	        generation_errors: Dict[int, List[str]] = {}
	        for timeline, (video_path, errors) in zip(chrono_response.timelines, timeline_results):
	            generation_errors[timeline.timeline_id] = errors
	            if video_path:
	                final_video_paths[timeline.timeline_id] = video_path

	        overall_duration = time.time() - overall_start_time
	        # Display status messages
	        if final_video_paths:
	            st.success(f"Complete! ({len(final_video_paths)} video(s) created in {overall_duration:.2f}s)")
	        else:
	            st.error(f"Failed. No final videos generated in {overall_duration:.2f}s")

	        _display_results(chrono_response, final_video_paths, generation_errors)
	    finally:
	        _cleanup_temp_dir(temp_dir)


	def _display_results(
	    chrono_response: "ChronoWeaveResponse",
	    final_video_paths: Dict[int, str],
	    generation_errors: Dict[int, List[str]],
	) -> None:
	    """Render each finished timeline video with a download button, or list all errors if none exist."""
	    # --- Display Final Videos ---
	    st.header("🎬 Generated Timelines")
	    if not final_video_paths:
	        st.warning("No final videos were successfully generated.")
	        with st.expander("View All Generation Errors", expanded=True):
	            for tid, errs in generation_errors.items():
	                if errs:
	                    st.error(f"Timeline {tid}:")
	                    for msg in errs:
	                        st.error(f" - {msg}")
	        return

	    sorted_ids = sorted(final_video_paths)
	    # At most three columns; further timelines wrap around into the same columns.
	    num_cols = min(len(sorted_ids), 3)
	    cols = st.columns(num_cols)
	    for idx, timeline_id in enumerate(sorted_ids):
	        video_path = final_video_paths[timeline_id]
	        timeline_data = next((t for t in chrono_response.timelines if t.timeline_id == timeline_id), None)
	        divergence = timeline_data.divergence_reason if timeline_data else "Unknown"
	        with cols[idx % num_cols]:
	            st.subheader(f"Timeline {timeline_id}")
	            st.caption(f"Divergence: {divergence}")
	            try:
	                # The bytes are read into memory so the video and the download button keep
	                # working after the temp directory has been removed.
	                with open(video_path, "rb") as vf:
	                    video_bytes = vf.read()
	                st.video(video_bytes)
	                st.download_button(
	                    f"Download Timeline {timeline_id}",
	                    video_bytes,
	                    file_name=f"timeline_{timeline_id}.mp4",
	                    mime="video/mp4",
	                    key=f"dl_{timeline_id}"
	                )
	                scene_errs = generation_errors.get(timeline_id)
	                if scene_errs:
	                    with st.expander(f"⚠️ View Scene Issues ({len(scene_errs)})"):
	                        for err in scene_errs:
	                            st.warning(f"- {err}")
	            except FileNotFoundError:
	                st.error(f"Error: Video for Timeline {timeline_id} is missing.", icon="🚨")
	            except Exception as e:
	                st.error(f"Display error for Timeline {timeline_id}: {e}", icon="🚨")


	def _cleanup_temp_dir(temp_dir: str) -> None:
	    """Delete the run's temp directory; failure is reported as a warning, never raised."""
	    # --- Cleanup ---
	    st.info(f"Cleaning up temporary files: {temp_dir}")
	    try:
	        shutil.rmtree(temp_dir)
	        st.success("✅ Temporary files cleaned up.")
	        logger.info(f"Temporary directory removed: {temp_dir}")
	    except Exception as e:
	        st.warning(f"Could not remove temporary files at: {temp_dir}", icon="⚠️")
	        logger.error(f"Failed to remove temporary directory {temp_dir}: {e}")


	if __name__ == "__main__":
	    main()