Update app.py
app.py
CHANGED
@@ -21,8 +21,9 @@ import logging # For better logging
 # Image handling
 from PIL import Image
 # Pydantic for data validation
-
-from
+# Updated imports for Pydantic v2 syntax
+from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
+from typing import List, Optional, Literal, Dict, Any

 # Video and audio processing
 from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips
@@ -51,7 +52,7 @@ Generate multiple, branching story timelines from a single theme using AI, compl
 # Text/JSON Model
 TEXT_MODEL_ID = "models/gemini-1.5-flash" # Or "gemini-1.5-pro" for potentially higher quality/cost
 # Audio Model Config
-AUDIO_API_VERSION = 'v1alpha' #
+AUDIO_API_VERSION = 'v1alpha' # May not be strictly needed for endpoint if library handles it
 AUDIO_MODEL_ID = f"models/gemini-1.5-flash" # Model used for audio tasks
 AUDIO_SAMPLING_RATE = 24000 # Standard for TTS models like Google's
 # Image Model Config
@@ -67,44 +68,33 @@ TEMP_DIR_BASE = ".chrono_temp" # Base name for temporary directories
 # --- API Key Handling ---
 GOOGLE_API_KEY = None
 try:
-    # Preferred way: Use Streamlit secrets when deployed
     GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
     logger.info("Google API Key loaded from Streamlit secrets.")
 except KeyError:
-    # Fallback: Check environment variable (useful for local development)
     GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
     if GOOGLE_API_KEY:
         logger.info("Google API Key loaded from environment variable.")
     else:
-        # Error if neither is found
         st.error(
-            "🚨 **Google API Key Not Found
-            "Please configure your Google API Key:\n"
-            "1. **Streamlit Cloud/Hugging Face Spaces:** Add it as a Secret named `GOOGLE_API_KEY` in your app's settings.\n"
-            "2. **Local Development:** Set the `GOOGLE_API_KEY` environment variable or create a `.streamlit/secrets.toml` file.",
+            "🚨 **Google API Key Not Found!** Please configure it via Streamlit secrets or environment variable.",
            icon="🚨"
        )
-        st.stop()
+        st.stop()

 # --- Initialize Google Clients ---
-# CORRECTED SECTION: Uses genai.GenerativeModel for both models
 try:
-    # Configure globally
     genai.configure(api_key=GOOGLE_API_KEY)
     logger.info("Configured google-generativeai with API key.")

-    # Model/Client Handle for Text/Imagen Generation
     client_standard = genai.GenerativeModel(TEXT_MODEL_ID)
     logger.info(f"Initialized standard GenerativeModel for {TEXT_MODEL_ID}.")

-
-    # Use the standard GenerativeModel initialization.
-    live_model = genai.GenerativeModel(AUDIO_MODEL_ID) # Use GenerativeModel here
+    live_model = genai.GenerativeModel(AUDIO_MODEL_ID)
     logger.info(f"Initialized GenerativeModel handle for audio ({AUDIO_MODEL_ID}).")

 except AttributeError as ae:
     logger.exception("AttributeError during Google AI Client Initialization.")
-    st.error(f"🚨
+    st.error(f"🚨 Initialization Error: {ae}. Ensure 'google-generativeai' is up-to-date.", icon="🚨")
     st.stop()
 except Exception as e:
     logger.exception("Failed to initialize Google AI Clients.")
@@ -112,18 +102,21 @@ except Exception as e:
     st.stop()


-# --- Define Pydantic Schemas
+# --- Define Pydantic Schemas (Using V2 Syntax) ---
 class StorySegment(BaseModel):
     scene_id: int = Field(..., ge=0, description="Scene number within the timeline, starting from 0.")
-
-
-
-
-    character_description: str = Field(..., max_length=
-
-
-
-
+    image_prompt: str = Field(..., min_length=10, max_length=250, # Keep increased limit
+                              description="Concise visual description (target 15-35 words). Focus on non-human characters, setting, action, style.")
+    audio_text: str = Field(..., min_length=5, max_length=150, description="Single sentence of narration/dialogue (max 30 words).")
+    # Increased max_length for character_description
+    character_description: str = Field(..., max_length=250, # <-- Increased from 100
+                                       description="Brief description of key non-human characters/objects in *this* scene's prompt (target under 20 words).")
+    timeline_visual_modifier: Optional[str] = Field(None, max_length=50, description="Optional subtle visual style hint.")
+
+    # Pydantic v2 style field validator
+    @field_validator('image_prompt')
+    @classmethod
+    def image_prompt_no_humans(cls, v: str) -> str:
         if any(word in v.lower() for word in ["person", "people", "human", "man", "woman", "boy", "girl", "child"]):
             logger.warning(f"Image prompt '{v[:50]}...' may contain human descriptions. Relying on API-level controls & prompt instructions.")
         return v
@@ -138,15 +131,14 @@ class ChronoWeaveResponse(BaseModel):
     timelines: List[Timeline] = Field(..., min_items=1, description="List of generated timelines.")
     total_scenes_per_timeline: int = Field(..., gt=0, description="The requested number of scenes per timeline.")

-
-
-
-
-
-
-
-
-        return timelines
+    # Pydantic v2 style model validator
+    @model_validator(mode='after') # Use 'after' to access validated fields
+    def check_timeline_segment_count(self) -> 'ChronoWeaveResponse':
+        expected_scenes = self.total_scenes_per_timeline
+        for i, timeline in enumerate(self.timelines):
+            if len(timeline.segments) != expected_scenes:
+                raise ValueError(f"Timeline {i} (ID: {timeline.timeline_id}) has {len(timeline.segments)} segments, but expected {expected_scenes}.")
+        return self

 # --- Helper Functions ---

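Note on the hunk above: the commit migrates from Pydantic v1's `@validator`/`@root_validator` to v2's `@field_validator` and `@model_validator`. A minimal, self-contained sketch of that pattern, using a hypothetical `Story` model (assumes `pydantic>=2`):

```python
# Sketch of the Pydantic v2 validator pattern adopted above (hypothetical model).
from typing import List
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator

class Story(BaseModel):
    expected_scenes: int = Field(..., gt=0)
    scenes: List[str] = Field(..., min_length=1)

    @field_validator('scenes')
    @classmethod
    def scenes_not_blank(cls, v: List[str]) -> List[str]:
        # Field validators receive the parsed value and must return it (or raise).
        if any(not s.strip() for s in v):
            raise ValueError("scenes must not contain blank entries")
        return v

    @model_validator(mode='after')
    def count_matches(self) -> 'Story':
        # 'after' validators run on the constructed model, so cross-field checks work.
        if len(self.scenes) != self.expected_scenes:
            raise ValueError(f"expected {self.expected_scenes} scenes, got {len(self.scenes)}")
        return self

try:
    Story.model_validate({"expected_scenes": 2, "scenes": ["intro"]})
except ValidationError as e:
    print(e)  # count_matches raises; Pydantic surfaces it as a ValidationError
```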
@@ -162,7 +154,7 @@ def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLING_RATE
         yield wf
     except Exception as e:
         logger.error(f"Error opening/configuring wave file {filename}: {e}")
-        raise
+        raise
     finally:
         if wf:
             try:
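Context for the hunk above: only the tail of `wave_file_writer` appears in this diff. A writer like this is typically a thin context manager over the stdlib `wave` module; a sketch of an assumed implementation follows (the 16-bit sample width is an assumption that matches the LINEAR16 encoding requested later in the file):

```python
# Sketch of a stdlib-based WAV writer context manager (assumed implementation).
import wave
from contextlib import contextmanager

@contextmanager
def wave_file_writer(filename: str, channels: int = 1, rate: int = 24000, sample_width: int = 2):
    wf = wave.open(filename, "wb")
    try:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)  # 2 bytes per sample = 16-bit PCM (LINEAR16)
        wf.setframerate(rate)
        yield wf                       # caller writes frames: wf.writeframes(pcm_bytes)
    finally:
        wf.close()                     # close() finalizes the WAV header
```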
@@ -172,34 +164,26 @@ def wave_file_writer(filename: str, channels: int = 1, rate: int = AUDIO_SAMPLING_RATE


 async def generate_audio_live_async(api_text: str, output_filename: str, voice: Optional[str] = None) -> Optional[str]:
-    """
-    Generates audio using Gemini Live API (async version) via the GenerativeModel.
-    Returns the path to the generated audio file or None on failure.
-    """
+    """Generates audio using Gemini Live API (async version) via the GenerativeModel."""
     collected_audio = bytearray()
-    task_id = os.path.basename(output_filename).split('.')[0]
+    task_id = os.path.basename(output_filename).split('.')[0]
     logger.info(f"🎙️ [{task_id}] Requesting audio for: '{api_text[:60]}...'")

     try:
-        # Use the 'live_model' (a GenerativeModel instance) initialized earlier.
         config = {
             "response_modalities": ["AUDIO"],
             "audio_config": {
-                "audio_encoding": "LINEAR16",
+                "audio_encoding": "LINEAR16",
                 "sample_rate_hertz": AUDIO_SAMPLING_RATE,
-                # "voice": voice if voice else "aura-asteria-en" # Optional
             }
         }
-
-        # Prepend directive to discourage conversational filler
         directive_prompt = (
             "Narrate the following sentence directly and engagingly. "
-            "Do not add any introductory or concluding remarks
+            "Do not add any introductory or concluding remarks. "
             "Speak only the sentence itself:\n\n"
             f'"{api_text}"'
         )

-        # Connect and stream using the GenerativeModel instance
         async with live_model.connect(config=config) as session:
             await session.send_request([directive_prompt])
             async for response in session.stream_content():
@@ -215,18 +199,17 @@ async def generate_audio_live_async(api_text: str, output_filename: str, voice:
             st.warning(f"No audio data generated for scene {task_id}.", icon="🔇")
             return None

-        # Write the collected audio bytes into a WAV file.
         with wave_file_writer(output_filename, rate=AUDIO_SAMPLING_RATE) as wf:
             wf.writeframes(bytes(collected_audio))
         logger.info(f" ✅ [{task_id}] Audio saved: {os.path.basename(output_filename)} ({len(collected_audio)} bytes)")
         return output_filename

     except genai.types.generation_types.BlockedPromptException as bpe:
-        logger.error(f" ❌ [{task_id}] Audio generation blocked
-        st.error(f"Audio generation blocked for scene {task_id}
+        logger.error(f" ❌ [{task_id}] Audio generation blocked: {bpe}")
+        st.error(f"Audio generation blocked for scene {task_id}.", icon="🔇")
         return None
     except Exception as e:
-        logger.exception(f" ❌ [{task_id}] Audio generation failed unexpectedly
+        logger.exception(f" ❌ [{task_id}] Audio generation failed unexpectedly: {e}")
         st.error(f"Audio generation failed for scene {task_id}: {e}", icon="🔇")
         return None

@@ -237,10 +220,7 @@ def generate_story_sequence_chrono(
     num_timelines: int,
     divergence_prompt: str = ""
 ) -> Optional[ChronoWeaveResponse]:
-    """
-    Generates branching story sequences using Gemini structured output and validates with Pydantic.
-    Returns a validated Pydantic object or None on failure.
-    """
+    """Generates branching story sequences using Gemini structured output and validates with Pydantic."""
     st.info(f"🚀 Generating {num_timelines} timeline(s) x {num_scenes} scenes for theme: '{theme}'...")
     logger.info(f"Requesting story structure: Theme='{theme}', Timelines={num_timelines}, Scenes={num_scenes}")

@@ -250,7 +230,7 @@ def generate_story_sequence_chrono(
         f"Clearly state the divergence reason for each timeline (except potentially the first)."
     )

-    # Updated prompt with stricter
+    # Updated prompt with stricter guidance on description lengths
     prompt = f"""
    Act as an expert narrative designer specializing in short, visual, branching stories for children.
    Create a story based on the core theme: "{theme}".
@@ -262,20 +242,19 @@ def generate_story_sequence_chrono(
    4. {divergence_instruction}
    5. Maintain a consistent visual style across all scenes and timelines: **'Simple, friendly kids animation style with bright colors and rounded shapes'**, unless a `timeline_visual_modifier` subtly alters it.
    6. Each scene's narration (`audio_text`) should be a single, concise sentence (approx. 5-10 seconds spoken length, max 30 words).
-    7. Image prompts (`image_prompt`) should be descriptive **and concise (target 15-35 words MAXIMUM)**, focusing only on the non-human character(s), setting, action, and essential visual style elements for *this specific scene*. Explicitly mention the main character(s) for consistency. **Do NOT repeat the general
-    8. `character_description` should briefly describe recurring non-human characters mentioned *in the specific scene's image prompt* (name, key visual features). Keep consistent within a timeline.
+    7. Image prompts (`image_prompt`) should be descriptive **and concise (target 15-35 words MAXIMUM)**, focusing only on the non-human character(s), setting, action, and essential visual style elements for *this specific scene*. Explicitly mention the main character(s) for consistency. **Do NOT repeat the general style description** unless essential for a specific visual change.
+    8. `character_description` should **very briefly** describe recurring non-human characters mentioned *in the specific scene's image prompt* (name, key visual features). **Keep descriptions extremely concise (e.g., "Nutsy: fluffy squirrel"). Aim for under 20 words total per scene.** Keep consistent within a timeline.

    **Output Format:**
    Respond ONLY with a valid JSON object adhering strictly to the provided schema. Do not include any text before or after the JSON object.

    **JSON Schema:**
    ```json
-    {json.dumps(ChronoWeaveResponse.
+    {json.dumps(ChronoWeaveResponse.model_json_schema(), indent=2)}
    ```
-    """ # Using .
+    """ # Using .model_json_schema() for Pydantic v2.

    try:
-        # Use the standard client (GenerativeModel instance) for text generation
        response = client_standard.generate_content(
            contents=prompt,
            generation_config=genai.types.GenerationConfig(
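The two lines completed above implement schema-guided prompting: the Pydantic v2 JSON schema is embedded in the prompt, and the reply is later validated against the same model. A sketch of that round trip with a hypothetical `Scene` model (the `reply_text` value is illustrative, not real model output):

```python
# Sketch: schema-guided prompting + strict validation (hypothetical Scene model).
import json
from pydantic import BaseModel, Field, ValidationError

class Scene(BaseModel):
    scene_id: int = Field(..., ge=0)
    audio_text: str = Field(..., max_length=150)

schema = json.dumps(Scene.model_json_schema(), indent=2)
prompt = f"Respond ONLY with JSON matching this schema:\n{schema}"

# ... send `prompt` to the model; suppose `reply_text` is its raw text reply ...
reply_text = '{"scene_id": 0, "audio_text": "A squirrel finds a glowing acorn."}'
try:
    scene = Scene.model_validate(json.loads(reply_text))
    print(scene.audio_text)
except (json.JSONDecodeError, ValidationError) as err:
    print(f"Model reply rejected: {err}")  # fall back or retry here
```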
@@ -284,13 +263,12 @@ def generate_story_sequence_chrono(
            )
        )

-        # Attempt to parse the JSON
        try:
            raw_data = json.loads(response.text)
        except json.JSONDecodeError as json_err:
            logger.error(f"Failed to decode JSON response: {json_err}")
            logger.error(f"Problematic Response Text:\n{response.text}")
-            st.error(f"🚨 Failed to parse
+            st.error(f"🚨 Failed to parse story structure from AI: {json_err}", icon="📄")
            st.text_area("Problematic AI Response:", response.text, height=200)
            return None
        except Exception as e:
@@ -298,10 +276,9 @@ def generate_story_sequence_chrono(
            st.error(f"🚨 Error processing AI response: {e}", icon="📄")
            return None

-        # Validate the parsed data using Pydantic
        try:
-            # Use
-            validated_data = ChronoWeaveResponse.
+            # Use model_validate for Pydantic v2
+            validated_data = ChronoWeaveResponse.model_validate(raw_data)
            logger.info("✅ Story structure generated and validated successfully!")
            st.success("✅ Story structure generated and validated!")
            return validated_data
@@ -309,33 +286,28 @@ def generate_story_sequence_chrono(
        logger.error(f"JSON structure validation failed: {val_err}")
        logger.error(f"Received Data:\n{json.dumps(raw_data, indent=2)}")
        st.error(f"🚨 The generated story structure is invalid: {val_err}", icon="🧬")
-        st.json(raw_data)
+        st.json(raw_data)
        return None

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f"Story generation prompt blocked: {bpe}")
-        st.error("🚨
+        st.error("🚨 Story generation prompt blocked (safety filters). Try rephrasing.", icon="🚫")
        return None
    except Exception as e:
        logger.exception("Error during story sequence generation:")
-        st.error(f"🚨
+        st.error(f"🚨 Unexpected error during story generation: {e}", icon="💥")
        return None


 def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str = "IMG") -> Optional[Image.Image]:
-    """
-    Generates an image using Imagen via the standard client with specific controls.
-    Returns a PIL Image object or None on failure.
-    """
+    """Generates an image using Imagen via the standard client."""
    logger.info(f"🖼️ [{task_id}] Requesting image for: '{prompt[:70]}...' (Aspect: {aspect_ratio})")

-    # Refined prompt - relies on the story generator to provide concise prompts now
-    # Still includes base style and negative constraints as reinforcement
    full_prompt = (
        f"Generate an image in a child-friendly, simple animation style with bright colors and rounded shapes. "
        f"Ensure absolutely NO humans or human-like figures are present. Focus on animals or objects. "
        f"Aspect ratio should be {aspect_ratio}. "
-        f"Scene Description: {prompt}"
+        f"Scene Description: {prompt}"
    )

    try:
@@ -349,25 +321,24 @@ def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str =
        image_bytes = None
        safety_ratings = []
        block_reason = None
+        finish_reason = None # Store finish reason if available

-        #
+        # Consolidate response checking
        if hasattr(response, 'candidates') and response.candidates:
            candidate = response.candidates[0]
+            if hasattr(candidate, 'finish_reason'):
+                finish_reason = candidate.finish_reason.name if hasattr(candidate.finish_reason, 'name') else str(candidate.finish_reason)
            if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
                part = candidate.content.parts[0]
                if hasattr(part, 'inline_data') and part.inline_data and hasattr(part.inline_data, 'data'):
                    image_bytes = part.inline_data.data
            if hasattr(candidate, 'safety_ratings'):
                safety_ratings = candidate.safety_ratings
-            # Finish reason might also indicate issues (e.g., SAFETY)
-            # if hasattr(candidate, 'finish_reason') and candidate.finish_reason != 'STOP': ...

-        # Check prompt feedback for blocking outside of candidates
        if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
-            if hasattr(response.prompt_feedback, 'block_reason') and response.prompt_feedback.block_reason != 'BLOCK_REASON_UNSPECIFIED':
-                block_reason = response.prompt_feedback.block_reason.name
+            if hasattr(response.prompt_feedback, 'block_reason') and response.prompt_feedback.block_reason.name != 'BLOCK_REASON_UNSPECIFIED':
+                block_reason = response.prompt_feedback.block_reason.name
            if hasattr(response.prompt_feedback, 'safety_ratings'):
-                # Combine prompt feedback ratings with candidate ratings if necessary
                safety_ratings.extend(response.prompt_feedback.safety_ratings)


@@ -375,39 +346,40 @@ def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str =
            try:
                image = Image.open(BytesIO(image_bytes))
                logger.info(f" ✅ [{task_id}] Image generated successfully.")
-                # Log safety flags if present
                filtered_ratings = [f"{r.category.name}: {r.probability.name}" for r in safety_ratings if hasattr(r,'probability') and r.probability.name != 'NEGLIGIBLE']
                if filtered_ratings:
-                    logger.warning(f" ⚠️ [{task_id}] Image
-                    st.warning(f"Image for scene {task_id} flagged
+                    logger.warning(f" ⚠️ [{task_id}] Image flagged by safety filters: {', '.join(filtered_ratings)}.")
+                    st.warning(f"Image for scene {task_id} flagged: {', '.join(filtered_ratings)}", icon="⚠️")
                return image
            except Exception as img_err:
                logger.error(f" ❌ [{task_id}] Failed to decode generated image data: {img_err}")
                st.warning(f"Failed to decode image data for scene {task_id}.", icon="🖼️")
                return None
        else:
-            #
+            # Enhanced logging for failure reasons
+            fail_reason = "Unknown reason."
            if block_reason:
-
-
+                fail_reason = f"Blocked (Reason: {block_reason})."
+            elif finish_reason and finish_reason not in ['STOP', 'FINISH_REASON_UNSPECIFIED']:
+                fail_reason = f"Finished early (Reason: {finish_reason})."
            else:
-                # Check for safety flags even if no block reason explicitly given
                filtered_ratings = [f"{r.category.name}: {r.probability.name}" for r in safety_ratings if hasattr(r,'probability') and r.probability.name != 'NEGLIGIBLE']
                if filtered_ratings:
-
-
-
-
-
-
+                    fail_reason = f"Safety filters triggered: {', '.join(filtered_ratings)}."
+
+            logger.warning(f" ⚠️ [{task_id}] No image data received. Reason: {fail_reason} Prompt: '{prompt[:70]}...'")
+            st.warning(f"No image data received for scene {task_id}. Reason: {fail_reason}", icon="🖼️")
+            # Uncomment to log full response only on unknown failures
+            # if fail_reason == "Unknown reason.":
+            #     logger.debug(f" ⚠️ [{task_id}] Full API response object: {response}")
            return None

    except genai.types.generation_types.BlockedPromptException as bpe:
        logger.error(f" ❌ [{task_id}] Image generation blocked (exception): {bpe}")
-        st.error(f"Image generation blocked for scene {task_id}
+        st.error(f"Image generation blocked for scene {task_id} (exception).", icon="🚫")
        return None
    except Exception as e:
-        logger.exception(f" ❌ [{task_id}] Image generation failed unexpectedly
+        logger.exception(f" ❌ [{task_id}] Image generation failed unexpectedly: {e}")
        st.error(f"Image generation failed for scene {task_id}: {e}", icon="🖼️")
        return None

@@ -415,27 +387,24 @@ def generate_image_imagen(prompt: str, aspect_ratio: str = "1:1", task_id: str =
 # --- Streamlit UI Elements ---
 st.sidebar.header("⚙️ Configuration")

-# API Key Status
 if GOOGLE_API_KEY:
     st.sidebar.success("Google API Key Loaded", icon="✅")
 else:
     st.sidebar.error("Google API Key Missing!", icon="🚨")

-# Story Parameters
 theme = st.sidebar.text_input("📖 Story Theme:", "A curious squirrel finds a mysterious, glowing acorn")
-num_scenes = st.sidebar.slider("🎬 Scenes per Timeline:", min_value=2, max_value=7, value=3
-num_timelines = st.sidebar.slider("🌿 Number of Timelines:", min_value=1, max_value=4, value=2
-divergence_prompt = st.sidebar.text_input("✏️ Divergence Hint (Optional):", placeholder="e.g., What if a bird tried to steal it?"
+num_scenes = st.sidebar.slider("🎬 Scenes per Timeline:", min_value=2, max_value=7, value=3)
+num_timelines = st.sidebar.slider("🌿 Number of Timelines:", min_value=1, max_value=4, value=2)
+divergence_prompt = st.sidebar.text_input("✏️ Divergence Hint (Optional):", placeholder="e.g., What if a bird tried to steal it?")

-# Generation Settings
 st.sidebar.subheader("🎨 Visual & Audio Settings")
-aspect_ratio = st.sidebar.selectbox("🖼️ Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0
-audio_voice = None
+aspect_ratio = st.sidebar.selectbox("🖼️ Image Aspect Ratio:", ["1:1", "16:9", "9:16"], index=0)
+audio_voice = None

 generate_button = st.sidebar.button("✨ Generate ChronoWeave ✨", type="primary", disabled=(not GOOGLE_API_KEY), use_container_width=True)

 st.sidebar.markdown("---")
-st.sidebar.info("⏳ Generation can take several minutes
+st.sidebar.info("⏳ Generation can take several minutes.", icon="⏳")
 st.sidebar.markdown(f"<small>Models: Text={TEXT_MODEL_ID}, Image={IMAGE_MODEL_ID}, Audio={AUDIO_MODEL_ID}</small>", unsafe_allow_html=True)


@@ -444,8 +413,7 @@ if generate_button:
    if not theme:
        st.error("Please enter a story theme in the sidebar.", icon="📖")
    else:
-
-        run_id = str(uuid.uuid4()).split('-')[0] # Short unique ID
+        run_id = str(uuid.uuid4()).split('-')[0]
        temp_dir = os.path.join(TEMP_DIR_BASE, f"run_{run_id}")
        try:
            os.makedirs(temp_dir, exist_ok=True)
@@ -454,8 +422,8 @@ if generate_button:
            st.error(f"🚨 Failed to create temporary directory {temp_dir}: {e}", icon="📁")
            st.stop()

-        final_video_paths = {}
-        generation_errors = {}
+        final_video_paths = {}
+        generation_errors = {}

        # --- 1. Generate Narrative Structure ---
        chrono_response: Optional[ChronoWeaveResponse] = None
@@ -463,15 +431,11 @@ if generate_button:
            chrono_response = generate_story_sequence_chrono(theme, num_scenes, num_timelines, divergence_prompt)

        if chrono_response:
-            # Structure generated and validated successfully by the function
-            # st.success(...) is now inside generate_story_sequence_chrono on success
-
            # --- 2. Process Each Timeline ---
            overall_start_time = time.time()
-            all_timelines_successful = True
+            all_timelines_successful = True

            with st.status("Generating assets and composing videos...", expanded=True) as status:
-
                for timeline_index, timeline in enumerate(chrono_response.timelines):
                    timeline_id = timeline.timeline_id
                    divergence = timeline.divergence_reason
@@ -494,18 +458,16 @@ if generate_button:
                        status.update(label=status_message)
                        st.markdown(f"--- **Scene {scene_id + 1} ({task_id})** ---")
                        logger.info(status_message)
-
                        scene_has_error = False

-                        st.write(f" *Image Prompt:* {segment.image_prompt}" + (f" *(
+                        st.write(f" *Image Prompt:* {segment.image_prompt}" + (f" *(Mod: {segment.timeline_visual_modifier})*" if segment.timeline_visual_modifier else ""))
                        st.write(f" *Audio Text:* {segment.audio_text}")

                        # --- 2a. Image Generation ---
                        generated_image: Optional[Image.Image] = None
                        with st.spinner(f"[{task_id}] Generating image... 🎨"):
-
-
-                            if segment.character_description: # Add character desc if present
+                            combined_prompt = segment.image_prompt
+                            if segment.character_description:
                                combined_prompt += f" Featuring: {segment.character_description}"
                            if segment.timeline_visual_modifier:
                                combined_prompt += f" Style hint: {segment.timeline_visual_modifier}."
@@ -518,12 +480,12 @@ if generate_button:
                                temp_image_files[scene_id] = image_path
                                st.image(generated_image, width=180, caption=f"Scene {scene_id+1} Image")
                            except Exception as e:
-                                logger.error(f" ❌ [{task_id}] Failed to save image
+                                logger.error(f" ❌ [{task_id}] Failed to save image: {e}")
                                st.error(f"Failed to save image for scene {task_id}.", icon="💾")
                                scene_has_error = True
                                generation_errors[timeline_id].append(f"Scene {scene_id+1}: Image save failed.")
                        else:
-
+                            # Error/warning already logged by generate_image_imagen
                            scene_has_error = True
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Image generation failed.")
                            continue
@@ -538,13 +500,13 @@ if generate_button:
                                generate_audio_live_async(segment.audio_text, audio_path_temp, audio_voice)
                            )
                        except RuntimeError as e:
-                            logger.error(f" ❌ [{task_id}] Asyncio runtime error
-                            st.error(f"Asyncio error during audio
+                            logger.error(f" ❌ [{task_id}] Asyncio runtime error: {e}")
+                            st.error(f"Asyncio error during audio gen for {task_id}: {e}", icon="⚡")
                            scene_has_error = True
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio async error.")
                        except Exception as e:
-                            logger.exception(f" ❌ [{task_id}] Unexpected
-                            st.error(f"Unexpected
+                            logger.exception(f" ❌ [{task_id}] Unexpected audio error: {e}")
+                            st.error(f"Unexpected audio error for {task_id}: {e}", icon="💥")
                            scene_has_error = True
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio generation error.")

@@ -556,16 +518,12 @@ if generate_button:
                            except Exception as e:
                                logger.warning(f" ⚠️ [{task_id}] Could not display audio preview: {e}")
                        else:
-
+                            # Error/warning already logged by generate_audio_live_async
                            scene_has_error = True
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Audio generation failed.")
                            if scene_id in temp_image_files and os.path.exists(temp_image_files[scene_id]):
-                                try:
-
-                                    logger.info(f" 🗑️ [{task_id}] Removed image file due to audio failure.")
-                                    del temp_image_files[scene_id]
-                                except OSError as e:
-                                    logger.warning(f" ⚠️ [{task_id}] Could not remove image file {temp_image_files[scene_id]} after audio failure: {e}")
+                                try: os.remove(temp_image_files[scene_id]); logger.info(f" 🗑️ [{task_id}] Removed image file due to audio failure."); del temp_image_files[scene_id]
+                                except OSError as e: logger.warning(f" ⚠️ [{task_id}] Failed to remove image after audio failure: {e}")
                            continue

                        # --- 2c. Create Video Clip ---
@@ -573,17 +531,14 @@ if generate_button:
                        st.write(f" 🎬 Creating video clip for Scene {scene_id+1}...")
                        img_path = temp_image_files[scene_id]
                        aud_path = temp_audio_files[scene_id]
-                        audio_clip_instance = None
-                        image_clip_instance = None
-                        composite_clip = None
+                        audio_clip_instance, image_clip_instance, composite_clip = None, None, None
                        try:
-                            if not os.path.exists(img_path): raise FileNotFoundError(f"Image file
-                            if not os.path.exists(aud_path): raise FileNotFoundError(f"Audio file
+                            if not os.path.exists(img_path): raise FileNotFoundError(f"Image file missing: {img_path}")
+                            if not os.path.exists(aud_path): raise FileNotFoundError(f"Audio file missing: {aud_path}")

                            audio_clip_instance = AudioFileClip(aud_path)
                            np_image = np.array(Image.open(img_path))
                            image_clip_instance = ImageClip(np_image).set_duration(audio_clip_instance.duration)
-
                            composite_clip = image_clip_instance.set_audio(audio_clip_instance)
                            video_clips.append(composite_clip)
                            logger.info(f" ✅ [{task_id}] Video clip created (Duration: {audio_clip_instance.duration:.2f}s).")
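The clip-building lines above follow the standard moviepy 1.x still-image-plus-narration pattern: one clip per (image, audio) pair, concatenated later per timeline. A self-contained sketch with hypothetical scene file names (moviepy 1.x `moviepy.editor` API assumed; moviepy 2.x renames these methods):

```python
# Sketch: one scene clip per (image, audio) pair, then concatenate (moviepy 1.x API).
import numpy as np
from PIL import Image
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips

scene_files = [("scene_00.png", "scene_00.wav"), ("scene_01.png", "scene_01.wav")]  # hypothetical paths
clips = []
for img_path, aud_path in scene_files:
    audio = AudioFileClip(aud_path)
    frame = np.array(Image.open(img_path))                # moviepy accepts a numpy frame
    clip = ImageClip(frame).set_duration(audio.duration)  # hold the still for the narration length
    clips.append(clip.set_audio(audio))

final = concatenate_videoclips(clips, method="compose")
final.write_videofile("timeline_0.mp4", fps=24, codec="libx264", audio_codec="aac", logger=None)
for c in clips:
    c.close()  # release ffmpeg readers
final.close()
```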
@@ -591,16 +546,16 @@ if generate_button:
                            scene_success_count += 1

                        except Exception as e:
-                            logger.exception(f" ❌ [{task_id}] Failed to create video clip
+                            logger.exception(f" ❌ [{task_id}] Failed to create video clip: {e}")
                            st.error(f"Failed to create video clip for {task_id}: {e}", icon="🎬")
                            scene_has_error = True
                            generation_errors[timeline_id].append(f"Scene {scene_id+1}: Video clip creation failed.")
                            if audio_clip_instance: audio_clip_instance.close()
                            if image_clip_instance: image_clip_instance.close()
-
-
-
-
+                            try:
+                                if os.path.exists(img_path): os.remove(img_path)
+                                if os.path.exists(aud_path): os.remove(aud_path)
+                            except OSError as e_rem: logger.warning(f" ⚠️ [{task_id}] Failed to remove files after clip error: {e_rem}")

                    # --- 2d. Assemble Timeline Video ---
                    timeline_duration = time.time() - timeline_start_time
@@ -612,46 +567,36 @@ if generate_button:
                        final_timeline_video = None
                        try:
                            final_timeline_video = concatenate_videoclips(video_clips, method="compose")
-                            final_timeline_video.write_videofile(
-                                output_filename,
-                                fps=VIDEO_FPS,
-                                codec=VIDEO_CODEC,
-                                audio_codec=AUDIO_CODEC,
-                                logger=None
-                            )
+                            final_timeline_video.write_videofile(output_filename, fps=VIDEO_FPS, codec=VIDEO_CODEC, audio_codec=AUDIO_CODEC, logger=None)
                            final_video_paths[timeline_id] = output_filename
                            logger.info(f" ✅ [{timeline_label}] Final video saved: {os.path.basename(output_filename)}")
                            st.success(f"✅ Video for {timeline_label} completed in {timeline_duration:.2f}s.")
-
                        except Exception as e:
                            logger.exception(f" ❌ [{timeline_label}] Failed to write final video: {e}")
                            st.error(f"Failed to assemble video for {timeline_label}: {e}", icon="🎼")
                            all_timelines_successful = False
                            generation_errors[timeline_id].append(f"Timeline {timeline_id}: Final video assembly failed.")
                        finally:
-                            logger.debug(f"[{timeline_label}] Closing {len(video_clips)} source clips...")
+                            logger.debug(f"[{timeline_label}] Closing {len(video_clips)} source clips and final video...")
                            for i, clip in enumerate(video_clips):
                                try:
                                    if clip:
                                        if clip.audio: clip.audio.close()
                                        clip.close()
-                                except Exception as e_close:
-                                    logger.warning(f" ⚠️ [{timeline_label}] Error closing source clip {i}: {e_close}")
+                                except Exception as e_close: logger.warning(f" ⚠️ [{timeline_label}] Error closing source clip {i}: {e_close}")
                            if final_timeline_video:
                                try:
                                    if final_timeline_video.audio: final_timeline_video.audio.close()
                                    final_timeline_video.close()
-
-                                except Exception as e_close_final:
-                                    logger.warning(f" ⚠️ [{timeline_label}] Error closing final video object: {e_close_final}")
+                                except Exception as e_close_final: logger.warning(f" ⚠️ [{timeline_label}] Error closing final video object: {e_close_final}")

                    elif not video_clips:
-                        logger.warning(f"[{timeline_label}] No video clips
-                        st.warning(f"No scenes
+                        logger.warning(f"[{timeline_label}] No video clips generated. Skipping final assembly.")
+                        st.warning(f"No scenes processed for {timeline_label}. Video cannot be created.", icon="🚫")
                        all_timelines_successful = False
                    else: # Some scenes failed
                        error_count = len(segments) - scene_success_count
-                        logger.warning(f"[{timeline_label}]
+                        logger.warning(f"[{timeline_label}] Errors in {error_count} scene(s). Skipping final video assembly.")
                        st.warning(f"{timeline_label} had errors in {error_count} scene(s). Final video not assembled.", icon="⚠️")
                        all_timelines_successful = False

@@ -659,20 +604,16 @@ if generate_button:
                        logger.error(f"Summary of errors in {timeline_label}: {generation_errors[timeline_id]}")

                # --- End of Timelines Loop ---
-
                overall_duration = time.time() - overall_start_time
                if all_timelines_successful and final_video_paths:
                    status_msg = f"ChronoWeave Generation Complete! ({len(final_video_paths)} videos in {overall_duration:.2f}s)"
-                    status.update(label=status_msg, state="complete", expanded=False)
-                    logger.info(status_msg)
+                    status.update(label=status_msg, state="complete", expanded=False); logger.info(status_msg)
                elif final_video_paths:
-                    status_msg = f"ChronoWeave Partially Complete ({len(final_video_paths)} videos,
-                    status.update(label=status_msg, state="warning", expanded=True)
-                    logger.warning(status_msg)
+                    status_msg = f"ChronoWeave Partially Complete ({len(final_video_paths)} videos, errors occurred). Time: {overall_duration:.2f}s"
+                    status.update(label=status_msg, state="warning", expanded=True); logger.warning(status_msg)
                else:
-                    status_msg = f"ChronoWeave Generation Failed. No videos produced.
-                    status.update(label=status_msg, state="error", expanded=True)
-                    logger.error(status_msg)
+                    status_msg = f"ChronoWeave Generation Failed. No videos produced. Time: {overall_duration:.2f}s"
+                    status.update(label=status_msg, state="error", expanded=True); logger.error(status_msg)

            # --- 3. Display Results ---
            st.header("🎬 Generated Timelines")
@@ -691,30 +632,21 @@ if generate_button:
                        st.subheader(f"Timeline {timeline_id}")
                        st.caption(f"Divergence: {reason}")
                        try:
-                            with open(video_path, 'rb') as video_file:
-                                video_bytes = video_file.read()
+                            with open(video_path, 'rb') as video_file: video_bytes = video_file.read()
                            st.video(video_bytes)
                            logger.info(f"Displaying video for Timeline {timeline_id}")
-                            st.download_button(
-                                label=f"Download T{timeline_id} Video",
-                                data=video_bytes,
-                                file_name=f"chronoweave_timeline_{timeline_id}.mp4",
-                                mime="video/mp4",
-                                key=f"download_btn_{timeline_id}"
-                            )
+                            st.download_button(label=f"Download T{timeline_id} Video", data=video_bytes, file_name=f"chronoweave_timeline_{timeline_id}.mp4", mime="video/mp4", key=f"dl_{timeline_id}")
                            if generation_errors.get(timeline_id):
-                                with st.expander(f"⚠️ View {len(generation_errors[timeline_id])}
-                                    for error_msg in generation_errors[timeline_id]:
-                                        st.warning(f"- {error_msg}")
-
+                                with st.expander(f"⚠️ View {len(generation_errors[timeline_id])} Issues"):
+                                    for error_msg in generation_errors[timeline_id]: st.warning(f"- {error_msg}")
                        except FileNotFoundError:
-                            logger.error(f"
-                            st.error(f"Error: Video file
+                            logger.error(f"Video file not found for display: {video_path}")
+                            st.error(f"Error: Video file missing for T{timeline_id}.", icon="🚨")
                        except Exception as e:
                            logger.exception(f"Could not display video {video_path}: {e}")
-                            st.error(f"Error displaying video
+                            st.error(f"Error displaying video T{timeline_id}: {e}", icon="🚨")
            else:
-                st.warning("No final videos were successfully generated
+                st.warning("No final videos were successfully generated.")
                all_errors = [msg for err_list in generation_errors.values() for msg in err_list]
                if all_errors:
                    st.subheader("Summary of Generation Issues")
@@ -722,8 +654,7 @@ if generate_button:
                    for tid, errors in generation_errors.items():
                        if errors:
                            st.error(f"Timeline {tid}:")
-                            for msg in errors:
-                                st.error(f" - {msg}")
+                            for msg in errors: st.error(f" - {msg}")

        # --- 4. Cleanup ---
        st.info(f"Attempting to clean up temporary directory: {temp_dir}")
@@ -736,11 +667,10 @@ if generate_button:
            st.warning(f"Could not automatically remove temporary files: {temp_dir}. Please remove it manually if needed.", icon="⚠️")

        elif not chrono_response:
-            # Error message already shown by generate_story_sequence_chrono or validation
            logger.error("Story generation or validation failed, cannot proceed.")
        else:
-            st.error("An unexpected issue occurred after story generation.
-            logger.error("Chrono_response existed but was falsy in
+            st.error("An unexpected issue occurred after story generation.", icon="🐛")
+            logger.error("Chrono_response existed but was falsy in main logic.")

 else:
     st.info("Configure settings in the sidebar and click '✨ Generate ChronoWeave ✨' to start.")