Nick021402 committed on
Commit 3e86d86 · verified · Parent(s): ee9cfe3

Create app.py

Files changed (1):
  app.py +696 -0

app.py ADDED
@@ -0,0 +1,696 @@
import gradio as gr
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import numpy as np
from PIL import Image  # PIL for image handling
import os
import tempfile
import zipfile
import nltk
from moviepy.editor import ImageClip, CompositeVideoClip, TextClip, vfx  # vfx for effects
import re
from typing import List, Tuple
import gc
import time
import shutil  # For robust directory cleanup
import hashlib  # For a base seed that is reproducible across Python processes
from datetime import datetime  # For unique zip filenames

# Download required NLTK data (punkt tokenizer)
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

class StoryVideoGenerator:
    def __init__(self):
        self.device = "cpu"  # Explicitly set to CPU for free-tier compatibility
        self.pipe = None
        self.temp_dir = tempfile.mkdtemp()  # Create a unique temporary directory
        self.current_seed = 42  # Base seed for consistency across runs

        # Ensure the outputs directory exists (for the final ZIP file)
        os.makedirs("outputs", exist_ok=True)

    def load_model(self):
        """Load the Stable Diffusion model optimized for CPU"""
        if self.pipe is None:
            print("Loading Stable Diffusion model...")
            model_id = "runwayml/stable-diffusion-v1-5"  # Standard Stable Diffusion v1.5
            self.pipe = StableDiffusionPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float32,  # float32 is required for CPU inference
                safety_checker=None,  # Disabled for speed and reduced memory
                requires_safety_checker=False  # Explicitly ensure the safety checker is off
            )
            self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(self.pipe.scheduler.config)
            self.pipe = self.pipe.to(self.device)
            self.pipe.enable_attention_slicing()  # Memory optimization
            self.pipe.enable_vae_slicing()  # Memory optimization
            # Note: enable_sequential_cpu_offload() is a CUDA offloading helper and is
            # deliberately not called here; on a CPU-only box there is no accelerator
            # to offload from.
            print("Stable Diffusion model loaded successfully on CPU.")

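    # Hypothetical GPU variant (not used by this Space): on CUDA hardware one would
    # typically load the pipeline in half precision and move it to the GPU instead
    # of applying the CPU settings above. Sketch only; "cuda" assumes a GPU box:
    #
    #   pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    #   pipe = pipe.to("cuda")
    #   pipe.enable_attention_slicing()
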
    def segment_story(self, story: str, max_segments: int = 10) -> List[str]:
        """Break story into logical segments for video generation"""
        # Clean the story text
        story = re.sub(r'\s+', ' ', story.strip())

        # Split into sentences
        sentences = nltk.sent_tokenize(story)

        segments = []
        current_segment_sentences = []
        current_word_count = 0

        # Aim for segments of roughly 25-35 words per 10 seconds of video.
        # A common speaking rate is 120-150 words per minute, i.e. about 20-25
        # words per 10 seconds; 30 words is used as the target to allow some
        # flexibility.
        words_per_segment_target = 30

        for sentence in sentences:
            sentence_word_count = len(sentence.split())

            # If adding this sentence exceeds the target AND we already have some
            # content, finalize the current segment and start a new one.
            if current_word_count + sentence_word_count > words_per_segment_target and current_segment_sentences:
                segments.append(' '.join(current_segment_sentences))
                current_segment_sentences = [sentence]
                current_word_count = sentence_word_count
            else:
                # Add the sentence to the current segment
                current_segment_sentences.append(sentence)
                current_word_count += sentence_word_count

        # Add any remaining segment
        if current_segment_sentences:
            segments.append(' '.join(current_segment_sentences))

        # Limit to max_segments to prevent excessively long generation times
        if len(segments) > max_segments:
            print(f"Warning: Story has {len(segments)} segments, truncating to {max_segments}.")
            segments = segments[:max_segments]

        return segments

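    # Illustrative sketch of the segmentation behavior (hypothetical input): a
    # 60-word story made of short sentences comes back as two segments of about
    # 30 words each, since sentences are kept whole.
    #
    #   gen = StoryVideoGenerator()
    #   segs = gen.segment_story("The fox found a key. " * 12)
    #   len(segs)             # -> 2 (12 sentences x 5 words, 30-word target)
    #   len(segs[0].split())  # -> 30
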
    def create_prompt(self, segment_text: str, character_desc: str, style: str, segment_num: int) -> Tuple[str, str]:
        """Create an optimized prompt and negative prompt for image generation"""
        # Extract key elements from the segment
        actions = self.extract_actions(segment_text)
        location = self.extract_location(segment_text)
        mood = self.extract_mood(segment_text)

        # Define style mapping for diverse visuals
        style_map = {
            "cartoon": "vibrant cartoon style, clean lines, expressive, playful, children's book illustration",
            "realistic": "photorealistic, cinematic still, highly detailed, realistic textures, natural volumetric lighting, lifelike",
            "fantasy": "epic fantasy art, magical realism, ethereal, grand, mythical creatures, enchanted forest, dramatic lighting",
            "digital_art": "digital painting, concept art, rich textures, detailed brushwork, high resolution, professional illustration",
            "anime": "anime style, expressive, dynamic poses, cel-shaded, vibrant colors, Japanese animation aesthetic, detailed eyes"
        }
        selected_style = style_map.get(style, "highly detailed, artistic, professional illustration")

        # Build the prompt piece by piece
        prompt_parts = []

        # Character description first, as the core subject and for consistency emphasis
        if character_desc:
            prompt_parts.append(f"A single {character_desc}")
        else:
            prompt_parts.append("A person")

        # Add action and location, ensuring they blend with the character
        if actions:
            prompt_parts.append(f"is {actions}")
        if location:
            prompt_parts.append(f"in {location}")

        # Include the original segment text as additional context for the AI
        prompt_parts.append(f"Scene depicts: {segment_text}")

        # Add mood last, to influence the atmosphere
        if mood:
            prompt_parts.append(f"with a {mood} atmosphere")

        prompt_parts.append(selected_style)
        prompt_parts.append("masterpiece, best quality, ultra detailed, 8k, volumetric lighting, rich color, film still, professional")

        final_prompt = ", ".join([p for p in prompt_parts if p and p.strip() != ''])

        # Comprehensive negative prompt to avoid common Stable Diffusion flaws
        negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, extra limbs, missing limbs, poorly drawn hands, poorly drawn feet, out of frame, tiling, watermark, signature, text, noisy, grainy, blurred, disfigured, monochrome, grayscale, low resolution, bad composition, amateur, multiple characters, crowd, duplicate"
        # Only penalize non-photographic looks when a realistic style is requested;
        # terms like "cartoon" or "painting" would otherwise fight the stylized presets.
        if style == "realistic":
            negative_prompt += ", unrealistic, abstract, painting, drawing, cartoon, sketch, render, CGI, 3D"

        return final_prompt, negative_prompt

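    # Illustrative sketch of an assembled prompt (hypothetical inputs):
    #
    #   p, n = gen.create_prompt("She was running through the forest.",
    #                            "young witch in a green robe", "fantasy", 0)
    #   # p begins: "A single young witch in a green robe, is running swiftly,
    #   # in a dense, ancient forest, Scene depicts: She was running through
    #   # the forest., ..." followed by the mood, the fantasy style preset,
    #   # and the quality tags.
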
    def extract_actions(self, text: str) -> str:
        """Extract main actions from the text segment (keyword lookup with varied phrasing)"""
        action_keywords = {
            'walk': 'walking gracefully', 'run': 'running swiftly', 'sit': 'sitting peacefully',
            'stand': 'standing still', 'look': 'looking intently', 'smile': 'smiling brightly',
            'cry': 'crying sadly', 'laugh': 'laughing joyfully', 'jump': 'jumping high',
            'sleep': 'sleeping soundly', 'eat': 'eating thoughtfully', 'drink': 'drinking refreshing water',
            'fight': 'fighting bravely', 'talk': 'talking animatedly', 'discover': 'discovering something new',
            'explore': 'exploring cautiously', 'fly': 'flying majestically', 'venture': 'venturing forward',
            'encounter': 'encountering a challenge', 'approach': 'approaching with caution',
            'read': 'reading a book', 'write': 'writing in a journal', 'think': 'thinking deeply',
            'observe': 'observing quietly', 'listen': 'listening attentively', 'create': 'creating something',
            'destroy': 'destroying something', 'hide': 'hiding stealthily', 'search': 'searching diligently'
        }

        text_lower = text.lower()
        found_actions = []

        for keyword, description in action_keywords.items():
            # Match at a word boundary so e.g. 'eat' does not fire on 'great';
            # the trailing \w* still catches inflections like 'walked' or 'walking'.
            if re.search(rf'\b{keyword}\w*', text_lower):
                found_actions.append(description)

        return ', '.join(found_actions[:3]) if found_actions else "engaging with the environment"  # Limit to 3 actions

    def extract_location(self, text: str) -> str:
        """Extract location/setting from the text segment (keyword lookup with specific descriptions)"""
        location_keywords = {
            'forest': 'a dense, ancient forest', 'castle': 'a grand, medieval castle', 'room': 'a cozy, well-lit room',
            'kitchen': 'a rustic, warm kitchen', 'garden': 'a vibrant, blooming garden', 'street': 'a bustling city street',
            'house': 'a quaint, welcoming house', 'mountain': 'a majestic, snow-capped mountain peak', 'beach': 'a serene, sandy beach at sunset',
            'city': 'a futuristic, neon-lit city', 'village': 'a charming, peaceful village', 'school': 'a busy school hallway',
            'office': 'a modern, minimalist office', 'park': 'a green, expansive park with trees', 'library': 'a quiet, old library filled with books',
            'store': 'a busy, colorful general store', 'restaurant': 'a lively, elegant restaurant', 'hospital': 'a sterile hospital corridor',
            'church': 'an old, gothic church', 'bridge': 'an ancient stone bridge over a river', 'cave': 'a mysterious, dimly lit cave',
            'desert': 'a vast, arid desert landscape', 'ocean': 'a deep blue ocean surface', 'space': 'the vastness of outer space',
            'ship': 'a large sailing ship on the sea', 'train': 'inside a moving train carriage', 'plane': 'inside an airplane cockpit'
        }

        text_lower = text.lower()

        for keyword, description in location_keywords.items():
            # Whole-word match (plural allowed), so 'ship' does not fire on
            # 'relationship' and 'space' does not fire on 'spaceship'
            if re.search(rf'\b{keyword}s?\b', text_lower):
                return description

        return "a richly detailed background setting"  # Descriptive default when no specific location is found

    def extract_mood(self, text: str) -> str:
        """Extract mood/atmosphere from the text segment (keyword lookup with evocative descriptions)"""
        mood_keywords = {
            'happy': 'joyful and uplifting, vibrant and cheerful lighting', 'sad': 'somber and melancholic, muted colors, soft lighting',
            'scary': 'ominous and frightening, dark shadows, dramatic contrasts', 'exciting': 'energetic and thrilling, dynamic motion, vibrant colors',
            'peaceful': 'serene and tranquil, soft, diffused lighting, calm atmosphere', 'angry': 'tense and dramatic, strong contrasts, dark clouds',
            'mysterious': 'enigmatic and suspenseful, foggy, low key lighting, hidden elements', 'calm': 'calm and quiet, still, harmonious',
            'gloomy': 'dark and oppressive, rainy, desolate', 'joyful': 'radiant with happiness, sparkling light',
            'adventure': 'adventurous and daring, sense of discovery, wide open spaces'
        }

        text_lower = text.lower()

        for mood, description in mood_keywords.items():
            # Word-boundary match; the trailing \w* catches forms like 'sadly'
            if re.search(rf'\b{mood}\w*', text_lower):
                return description

        return "fitting"  # Default; downstream this becomes "with a fitting atmosphere"

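    # Illustrative sketch of the three extractors on a hypothetical sentence:
    #
    #   s = "He laughed as he walked into the castle."
    #   gen.extract_actions(s)   # -> 'walking gracefully, laughing joyfully'
    #   gen.extract_location(s)  # -> 'a grand, medieval castle'
    #   gen.extract_mood(s)      # -> 'fitting' (no mood keyword present)
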
    def generate_image(self, prompt: str, negative_prompt: str, segment_num: int) -> Image.Image:
        """Generate an image for a story segment"""
        # Use a consistent base seed for character consistency, adjusted per segment
        seed = self.current_seed + segment_num
        generator = torch.Generator(device=self.device).manual_seed(seed)

        # Generate the image
        print(f"Generating image with prompt: {prompt[:150]}...")
        with torch.no_grad():  # Disable gradient tracking for inference
            # The pipeline runs in float32 on CPU; autocast is intentionally not
            # used here, since CPU autocast would push ops to bfloat16 and can
            # degrade Stable Diffusion output quality.
            result = self.pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=25,  # Moderate step count for quality
                guidance_scale=8.0,  # Increased for stronger adherence to the prompt
                generator=generator,
                height=512,
                width=512
            )

        return result.images[0]

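    # Illustrative note on seeding: segment i always draws from seed
    # current_seed + i, so re-running the same story with the same inputs
    # reproduces the same images. Sketch (hypothetical prompt variables):
    #
    #   gen.current_seed = 123
    #   img_a = gen.generate_image(prompt, neg, 0)  # uses seed 123
    #   img_b = gen.generate_image(prompt, neg, 0)  # seed 123 again -> identical
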
    def create_video_clip(self, image: Image.Image, text: str, duration: int = 10) -> str:
        """Create a video clip from an image with text overlay and motion"""

        # Resize the image to 512x512 if it is not already (ensures consistent video size)
        image = image.resize((512, 512), Image.Resampling.LANCZOS)  # LANCZOS for high-quality resizing

        # Convert the PIL Image to a NumPy array for MoviePy
        img_array = np.array(image)

        # Create an ImageClip from the NumPy array
        clip = ImageClip(img_array, duration=duration)

        # Add a subtle Ken Burns effect: zoom from 1.0 to 1.15 over the duration
        clip = clip.fx(vfx.resize, lambda t: 1 + 0.15 * t / duration)

        # Add a slight fixed pan across the clip. vfx.scroll expects constant
        # pixel-per-second speeds (not functions of t), so the fractional
        # offsets are converted to px/s here.
        start_x_offset = 0.05
        start_y_offset = 0.05
        clip = clip.fx(vfx.scroll, w=clip.w, h=clip.h,
                       x_speed=start_x_offset * clip.w / duration,
                       y_speed=start_y_offset * clip.h / duration)

        # Create the text overlay using MoviePy's TextClip.
        # Look for common font paths on Linux systems.
        font_path_for_moviepy = None
        for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
                  "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
                  "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf",  # Common path for Arial
                  "/usr/share/fonts/truetype/arial.ttf"]:
            if os.path.exists(p):
                font_path_for_moviepy = p
                break

        def make_text_clip(font):
            # VideoClip has no margin setter, so the caption is nudged up from
            # the bottom edge via a relative vertical position instead.
            return TextClip(
                text,
                fontsize=30 if font != 'sans' else 26,  # Larger size when a real font is available
                color='white',
                stroke_color='black',
                stroke_width=2,  # Strong outline for readability
                font=font,
                method='caption',  # Enables text wrapping
                size=(clip.w * 0.9, None)  # Wrap at 90% of the frame width
            ).set_duration(duration).set_position(('center', 0.85), relative=True)

        try:
            if font_path_for_moviepy:
                text_clip = make_text_clip(font_path_for_moviepy)
            else:
                print("Warning: Could not find a system font for MoviePy, using default 'sans' font.")
                text_clip = make_text_clip('sans')
        except Exception as e:
            print(f"Error creating MoviePy TextClip with specific font: {e}. Falling back to generic font.")
            text_clip = make_text_clip('sans')

        # Composite the video with the text
        final_clip = CompositeVideoClip([clip, text_clip])

        # Export the video. A timestamp plus PID keeps filenames unique even if
        # multiple runs happen in quick succession.
        output_path = os.path.join(self.temp_dir, f"segment_{int(time.time())}_{os.getpid()}.mp4")

        print(f"Exporting video to {output_path}...")
        final_clip.write_videofile(
            output_path,
            fps=24,  # Standard FPS for smooth playback
            codec='libx264',  # Common codec for MP4, good compatibility
            audio=False,  # No audio, to save CPU
            verbose=False,  # Suppress verbose MoviePy output
            logger=None,  # Suppress MoviePy logger output
            preset='medium'  # Balance of encode speed and quality on CPU
        )
        print(f"Video exported to {output_path}")

        # Close the clips to free resources, crucial for MoviePy
        clip.close()
        text_clip.close()
        final_clip.close()

        return output_path

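    # Illustrative note on the Ken Burns zoom above: the scale factor is
    # 1 + 0.15 * t / duration, i.e. 1.0 at t = 0 and 1.15 at t = duration,
    # a 15% push-in over the clip.
    #
    #   zoom = lambda t, duration=10: 1 + 0.15 * t / duration
    #   zoom(0)   # -> 1.0
    #   zoom(10)  # -> 1.15
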
    def cleanup(self):
        """Clean up temporary files and directories"""
        print(f"Cleaning up temporary directory: {self.temp_dir}")
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        self.temp_dir = tempfile.mkdtemp()  # Create a fresh temporary directory for the next run

        # Also clear old zip files from the 'outputs' directory to prevent disk overuse
        output_files = os.listdir("outputs")
        for f in output_files:
            if f.endswith(".zip"):
                file_path = os.path.join("outputs", f)
                try:
                    os.remove(file_path)
                    print(f"Removed old zip file: {file_path}")
                except Exception as e:
                    print(f"Error removing old zip file {file_path}: {e}")

        # Clear memory caches
        if torch.cuda.is_available():  # Check that CUDA is actually available before calling
            torch.cuda.empty_cache()
        gc.collect()  # Trigger Python's garbage collector for general memory cleanup

# Initialize the generator globally
generator = StoryVideoGenerator()

def process_story_gradio(story_text: str, character_description: str, style: str, progress=gr.Progress()):
    """
    Gradio-compatible wrapper function for the main story processing.
    Yields updates for the Gradio UI components.
    """

    generator.cleanup()  # Clean up temp files from previous runs at the start of a new request

    if not story_text.strip():
        # Yield an initial empty state and error message
        yield (
            "Please enter a story to generate videos.",
            "<p>No story provided.</p>",
            None,  # No video
            [],  # Empty gallery
            None  # No zip
        )
        return

    try:
        # Load the model if not already loaded (this runs once per Space lifecycle)
        progress(0, desc="Initializing AI model... (This happens once after the Space starts or resets)")
        generator.load_model()

        # Segment the story
        progress(0.05, desc="Analyzing story structure and preparing segments...")
        segments = generator.segment_story(story_text)

        if not segments:
            yield (
                "Error: Could not segment the story. Please try a longer or more detailed story.",
                "<p>Story could not be segmented. Please ensure it has enough content.</p>",
                None,
                [],
                None
            )
            return

        total_segments = len(segments)
        initial_status_message = f"Story analyzed! Will generate {total_segments} video segments (approx. {total_segments * 10} seconds total)."

        # Initial yield: show the segment count
        yield (
            initial_status_message,
            f"""
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
                <p>Story will be broken into <strong>{total_segments} segments</strong>.</p>
                <p>Starting video generation now...</p>
            </div>
            """,
            None,  # No video yet
            [],  # Empty gallery
            None  # No zip yet
        )

        # Derive a base seed for character consistency across segments from both
        # the character description and the story. hashlib is used rather than
        # hash(), whose value for strings changes between Python processes.
        seed_material = (character_description.strip() + story_text.strip()).encode("utf-8")
        generator.current_seed = int(hashlib.sha256(seed_material).hexdigest(), 16) % (2**32 - 1)

        generated_video_paths = []
        generated_image_paths_for_gallery = []

        for i, segment_text in enumerate(segments):
            segment_idx = i + 1

            # --- Step 1: Update status and show current prompt details ---
            current_status_message = f"Processing segment {segment_idx} of {total_segments}..."
            progress(0.1 + (0.8 * (i / total_segments)), desc=current_status_message)  # Progress runs from 10% to 90%

            prompt, negative_prompt = generator.create_prompt(
                segment_text, character_description, style, i
            )

            # Prepare HTML for the current segment details
            segment_details_html = f"""
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
                <h4>Current Story Segment ({segment_idx}/{total_segments}):</h4>
                <p><strong>{segment_text}</strong></p>
                <h4>AI Prompt Used:</h4>
                <p><em>{prompt}</em></p>
            </div>
            """

            yield (
                current_status_message,
                segment_details_html,
                None,  # Still no video for this segment yet
                generated_image_paths_for_gallery.copy(),  # Keep the existing gallery (important to send a copy)
                None
            )
            time.sleep(0.1)  # Small delay so Gradio can refresh the UI

            # --- Step 2: Generate the image ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.02, desc=f"Generating image for segment {segment_idx}...")
            image = generator.generate_image(prompt, negative_prompt, i)

            # Save the image for the gallery (to a persistent temp path)
            img_filename = f"segment_{segment_idx}_image_{int(time.time())}.png"
            img_path = os.path.join(generator.temp_dir, img_filename)
            image.save(img_path)
            generated_image_paths_for_gallery.append(img_path)

            # --- Step 3: Create the video clip ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.05, desc=f"Creating video for segment {segment_idx}...")
            video_path = generator.create_video_clip(image, segment_text)
            generated_video_paths.append(video_path)

            # --- Step 4: Yield the current segment's video and updated gallery ---
            current_status_message = f"Segment {segment_idx} of {total_segments} completed! Video ready."
            yield (
                current_status_message,
                segment_details_html,  # Retain the details HTML
                video_path,  # Path to the latest video
                generated_image_paths_for_gallery.copy(),  # Update the gallery with the new image (send a copy)
                None
            )
            time.sleep(0.1)  # Small delay for the UI update

        # --- Final step: generate the ZIP file and update the final status ---
        progress(0.95, desc="All segments generated. Compiling into a downloadable ZIP file...")

        # Create a unique zip file name in the 'outputs' directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"story_videos_{timestamp}.zip"
        final_zip_path = os.path.join("outputs", zip_filename)

        with zipfile.ZipFile(final_zip_path, 'w') as zipf:
            for vid_path in generated_video_paths:
                # Only add the file if it exists and is a regular file
                if os.path.isfile(vid_path):
                    zipf.write(vid_path, os.path.basename(vid_path))  # Store each video under its bare filename
            for img_path in generated_image_paths_for_gallery:
                if os.path.isfile(img_path):
                    zipf.write(img_path, os.path.basename(img_path))  # Add the corresponding images

        final_status_message = f"✅ Story video generation complete! All {total_segments} segments generated and available for download."
        progress(1.0, desc="Complete!")

        yield (
            final_status_message,
            "<p>All segments have been processed. Download the complete ZIP file below!</p>",
            generated_video_paths[-1] if generated_video_paths else None,  # Show the last video as the final preview
            generated_image_paths_for_gallery,  # Final state of the gallery
            final_zip_path  # Path to the downloadable ZIP
        )

    except Exception as e:
        import traceback
        print(f"An unexpected error occurred: {e}")
        traceback.print_exc()  # Print the full traceback to the logs for debugging
        yield (
            f"An error occurred during generation: {str(e)}. Please check your input and try again.",
            "<p>Error during processing. Check the logs for details.</p>",
            None,
            [],
            None
        )
    finally:
        generator.cleanup()  # Ensure cleanup after completion or error

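# Illustrative sketch (hypothetical): since process_story_gradio is a plain
# Python generator, it can also be driven outside Gradio for a smoke test. The
# default gr.Progress() argument may need to be replaced with a no-op callable
# when no Gradio event is active.
#
#   for status, html, video, gallery, zip_path in process_story_gradio(
#           "A knight explored the forest.", "a knight in silver armor",
#           "fantasy", progress=lambda *a, **k: None):
#       print(status, video, zip_path)
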
# --- Gradio Interface Definition ---
def create_interface():
    """Create the Gradio interface"""

    with gr.Blocks(title="AI Text-to-Video Story Generator", theme=gr.themes.Soft()) as interface:

        gr.Markdown("""
        # 🎬 AI Text-to-Video Story Generator

        Transform your written stories into animated video sequences! This tool breaks your story into segments
        and creates a 10-second video clip for each part, maintaining character consistency throughout.

        **Features:**
        - ✨ Converts text stories to video sequences
        - 🎭 Maintains character consistency across segments
        - 🎨 Multiple art styles available
        - 📱 Optimized for free-tier CPU processing
        - 📦 Download individual clips or a complete ZIP package
        """)

        with gr.Row():
            with gr.Column(scale=2):
                story_input = gr.Textbox(
                    label="📖 Your Story",
                    placeholder="Enter your story here... (e.g., 'Once upon a time, there was a brave knight named Sir Arthur who lived in a magnificent castle...')",
                    lines=8,
                    max_lines=15,
                    info="Write your complete story here. It will be split into 10-second video segments. Keep it concise for quicker results (e.g., 3-10 sentences)."
                )

                character_input = gr.Textbox(
                    label="👤 Main Character Description",
                    placeholder="Describe your main character's appearance (e.g., 'a young woman with long brown hair, wearing a blue dress, kind eyes')",
                    lines=3,
                    info="Provide a detailed description of your main character to help the AI keep their appearance consistent throughout the video. This is crucial for consistency!"
                )

                style_dropdown = gr.Dropdown(
                    label="🎨 Art Style",
                    choices=[
                        ("Cartoon", "cartoon"),
                        ("Realistic", "realistic"),
                        ("Fantasy Art", "fantasy"),
                        ("Digital Art", "digital_art"),
                        ("Anime", "anime")
                    ],
                    value="digital_art",  # Default to digital art
                    info="Select the artistic style for your video segments. This affects the overall visual look."
                )

                generate_btn = gr.Button("🎬 Generate Story Videos", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("""
                ### 💡 Tips for Best Results:

                **Story Writing:**
                - Aim for **3-10 sentences** in your story. Each will likely become a 10-second segment.
                - Include **clear actions and locations** for your character (e.g., "walking in the forest").
                - Describe **scenes vividly** to help the AI generate relevant visuals.

                **Character Description:**
                - Be **specific** about appearance (e.g., "blue eyes," "red cloak," "short stature").
                - Include **clothing or distinctive features** for better consistency across videos.

                **Processing Time:**
                - This application runs on **free-tier CPU hardware**.
                - Each 10-second segment can take **1-3 minutes** to generate.
                - Please be patient! **Progress updates** will keep you informed.
                - If it seems stuck, check the logs in the "Logs" tab of your Space.
                """)

        gr.Markdown("---")

        # Output sections
        status_output = gr.Textbox(
            label="📊 Generation Status",
            lines=2,  # Kept short; the box grows with its content
            interactive=False,
            value="Enter your story and click 'Generate' to begin!"
        )

        # HTML output for the current segment's text and AI prompt
        current_segment_details_html = gr.HTML(
            label="Current Segment Details & AI Prompt",
            value="<p>Details for the current segment will appear here as it's processed.</p>"
        )

        with gr.Row():
            # Live video preview for the currently generated 10-second segment
            current_video_preview = gr.Video(
                label="🎥 Live Segment Preview",
                width=512,
                height=512,
                interactive=False,
                autoplay=True,  # Auto-play each new segment when it loads
                show_share_button=False  # Hide the share button
            )

            # Gallery showing the generated images cumulatively
            image_gallery = gr.Gallery(
                label="🖼️ Generated Images (Overall Story Visuals)",
                show_label=True,
                elem_id="image_gallery",  # Unique ID for potential CSS styling
                columns=3,  # More columns for a compact view
                rows=2,
                object_fit="contain",
                height="auto",
                allow_preview=False  # Prevents the pop-up preview on click
            )

        # Final downloadable ZIP file with the complete story videos and images.
        # (gr.File does not take an `info` kwarg, so the hint lives in this comment.)
        download_zip_file = gr.File(
            label="⬇️ Download All Videos & Images (ZIP)",
            file_count="single",  # A single downloadable file
            interactive=False,  # User cannot upload, only download
            type="filepath"  # Gradio expects a file path to enable download
        )

        # Define the click action for the generate button.
        # The outputs here must match the order of values yielded by process_story_gradio.
        generate_btn.click(
            fn=process_story_gradio,
            inputs=[
                story_input,
                character_input,
                style_dropdown
            ],
            outputs=[
                status_output,  # 1. status_message (str)
                current_segment_details_html,  # 2. segment_details_html (str HTML)
                current_video_preview,  # 3. video_path (str filepath)
                image_gallery,  # 4. image_paths (List[str] filepaths)
                download_zip_file  # 5. zip_file_path (str filepath)
            ],
            api_name="generate_story_video",  # Optional: exposes an API endpoint when deployed
            concurrency_limit=1  # CRUCIAL: only one generation runs at a time on the free tier
        )

        # Examples for quick testing
        gr.Examples(
            examples=[
                [
                    "Elara, a young witch with a curious spirit, lived in a cottage nestled deep within the Whispering Woods. One day, while gathering herbs, she stumbled upon an ancient, glowing rune carved into a tree. As she touched it, a shimmering portal opened, revealing a world of floating islands and crystalline creatures. Elara, filled with wonder, stepped through, eager to explore this new magical realm.",
                    "A young witch with long, wavy red hair, green eyes, wearing a flowing dark green robe and carrying a small wooden staff.",
                    "fantasy"
                ],
                [
                    "Detective Miles Corbin adjusted his fedora as he walked down the rain-slicked alley. The neon signs of the city cast long, distorted shadows around him. He pulled out his worn notebook, reviewing the cryptic message left at the crime scene. A black cat darted past his feet, vanishing into the darkness, a faint echo of a distant siren barely audible. He knew this case would be tougher than he thought.",
                    "A grizzled detective in a classic trench coat and a wide-brimmed fedora, with a stern, tired expression.",
                    "realistic"
                ],
                [
                    "Zorp, the friendly alien, landed his spaceship in a bustling city park. Children gasped and pointed, but Zorp merely offered a three-fingered wave and a glowing smile. He then pulled out a device that projected vibrant holographic images of his home planet, a world of purple skies and bouncing jelly-like creatures. Everyone cheered, delighted by the unexpected show. Zorp felt happy to share his culture.",
                    "A small, green alien with large, friendly black eyes, three long fingers, and a shiny silver jumpsuit.",
                    "cartoon"
                ]
            ],
            inputs=[
                story_input,
                character_input,
                style_dropdown
            ],
            label="Try these example stories!",
            # Uncomment the line below to run the examples automatically when clicked:
            # fn=process_story_gradio, outputs=[status_output, current_segment_details_html, current_video_preview, image_gallery, download_zip_file]
        )

    return interface

# Launch the Gradio app
if __name__ == "__main__":
    app = create_interface()
    # Queue with a minimal backlog for the free tier. Gradio 4 removed
    # queue(concurrency_count=...); per-event concurrency is already capped by
    # concurrency_limit=1 on the click handler, and default_concurrency_limit
    # keeps any other events serialized as well.
    app.queue(max_size=1, default_concurrency_limit=1)
    app.launch()
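# Hypothetical local-run variant (sketch only): a Hugging Face Space supplies
# its own host and port, so the plain launch() above is sufficient there.
#
#   app.launch(server_name="0.0.0.0", server_port=7860, show_error=True)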