Nick021402 committed
Commit cfa934c · verified · 1 parent: 3e86d86

Update app.py

Files changed (1): app.py (+105 -106)
app.py CHANGED
@@ -28,10 +28,10 @@ class StoryVideoGenerator:
         self.pipe = None
         self.temp_dir = tempfile.mkdtemp() # Create a unique temporary directory
         self.current_seed = 42 # Base seed for consistency across runs
-
+
         # Ensure outputs directory exists (for the final ZIP file)
         os.makedirs("outputs", exist_ok=True)
-
+
     def load_model(self):
         """Load the Stable Diffusion model optimized for CPU"""
         if self.pipe is None:
@@ -49,27 +49,27 @@ class StoryVideoGenerator:
             self.pipe.enable_vae_slicing() # Memory optimization
             self.pipe.enable_sequential_cpu_offload() # Aggressive CPU offload for large models
             print("Stable Diffusion model loaded successfully on CPU.")
-
+
     def segment_story(self, story: str, max_segments: int = 10) -> List[str]:
         """Break story into logical segments for video generation"""
         # Clean the story text
         story = re.sub(r'\s+', ' ', story.strip())
-
+
         # Split into sentences
         sentences = nltk.sent_tokenize(story)
-
+
         segments = []
         current_segment_sentences = []
         current_word_count = 0
-
+
         # Aim for segments of roughly 25-35 words for 10 seconds of video.
         # A common speaking rate is 120-150 words per minute, so ~20-25 words per 10 seconds.
         # We'll use 30 words as a target, allowing for some flexibility.
         words_per_segment_target = 30
-
+
         for sentence in sentences:
             sentence_word_count = len(sentence.split())
-
+
             # If adding this sentence exceeds the target AND we already have some content,
             # finalize the current segment and start a new one.
             if current_word_count + sentence_word_count > words_per_segment_target and current_segment_sentences:
@@ -80,25 +80,25 @@ class StoryVideoGenerator:
             # Add sentence to current segment
             current_segment_sentences.append(sentence)
             current_word_count += sentence_word_count
-
+
         # Add any remaining segment
         if current_segment_sentences:
             segments.append(' '.join(current_segment_sentences))
-
+
         # Limit to max_segments to prevent excessively long generation times
         if len(segments) > max_segments:
             print(f"Warning: Story has {len(segments)} segments, truncating to {max_segments}.")
             segments = segments[:max_segments]
-
+
         return segments
-
+
     def create_prompt(self, segment_text: str, character_desc: str, style: str, segment_num: int) -> Tuple[str, str]:
         """Create optimized prompt and negative prompt for image generation"""
         # Extract key elements from segment
         actions = self.extract_actions(segment_text)
         location = self.extract_location(segment_text)
         mood = self.extract_mood(segment_text)
-
+
         # Define style mapping for diverse visuals
         style_map = {
             "cartoon": "vibrant cartoon style, clean lines, expressive, playful, children's book illustration",
@@ -108,10 +108,10 @@ class StoryVideoGenerator:
             "anime": "anime style, expressive, dynamic poses, cel-shaded, vibrant colors, Japanese animation aesthetic, detailed eyes"
         }
         selected_style = style_map.get(style, "highly detailed, artistic, professional illustration")
-
+
         # Build comprehensive prompt
         prompt_parts = []
-
+
         # Character description first for consistency emphasis and core subject
         if character_desc:
             prompt_parts.append(f"A single {character_desc}")
@@ -123,24 +123,24 @@ class StoryVideoGenerator:
             prompt_parts.append(f"is {actions}")
         if location:
             prompt_parts.append(f"in {location}")
-
+
         # Include the original segment text for additional context for the AI
         prompt_parts.append(f"Scene depicts: {segment_text}")
-
+
         # Add mood last, to influence atmosphere
         if mood:
             prompt_parts.append(f"with a {mood} atmosphere")
-
+
         prompt_parts.append(selected_style)
         prompt_parts.append("masterpiece, best quality, ultra detailed, 8k, volumetric lighting, rich color, film still, professional")
-
+
         final_prompt = ", ".join([p for p in prompt_parts if p and p.strip() != ''])
-
+
         # Comprehensive negative prompt to avoid common Stable Diffusion flaws
         negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, extra limbs, missing limbs, poorly drawn hands, poorly drawn feet, out of frame, tiling, watermark, signature, text, noisy, grainy, blurred, disfigured, monochrome, grayscale, low resolution, bad composition, amateur, multiple characters, crowd, duplicate, unrealistic, abstract, painting, drawing, cartoon, sketch, render, CGI, 3D"
-
+
         return final_prompt, negative_prompt
-
+
     def extract_actions(self, text: str) -> str:
         """Extract main actions from text segment (improved with more variety)"""
         action_keywords = {
@@ -155,16 +155,16 @@ class StoryVideoGenerator:
             'observe': 'observing quietly', 'listen': 'listening attentively', 'create': 'creating something',
             'destroy': 'destroying something', 'hide': 'hiding stealthily', 'search': 'searching diligently'
         }
-
+
         text_lower = text.lower()
         found_actions = []
-
+
         for keyword, description in action_keywords.items():
             if keyword in text_lower:
                 found_actions.append(description)
-
+
         return ', '.join(found_actions[:3]) if found_actions else "engaging with the environment" # Limit to 3 actions
-
+
     def extract_location(self, text: str) -> str:
         """Extract location/setting from text segment (improved with specific descriptions)"""
         location_keywords = {
@@ -178,15 +178,15 @@ class StoryVideoGenerator:
             'desert': 'a vast, arid desert landscape', 'ocean': 'a deep blue ocean surface', 'space': 'the vastness of outer space',
             'ship': 'a large sailing ship on the sea', 'train': 'inside a moving train carriage', 'plane': 'inside an airplane cockpit'
         }
-
+
         text_lower = text.lower()
-
+
         for keyword, description in location_keywords.items():
             if keyword in text_lower:
                 return description
-
+
         return "a richly detailed background setting" # More descriptive default if no specific location found
-
+
     def extract_mood(self, text: str) -> str:
         """Extract mood/atmosphere from text segment (improved with evocative descriptions)"""
         mood_keywords = {
@@ -197,21 +197,21 @@ class StoryVideoGenerator:
             'gloomy': 'dark and oppressive, rainy, desolate', 'joyful': 'radiant with happiness, sparkling light',
             'adventure': 'adventurous and daring, sense of discovery, wide open spaces'
         }
-
+
         text_lower = text.lower()
-
+
         for mood, description in mood_keywords.items():
             if mood in text_lower:
                 return description
-
+
         return "a fitting atmosphere" # Default for a general mood
-
+
     def generate_image(self, prompt: str, negative_prompt: str, segment_num: int) -> Image.Image:
         """Generate image for a story segment"""
         # Use consistent base seed for character consistency, adjusted per segment
         seed = self.current_seed + segment_num
         generator = torch.Generator(device=self.device).manual_seed(seed)
-
+
         # Generate image
         print(f"Generating image with prompt: {prompt[:150]}...")
         with torch.no_grad(): # Disable gradient calculations for inference
@@ -226,45 +226,45 @@ class StoryVideoGenerator:
                 height=512,
                 width=512
             )
-
+
         return result.images[0]
-
+
    def create_video_clip(self, image: Image.Image, text: str, duration: int = 10) -> str:
        """Create a video clip from image with text overlay and motion"""
-
+
        # Resize image to 512x512 if it's not already (ensures consistent video size)
        image = image.resize((512, 512), Image.Resampling.LANCZOS) # Use LANCZOS for high quality resizing

        # Convert PIL Image to NumPy array for MoviePy
        img_array = np.array(image)
-
+
        # Create ImageClip from NumPy array
        clip = ImageClip(img_array, duration=duration)
-
+
        # Add subtle Ken Burns effect (zoom + pan)
        # Zoom from 1.0 to 1.15 over the duration
        clip = clip.fx(vfx.resize, lambda t: 1 + 0.15 * t / duration)
-
+
        # Subtly pan (e.g., from top-left to bottom-right or vice-versa)
        # This is a fixed slight pan that goes over the duration of the clip
        start_x_offset = 0.05
        start_y_offset = 0.05
-
+
        clip = clip.fx(vfx.scroll, w=clip.w, h=clip.h, x_speed=lambda t: start_x_offset * clip.w / duration, y_speed=lambda t: start_y_offset * clip.h / duration)
-
+
        # Create text overlay using MoviePy's TextClip
        try:
            # Look for common font paths on Linux systems
            font_path_for_moviepy = None
            for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
-                     "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
-                     "/usr/share/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", # Some systems have it here
-                     "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf", # Another common path for Arial
-                     "/usr/share/fonts/truetype/arial.ttf"]: # Try Arial too
+                     "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+                     "/usr/share/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", # Some systems have it here
+                     "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf", # Another common path for Arial
+                     "/usr/share/fonts/truetype/arial.ttf"]: # Try Arial too
                if os.path.exists(p):
                    font_path_for_moviepy = p
                    break
-
+
            if font_path_for_moviepy:
                # Use a larger font size that scales with 512x512 video
                text_clip = TextClip(
@@ -289,7 +289,7 @@ class StoryVideoGenerator:
                    method='caption',
                    size=(clip.w * 0.9, None)
                ).set_duration(duration).set_position(('center', 'bottom')).set_margin(bottom=30)
-
+
        except Exception as e:
            print(f"Error creating MoviePy TextClip with specific font: {e}. Falling back to generic font.")
            text_clip = TextClip(
@@ -306,11 +306,11 @@ class StoryVideoGenerator:

        # Composite video with text
        final_clip = CompositeVideoClip([clip, text_clip])
-
+
        # Export video
        # Using a unique filename with PID to avoid conflicts if multiple runs happen very fast
        output_path = os.path.join(self.temp_dir, f"segment_{int(time.time())}_{os.getpid()}.mp4")
-
+
        print(f"Exporting video to {output_path}...")
        final_clip.write_videofile(
            output_path,
@@ -322,21 +322,21 @@ class StoryVideoGenerator:
            preset='medium' # 'medium' preset for balance of speed and quality on CPU
        )
        print(f"Video exported to {output_path}")
-
+
        # Close clips to free resources, crucial for MoviePy
        clip.close()
        text_clip.close()
        final_clip.close()
-
+
        return output_path
-
+
    def cleanup(self):
        """Clean up temporary files and directories"""
        print(f"Cleaning up temporary directory: {self.temp_dir}")
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
        self.temp_dir = tempfile.mkdtemp() # Create a new temporary directory for next run
-
+
        # Also clean the 'outputs' directory for old zip files to prevent disk overuse
        output_files = os.listdir("outputs")
        for f in output_files:
@@ -361,7 +361,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
     Gradio-compatible wrapper function for the main story processing.
     Yields updates for Gradio UI components.
     """
-
+
     generator.cleanup() # Clean up temp files from previous runs at the start of a new request

     if not story_text.strip():
@@ -374,16 +374,16 @@ def process_story_gradio(story_text: str, character_description: str, style: str
            None # No zip
        )
        return
-
+
    try:
        # Load model if not already loaded (this is optimized to run once per Space lifecycle)
        progress(0, desc="Initializing AI model... (This happens once after Space starts or resets)")
        generator.load_model()
-
+
        # Segment the story
        progress(0.05, desc="Analyzing story structure and preparing segments...")
        segments = generator.segment_story(story_text)
-
+
        if not segments:
            yield (
                "Error: Could not segment the story. Please try a longer or more detailed story.",
@@ -393,10 +393,10 @@ def process_story_gradio(story_text: str, character_description: str, style: str
                None
            )
            return
-
+
        total_segments = len(segments)
        initial_status_message = f"Story analyzed! Will generate {total_segments} video segments (approx. {total_segments * 10} seconds total)."
-
+
        # Initial yield: show segment count
        yield (
            initial_status_message,
@@ -414,21 +414,21 @@ def process_story_gradio(story_text: str, character_description: str, style: str
        # Generate a base seed for overall character consistency across segments
        # Using a hash of both character description and the story for more unique runs
        generator.current_seed = abs(hash(character_description.strip() + story_text.strip())) % (2**32 - 1)
-
+
        generated_video_paths = []
        generated_image_paths_for_gallery = []

        for i, segment_text in enumerate(segments):
            segment_idx = i + 1
-
+
            # --- Step 1: Update status and show current prompt details ---
            current_status_message = f"Processing segment {segment_idx} of {total_segments}..."
            progress(0.1 + (0.8 * (i / total_segments)), desc=current_status_message) # Progress from 10% to 90%
-
+
            prompt, negative_prompt = generator.create_prompt(
                segment_text, character_description, style, i
            )
-
+
            # Prepare HTML for current segment details
            segment_details_html = f"""
            <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
@@ -438,7 +438,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
            <p><em>{prompt}</em></p>
            </div>
            """
-
+
            yield (
                current_status_message,
                segment_details_html,
@@ -451,18 +451,18 @@ def process_story_gradio(story_text: str, character_description: str, style: str
            # --- Step 2: Generate Image ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.02, desc=f"Generating image for segment {segment_idx}...")
            image = generator.generate_image(prompt, negative_prompt, i)
-
+
            # Save image for the gallery (important to save to a persistent temp path)
            img_filename = f"segment_{segment_idx}_image_{int(time.time())}.png"
            img_path = os.path.join(generator.temp_dir, img_filename)
            image.save(img_path)
            generated_image_paths_for_gallery.append(img_path)
-
+
            # --- Step 3: Create Video Clip ---
            progress(0.1 + (0.8 * (i / total_segments)) + 0.05, desc=f"Creating video for segment {segment_idx}...")
            video_path = generator.create_video_clip(image, segment_text)
            generated_video_paths.append(video_path)
-
+
            # --- Step 4: Yield current segment's video and updated gallery ---
            current_status_message = f"Segment {segment_idx} of {total_segments} completed! Video ready."
            yield (
@@ -476,12 +476,12 @@ def process_story_gradio(story_text: str, character_description: str, style: str

        # --- Final Step: Generate ZIP file and update final status ---
        progress(0.95, desc="All segments generated. Compiling into a downloadable ZIP file...")
-
+
        # Create a unique zip file name in the 'outputs' directory
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        zip_filename = f"story_videos_{timestamp}.zip"
        final_zip_path = os.path.join("outputs", zip_filename)
-
+
        with zipfile.ZipFile(final_zip_path, 'w') as zipf:
            for idx, vid_path in enumerate(generated_video_paths):
                # Only add if file exists and is not a directory
@@ -490,10 +490,10 @@ def process_story_gradio(story_text: str, character_description: str, style: str
            for idx, img_path in enumerate(generated_image_paths_for_gallery):
                if os.path.isfile(img_path):
                    zipf.write(img_path, os.path.basename(img_path)) # Add corresponding image to zip
-
+
        final_status_message = f"✅ Story video generation complete! All {total_segments} segments generated and available for download."
        progress(1.0, desc="Complete!")
-
+
        yield (
            final_status_message,
            "<p>All segments have been processed. Download the complete ZIP file below!</p>",
@@ -501,7 +501,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
            generated_image_paths_for_gallery, # Final state of the gallery
            final_zip_path # Provide the path to the downloadable ZIP
        )
-
+
    except Exception as e:
        import traceback
        print(f"An unexpected error occurred: {e}")
@@ -519,15 +519,15 @@ def process_story_gradio(story_text: str, character_description: str, style: str
 # --- Gradio Interface Definition ---
 def create_interface():
     """Create the Gradio interface"""
-
+
     with gr.Blocks(title="AI Text-to-Video Story Generator", theme=gr.themes.Soft()) as interface:
-
+
        gr.Markdown("""
        # 🎬 AI Text-to-Video Story Generator
-
+
        Transform your written stories into animated video sequences! This tool breaks your story into segments
        and creates a 10-second video clip for each part, maintaining character consistency throughout.
-
+
        **Features:**
        - ✨ Converts text stories to video sequences
        - 🎭 Maintains character consistency across segments
@@ -535,7 +535,7 @@ def create_interface():
        - 📱 Optimized for free-tier CPU processing
        - 📦 Download individual clips or complete ZIP package
        """)
-
+
        with gr.Row():
            with gr.Column(scale=2):
                story_input = gr.Textbox(
@@ -545,14 +545,14 @@ def create_interface():
                    max_lines=15,
                    info="Write your complete story here. It will be split into 10-second video segments. Keep it concise for quicker results (e.g., 3-10 sentences)."
                )
-
+
                character_input = gr.Textbox(
                    label="👀 Main Character Description",
                    placeholder="Describe your main character's appearance (e.g., 'a young woman with long brown hair, wearing a blue dress, kind eyes')",
                    lines=3,
                    info="Provide a detailed description of your main character to help the AI maintain their consistent appearance throughout the video. This is crucial for consistency!"
                )
-
+
                style_dropdown = gr.Dropdown(
                    label="🎨 Art Style",
                    choices=[
@@ -565,31 +565,31 @@ def create_interface():
                    value="digital_art", # Default to digital art
                    info="Select the artistic style for your video segments. This affects the overall visual look."
                )
-
+
                generate_btn = gr.Button("🎬 Generate Story Videos", variant="primary", size="lg")
-
+
            with gr.Column(scale=1):
                gr.Markdown("""
                ### 💡 Tips for Best Results:
-
+
                **Story Writing:**
                - Aim for **3-10 sentences** in your story. Each will likely become a 10-second segment.
                - Include **clear actions and locations** for your character (e.g., "walking in the forest").
                - Describe **scenes vividly** to help the AI generate relevant visuals.
-
+
                **Character Description:**
                - Be **specific** about appearance (e.g., "blue eyes," "red cloak," "short stature").
                - Include **clothing or distinctive features** for better consistency across videos.
-
+
                **Processing Time:**
                - This application runs on **free-tier CPU hardware**.
                - Each 10-second segment can take **1-3 minutes** to generate.
                - Please be patient! **Progress updates** will keep you informed.
                - If it seems stuck, check the logs in the "Logs" tab of your Space.
                """)
-
+
        gr.Markdown("---")
-
+
        # Output sections
        status_output = gr.Textbox(
            label="📊 Generation Status",
@@ -597,7 +597,7 @@ def create_interface():
            interactive=False,
            value="Enter your story and click 'Generate' to begin!"
        )
-
+
        # HTML output for detailed current segment text and AI prompt
        current_segment_details_html = gr.HTML(
            label="Current Segment Details & AI Prompt",
@@ -614,7 +614,7 @@ def create_interface():
            autoplay=True, # Auto-play the new segment when it loads
            show_share_button=False # Hide share button
        )
-
+
        # Gallery to show generated images cumulatively
        image_gallery = gr.Gallery(
            label="🖼️ Generated Images (Overall Story Visuals)",
@@ -641,20 +641,20 @@ def create_interface():
        generate_btn.click(
            fn=process_story_gradio,
            inputs=[
-                story_input,
-                character_input,
+                story_input,
+                character_input,
                style_dropdown
            ],
            outputs=[
-                status_output, # 1. status_message (str)
-                current_segment_details_html, # 2. segment_details_html (str HTML)
-                current_video_preview, # 3. video_path (str filepath)
-                image_gallery, # 4. image_paths (List[str] filepaths)
-                download_zip_file # 5. zip_file_path (str filepath)
-            ),
-            api_name="generate_story_video", # Optional: for API endpoint if deployed
+                status_output,
+                current_segment_details_html,
+                current_video_preview,
+                image_gallery,
+                download_zip_file
+            ], # CORRECTED: This closes the 'outputs' list.
+            api_name="generate_story_video",
            concurrency_limit=1 # CRUCIAL: Ensures only one user can run at a time, for free tier
-        )
+        ) # This correctly closes the 'generate_btn.click' method call

        # Examples for quick testing
        gr.Examples(
@@ -676,12 +676,12 @@ def create_interface():
                ]
            ],
            inputs=[
-                story_input,
-                character_input,
+                story_input,
+                character_input,
                style_dropdown
            ],
-            label="Try these example stories!",
-            # Uncomment the line below if you want examples to run automatically when clicked
+            label="Try these example stories!"
+            # You can uncomment the line below if you want examples to run automatically when clicked
            # fn=process_story_gradio, outputs=[status_output, current_segment_details_html, current_video_preview, image_gallery, download_zip_file]
        )

@@ -691,6 +691,5 @@ def create_interface():
 if __name__ == "__main__":
     app = create_interface()
     # Set queue and concurrency_count to 1 for free tier to prevent overload and timeouts
-    app.queue(max_size=1, concurrency_count=1)
+    app.queue(max_size=1, concurrency_count=1)
     app.launch()
-
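Most hunks above are whitespace-only cleanup (the paired -/+ blank lines). The one functional fix is in the `generate_btn.click(...)` hunk: the previous revision closed the `outputs` list with `)` instead of `]`, a Python SyntaxError that prevented app.py from loading at all. A minimal sketch of the corrected wiring, using the components defined in `create_interface` above:

    # Sketch of the corrected event wiring; components as defined in create_interface.
    generate_btn.click(
        fn=process_story_gradio,
        inputs=[story_input, character_input, style_dropdown],
        outputs=[
            status_output,                  # running status text
            current_segment_details_html,   # per-segment prompt details
            current_video_preview,          # latest finished clip
            image_gallery,                  # cumulative stills
            download_zip_file,              # final ZIP path
        ],  # ']' closes the outputs list; the old ')' was the SyntaxError
        api_name="generate_story_video",
        concurrency_limit=1,  # one request at a time on free-tier CPU
    )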
 
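A caveat on the seeding hunk (an observation about Python, not a change made by this commit): `abs(hash(character_description.strip() + story_text.strip()))` uses the built-in `hash()`, which is salted per process for strings (PYTHONHASHSEED), so the "consistent" base seed actually changes every time the Space restarts. If cross-restart reproducibility matters, a deterministic digest works; `stable_seed` below is a hypothetical helper, not part of the committed file:

    import hashlib

    def stable_seed(character_desc: str, story: str) -> int:
        # SHA-256 is stable across processes, unlike the salted built-in hash().
        digest = hashlib.sha256((character_desc.strip() + story.strip()).encode("utf-8")).digest()
        return int.from_bytes(digest[:4], "big") % (2**32 - 1)

    # generator.current_seed = stable_seed(character_description, story_text)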
  print(f"Video exported to {output_path}")
325
+
326
  # Close clips to free resources, crucial for MoviePy
327
  clip.close()
328
  text_clip.close()
329
  final_clip.close()
330
+
331
  return output_path
332
+
333
  def cleanup(self):
334
  """Clean up temporary files and directories"""
335
  print(f"Cleaning up temporary directory: {self.temp_dir}")
336
  if os.path.exists(self.temp_dir):
337
  shutil.rmtree(self.temp_dir)
338
  self.temp_dir = tempfile.mkdtemp() # Create a new temporary directory for next run
339
+
340
  # Also clean the 'outputs' directory for old zip files to prevent disk overuse
341
  output_files = os.listdir("outputs")
342
  for f in output_files:
 
361
  Gradio-compatible wrapper function for the main story processing.
362
  Yields updates for Gradio UI components.
363
  """
364
+
365
  generator.cleanup() # Clean up temp files from previous runs at the start of a new request
366
 
367
  if not story_text.strip():
 
374
  None # No zip
375
  )
376
  return
377
+
378
  try:
379
  # Load model if not already loaded (this is optimized to run once per Space lifecycle)
380
  progress(0, desc="Initializing AI model... (This happens once after Space starts or resets)")
381
  generator.load_model()
382
+
383
  # Segment the story
384
  progress(0.05, desc="Analyzing story structure and preparing segments...")
385
  segments = generator.segment_story(story_text)
386
+
387
  if not segments:
388
  yield (
389
  "Error: Could not segment the story. Please try a longer or more detailed story.",
 
393
  None
394
  )
395
  return
396
+
397
  total_segments = len(segments)
398
  initial_status_message = f"Story analyzed! Will generate {total_segments} video segments (approx. {total_segments * 10} seconds total)."
399
+
400
  # Initial yield: show segment count
401
  yield (
402
  initial_status_message,
 
414
  # Generate a base seed for overall character consistency across segments
415
  # Using a hash of both character description and the story for more unique runs
416
  generator.current_seed = abs(hash(character_description.strip() + story_text.strip())) % (2**32 - 1)
417
+
418
  generated_video_paths = []
419
  generated_image_paths_for_gallery = []
420
 
421
  for i, segment_text in enumerate(segments):
422
  segment_idx = i + 1
423
+
424
  # --- Step 1: Update status and show current prompt details ---
425
  current_status_message = f"Processing segment {segment_idx} of {total_segments}..."
426
  progress(0.1 + (0.8 * (i / total_segments)), desc=current_status_message) # Progress from 10% to 90%
427
+
428
  prompt, negative_prompt = generator.create_prompt(
429
  segment_text, character_description, style, i
430
  )
431
+
432
  # Prepare HTML for current segment details
433
  segment_details_html = f"""
434
  <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
 
438
  <p><em>{prompt}</em></p>
439
  </div>
440
  """
441
+
442
  yield (
443
  current_status_message,
444
  segment_details_html,
 
451
  # --- Step 2: Generate Image ---
452
  progress(0.1 + (0.8 * (i / total_segments)) + 0.02, desc=f"Generating image for segment {segment_idx}...")
453
  image = generator.generate_image(prompt, negative_prompt, i)
454
+
455
  # Save image for the gallery (important to save to a persistent temp path)
456
  img_filename = f"segment_{segment_idx}_image_{int(time.time())}.png"
457
  img_path = os.path.join(generator.temp_dir, img_filename)
458
  image.save(img_path)
459
  generated_image_paths_for_gallery.append(img_path)
460
+
461
  # --- Step 3: Create Video Clip ---
462
  progress(0.1 + (0.8 * (i / total_segments)) + 0.05, desc=f"Creating video for segment {segment_idx}...")
463
  video_path = generator.create_video_clip(image, segment_text)
464
  generated_video_paths.append(video_path)
465
+
466
  # --- Step 4: Yield current segment's video and updated gallery ---
467
  current_status_message = f"Segment {segment_idx} of {total_segments} completed! Video ready."
468
  yield (
 
476
 
477
  # --- Final Step: Generate ZIP file and update final status ---
478
  progress(0.95, desc="All segments generated. Compiling into a downloadable ZIP file...")
479
+
480
  # Create a unique zip file name in the 'outputs' directory
481
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
482
  zip_filename = f"story_videos_{timestamp}.zip"
483
  final_zip_path = os.path.join("outputs", zip_filename)
484
+
485
  with zipfile.ZipFile(final_zip_path, 'w') as zipf:
486
  for idx, vid_path in enumerate(generated_video_paths):
487
  # Only add if file exists and is not a directory
 
490
  for idx, img_path in enumerate(generated_image_paths_for_gallery):
491
  if os.path.isfile(img_path):
492
  zipf.write(img_path, os.path.basename(img_path)) # Add corresponding image to zip
493
+
494
  final_status_message = f"βœ… Story video generation complete! All {total_segments} segments generated and available for download."
495
  progress(1.0, desc="Complete!")
496
+
497
  yield (
498
  final_status_message,
499
  "<p>All segments have been processed. Download the complete ZIP file below!</p>",
 
501
  generated_image_paths_for_gallery, # Final state of the gallery
502
  final_zip_path # Provide the path to the downloadable ZIP
503
  )
504
+
505
  except Exception as e:
506
  import traceback
507
  print(f"An unexpected error occurred: {e}")
 
519
  # --- Gradio Interface Definition ---
520
  def create_interface():
521
  """Create the Gradio interface"""
522
+
523
  with gr.Blocks(title="AI Text-to-Video Story Generator", theme=gr.themes.Soft()) as interface:
524
+
525
  gr.Markdown("""
526
  # 🎬 AI Text-to-Video Story Generator
527
+
528
  Transform your written stories into animated video sequences! This tool breaks your story into segments
529
  and creates a 10-second video clip for each part, maintaining character consistency throughout.
530
+
531
  **Features:**
532
  - ✨ Converts text stories to video sequences
533
  - 🎭 Maintains character consistency across segments
 
535
  - πŸ“± Optimized for free-tier CPU processing
536
  - πŸ“¦ Download individual clips or complete ZIP package
537
  """)
538
+
539
  with gr.Row():
540
  with gr.Column(scale=2):
541
  story_input = gr.Textbox(
 
545
  max_lines=15,
546
  info="Write your complete story here. It will be split into 10-second video segments. Keep it concise for quicker results (e.g., 3-10 sentences)."
547
  )
548
+
549
  character_input = gr.Textbox(
550
  label="πŸ‘€ Main Character Description",
551
  placeholder="Describe your main character's appearance (e.g., 'a young woman with long brown hair, wearing a blue dress, kind eyes')",
552
  lines=3,
553
  info="Provide a detailed description of your main character to help the AI maintain their consistent appearance throughout the video. This is crucial for consistency!"
554
  )
555
+
556
  style_dropdown = gr.Dropdown(
557
  label="🎨 Art Style",
558
  choices=[
 
565
  value="digital_art", # Default to digital art
566
  info="Select the artistic style for your video segments. This affects the overall visual look."
567
  )
568
+
569
  generate_btn = gr.Button("🎬 Generate Story Videos", variant="primary", size="lg")
570
+
571
  with gr.Column(scale=1):
572
  gr.Markdown("""
573
  ### πŸ’‘ Tips for Best Results:
574
+
575
  **Story Writing:**
576
  - Aim for **3-10 sentences** in your story. Each will likely become a 10-second segment.
577
  - Include **clear actions and locations** for your character (e.g., "walking in the forest").
578
  - Describe **scenes vividly** to help the AI generate relevant visuals.
579
+
580
  **Character Description:**
581
  - Be **specific** about appearance (e.g., "blue eyes," "red cloak," "short stature").
582
  - Include **clothing or distinctive features** for better consistency across videos.
583
+
584
  **Processing Time:**
585
  - This application runs on **free-tier CPU hardware**.
586
  - Each 10-second segment can take **1-3 minutes** to generate.
587
  - Please be patient! **Progress updates** will keep you informed.
588
  - If it seems stuck, check the logs in the "Logs" tab of your Space.
589
  """)
590
+
591
  gr.Markdown("---")
592
+
593
  # Output sections
594
  status_output = gr.Textbox(
595
  label="πŸ“Š Generation Status",
 
597
  interactive=False,
598
  value="Enter your story and click 'Generate' to begin!"
599
  )
600
+
601
  # HTML output for detailed current segment text and AI prompt
602
  current_segment_details_html = gr.HTML(
603
  label="Current Segment Details & AI Prompt",
 
614
  autoplay=True, # Auto-play the new segment when it loads
615
  show_share_button=False # Hide share button
616
  )
617
+
618
  # Gallery to show generated images cumulatively
619
  image_gallery = gr.Gallery(
620
  label="πŸ–ΌοΈ Generated Images (Overall Story Visuals)",
 
641
  generate_btn.click(
642
  fn=process_story_gradio,
643
  inputs=[
644
+ story_input,
645
+ character_input,
646
  style_dropdown
647
  ],
648
  outputs=[
649
+ status_output,
650
+ current_segment_details_html,
651
+ current_video_preview,
652
+ image_gallery,
653
+ download_zip_file
654
+ ], # CORRECTED: This closes the 'outputs' list.
655
+ api_name="generate_story_video",
656
  concurrency_limit=1 # CRUCIAL: Ensures only one user can run at a time, for free tier
657
+ ) # This correctly closes the 'generate_btn.click' method call
658
 
659
  # Examples for quick testing
660
  gr.Examples(
 
676
  ]
677
  ],
678
  inputs=[
679
+ story_input,
680
+ character_input,
681
  style_dropdown
682
  ],
683
+ label="Try these example stories!"
684
+ # You can uncomment the line below if you want examples to run automatically when clicked
685
  # fn=process_story_gradio, outputs=[status_output, current_segment_details_html, current_video_preview, image_gallery, download_zip_file]
686
  )
687
 
 
691
  if __name__ == "__main__":
692
  app = create_interface()
693
  # Set queue and concurrency_count to 1 for free tier to prevent overload and timeouts
694
+ app.queue(max_size=1, concurrency_count=1)
695
  app.launch()
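One compatibility caveat, again an observation rather than part of the commit: `app.queue(max_size=1, concurrency_count=1)` is the Gradio 3.x signature, while the per-event `concurrency_limit=1` used in the click handler was introduced in Gradio 4.x, where `queue()` dropped `concurrency_count` in favor of `default_concurrency_limit`. As committed, one of the two calls is likely to raise a TypeError depending on the installed Gradio version. A version-tolerant sketch of the launch block (assuming no other queue options are needed):

    if __name__ == "__main__":
        app = create_interface()
        try:
            app.queue(max_size=1, concurrency_count=1)  # Gradio 3.x keyword
        except TypeError:
            app.queue(max_size=1, default_concurrency_limit=1)  # Gradio 4.x equivalent
        app.launch()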