Update app.py

app.py CHANGED
@@ -28,10 +28,10 @@ class StoryVideoGenerator:
         self.pipe = None
         self.temp_dir = tempfile.mkdtemp()  # Create a unique temporary directory
         self.current_seed = 42  # Base seed for consistency across runs
-
+
         # Ensure outputs directory exists (for the final ZIP file)
         os.makedirs("outputs", exist_ok=True)
-
+
     def load_model(self):
         """Load the Stable Diffusion model optimized for CPU"""
         if self.pipe is None:
@@ -49,27 +49,27 @@ class StoryVideoGenerator:
             self.pipe.enable_vae_slicing()  # Memory optimization
             self.pipe.enable_sequential_cpu_offload()  # Aggressive CPU offload for large models
             print("Stable Diffusion model loaded successfully on CPU.")
-
+
     def segment_story(self, story: str, max_segments: int = 10) -> List[str]:
         """Break story into logical segments for video generation"""
         # Clean the story text
         story = re.sub(r'\s+', ' ', story.strip())
-
+
         # Split into sentences
         sentences = nltk.sent_tokenize(story)
-
+
         segments = []
         current_segment_sentences = []
         current_word_count = 0
-
+
         # Aim for segments of roughly 25-35 words for 10 seconds of video.
         # A common speaking rate is 120-150 words per minute, so ~20-25 words per 10 seconds.
         # We'll use 30 words as a target, allowing for some flexibility.
         words_per_segment_target = 30
-
+
         for sentence in sentences:
             sentence_word_count = len(sentence.split())
-
+
             # If adding this sentence exceeds the target AND we already have some content,
             # finalize the current segment and start a new one.
             if current_word_count + sentence_word_count > words_per_segment_target and current_segment_sentences:
@@ -80,25 +80,25 @@ class StoryVideoGenerator:
             # Add sentence to current segment
             current_segment_sentences.append(sentence)
             current_word_count += sentence_word_count
-
+
         # Add any remaining segment
         if current_segment_sentences:
             segments.append(' '.join(current_segment_sentences))
-
+
         # Limit to max_segments to prevent excessively long generation times
         if len(segments) > max_segments:
             print(f"Warning: Story has {len(segments)} segments, truncating to {max_segments}.")
             segments = segments[:max_segments]
-
+
         return segments
-
+
     def create_prompt(self, segment_text: str, character_desc: str, style: str, segment_num: int) -> Tuple[str, str]:
         """Create optimized prompt and negative prompt for image generation"""
         # Extract key elements from segment
         actions = self.extract_actions(segment_text)
         location = self.extract_location(segment_text)
         mood = self.extract_mood(segment_text)
-
+
         # Define style mapping for diverse visuals
         style_map = {
             "cartoon": "vibrant cartoon style, clean lines, expressive, playful, children's book illustration",
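
Note: segment_story is a greedy sentence packer. It accumulates NLTK-tokenized sentences until adding the next one would push the running word count past the ~30-word target, then closes the segment and starts a new one. A minimal standalone sketch of the same logic (hypothetical helper name; assumes the NLTK "punkt" tokenizer data has been downloaded):

import re
import nltk

def pack_sentences(story: str, target_words: int = 30) -> list:
    """Greedily pack sentences into roughly target_words-sized chunks."""
    sentences = nltk.sent_tokenize(re.sub(r'\s+', ' ', story.strip()))
    segments, current, count = [], [], 0
    for s in sentences:
        n = len(s.split())
        if count + n > target_words and current:  # close the chunk before it overflows
            segments.append(' '.join(current))
            current, count = [], 0
        current.append(s)
        count += n
    if current:
        segments.append(' '.join(current))
    return segments

A six-sentence story at typical sentence lengths comes out as two or three segments of one to three sentences each.
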
@@ -108,10 +108,10 @@ class StoryVideoGenerator:
             "anime": "anime style, expressive, dynamic poses, cel-shaded, vibrant colors, Japanese animation aesthetic, detailed eyes"
         }
         selected_style = style_map.get(style, "highly detailed, artistic, professional illustration")
-
+
         # Build comprehensive prompt
         prompt_parts = []
-
+
         # Character description first for consistency emphasis and core subject
         if character_desc:
             prompt_parts.append(f"A single {character_desc}")
@@ -123,24 +123,24 @@ class StoryVideoGenerator:
             prompt_parts.append(f"is {actions}")
         if location:
             prompt_parts.append(f"in {location}")
-
+
         # Include the original segment text for additional context for the AI
         prompt_parts.append(f"Scene depicts: {segment_text}")
-
+
         # Add mood last, to influence atmosphere
         if mood:
             prompt_parts.append(f"with a {mood} atmosphere")
-
+
         prompt_parts.append(selected_style)
         prompt_parts.append("masterpiece, best quality, ultra detailed, 8k, volumetric lighting, rich color, film still, professional")
-
+
         final_prompt = ", ".join([p for p in prompt_parts if p and p.strip() != ''])
-
+
         # Comprehensive negative prompt to avoid common Stable Diffusion flaws
         negative_prompt = "blurry, low quality, distorted, deformed, ugly, bad anatomy, extra limbs, missing limbs, poorly drawn hands, poorly drawn feet, out of frame, tiling, watermark, signature, text, noisy, grainy, blurred, disfigured, monochrome, grayscale, low resolution, bad composition, amateur, multiple characters, crowd, duplicate, unrealistic, abstract, painting, drawing, cartoon, sketch, render, CGI, 3D"
-
+
         return final_prompt, negative_prompt
-
+
     def extract_actions(self, text: str) -> str:
         """Extract main actions from text segment (improved with more variety)"""
         action_keywords = {
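
Note: to make the assembly order concrete (character, action, location, raw segment, mood, style, quality tags), here is roughly what create_prompt returns for one sample input. The values are illustrative only; the actual action and location hits depend on substring matches against the dictionaries below, including entries elided from this diff that may match first, so treat the output as approximate:

# 'gen' is a hypothetical StoryVideoGenerator instance
prompt, negative = gen.create_prompt(
    "She searched the ship for treasure.",
    "young woman with a red cloak", "cartoon", 0)
# prompt is approximately:
#   "A single young woman with a red cloak, is searching diligently, "
#   "in a large sailing ship on the sea, "
#   "Scene depicts: She searched the ship for treasure., "
#   "with a a fitting atmosphere atmosphere, "
#   "vibrant cartoon style, clean lines, expressive, playful, children's book illustration, "
#   "masterpiece, best quality, ultra detailed, 8k, volumetric lighting, rich color, film still, professional"

Note the doubled article in the mood clause: when no mood keyword matches, extract_mood returns the truthy default "a fitting atmosphere", which is then inserted into "with a {mood} atmosphere".
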
@@ -155,16 +155,16 @@ class StoryVideoGenerator:
             'observe': 'observing quietly', 'listen': 'listening attentively', 'create': 'creating something',
             'destroy': 'destroying something', 'hide': 'hiding stealthily', 'search': 'searching diligently'
         }
-
+
         text_lower = text.lower()
         found_actions = []
-
+
         for keyword, description in action_keywords.items():
             if keyword in text_lower:
                 found_actions.append(description)
-
+
         return ', '.join(found_actions[:3]) if found_actions else "engaging with the environment"  # Limit to 3 actions
-
+
     def extract_location(self, text: str) -> str:
         """Extract location/setting from text segment (improved with specific descriptions)"""
         location_keywords = {
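
Note: all three extractors use plain substring tests ('keyword in text_lower'), so inflected forms can miss ('hide' never matches "hid") while embedded hits can fire ('create' matches "recreated"). If that matters, a word-boundary variant is a small, self-contained change (a sketch; the dictionaries are the ones defined in this class):

import re

def match_keywords(text: str, keywords: dict, limit: int = 3) -> list:
    """Return descriptions whose keyword starts a whole word in text.

    Allows suffixes, so 'search' matches "searching"."""
    text_lower = text.lower()
    hits = [desc for kw, desc in keywords.items()
            if re.search(r'\b' + re.escape(kw), text_lower)]
    return hits[:limit]

This still misses irregular forms ("hid"), which would need stemming or lemmatization to catch.
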
@@ -178,15 +178,15 @@ class StoryVideoGenerator:
             'desert': 'a vast, arid desert landscape', 'ocean': 'a deep blue ocean surface', 'space': 'the vastness of outer space',
             'ship': 'a large sailing ship on the sea', 'train': 'inside a moving train carriage', 'plane': 'inside an airplane cockpit'
         }
-
+
         text_lower = text.lower()
-
+
         for keyword, description in location_keywords.items():
             if keyword in text_lower:
                 return description
-
+
         return "a richly detailed background setting"  # More descriptive default if no specific location found
-
+
     def extract_mood(self, text: str) -> str:
         """Extract mood/atmosphere from text segment (improved with evocative descriptions)"""
         mood_keywords = {
@@ -197,21 +197,21 @@ class StoryVideoGenerator:
             'gloomy': 'dark and oppressive, rainy, desolate', 'joyful': 'radiant with happiness, sparkling light',
             'adventure': 'adventurous and daring, sense of discovery, wide open spaces'
         }
-
+
         text_lower = text.lower()
-
+
         for mood, description in mood_keywords.items():
             if mood in text_lower:
                 return description
-
+
         return "a fitting atmosphere"  # Default for a general mood
-
+
     def generate_image(self, prompt: str, negative_prompt: str, segment_num: int) -> Image.Image:
         """Generate image for a story segment"""
         # Use consistent base seed for character consistency, adjusted per segment
         seed = self.current_seed + segment_num
         generator = torch.Generator(device=self.device).manual_seed(seed)
-
+
         # Generate image
         print(f"Generating image with prompt: {prompt[:150]}...")
         with torch.no_grad():  # Disable gradient calculations for inference
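
Note: the seeding scheme is base seed + segment index, so rerunning the same story re-derives the same per-segment seeds, and neighbouring segments get related but distinct noise. A self-contained sketch of the pattern with diffusers (the model id and step count here are illustrative, not taken from the elided load_model body; self.device is presumably "cpu" given the CPU-oriented comments):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # illustrative; the Space's actual model id is in the elided lines
    torch_dtype=torch.float32)         # float32 for CPU inference

base_seed = 42
for segment_num in range(3):
    g = torch.Generator(device="cpu").manual_seed(base_seed + segment_num)
    image = pipe("a red-cloaked traveler in a forest",
                 negative_prompt="blurry, low quality",
                 generator=g, num_inference_steps=20,
                 height=512, width=512).images[0]
    image.save(f"segment_{segment_num}.png")  # same seeds, same images, every run
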
@@ -226,45 +226,45 @@ class StoryVideoGenerator:
                 height=512,
                 width=512
             )
-
+
         return result.images[0]
-
+
     def create_video_clip(self, image: Image.Image, text: str, duration: int = 10) -> str:
         """Create a video clip from image with text overlay and motion"""
-
+
         # Resize image to 512x512 if it's not already (ensures consistent video size)
         image = image.resize((512, 512), Image.Resampling.LANCZOS)  # Use LANCZOS for high quality resizing

         # Convert PIL Image to NumPy array for MoviePy
         img_array = np.array(image)
-
+
         # Create ImageClip from NumPy array
         clip = ImageClip(img_array, duration=duration)
-
+
         # Add subtle Ken Burns effect (zoom + pan)
         # Zoom from 1.0 to 1.15 over the duration
         clip = clip.fx(vfx.resize, lambda t: 1 + 0.15 * t / duration)
-
+
         # Subtly pan (e.g., from top-left to bottom-right or vice-versa)
         # This is a fixed slight pan that goes over the duration of the clip
         start_x_offset = 0.05
         start_y_offset = 0.05
-
+
         clip = clip.fx(vfx.scroll, w=clip.w, h=clip.h, x_speed=lambda t: start_x_offset * clip.w / duration, y_speed=lambda t: start_y_offset * clip.h / duration)
-
+
         # Create text overlay using MoviePy's TextClip
         try:
             # Look for common font paths on Linux systems
             font_path_for_moviepy = None
             for p in ["/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
-
-
-
-
+                      "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+                      "/usr/share/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",  # Some systems have it here
+                      "/usr/share/fonts/truetype/msttcorefonts/Arial.ttf",  # Another common path for Arial
+                      "/usr/share/fonts/truetype/arial.ttf"]:  # Try Arial too
                 if os.path.exists(p):
                     font_path_for_moviepy = p
                     break
-
+
             if font_path_for_moviepy:
                 # Use a larger font size that scales with 512x512 video
                 text_clip = TextClip(
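
Note on the motion code above: in MoviePy 1.x, vfx.resize does accept a function of time, so the zoom line works as commented. vfx.scroll, however, takes numeric x_speed/y_speed (pixels per second) and a crop window smaller than the clip; passing lambdas as speeds, with w and h equal to the full clip size, likely fails or produces no visible pan at render time. A sketch of an equivalent zoom-plus-pan built from set_position inside a fixed-size composite (assumes MoviePy 1.x):

from moviepy.editor import ImageClip, CompositeVideoClip
import moviepy.video.fx.all as vfx

def ken_burns(img_array, duration=10, zoom=0.15, pan=0.05):
    """Slow zoom in plus a gentle down-right pan over the clip's duration."""
    base = ImageClip(img_array, duration=duration)
    w, h = base.w, base.h
    # vfx.resize accepts a function of time in MoviePy 1.x
    zoomed = base.fx(vfx.resize, lambda t: 1 + zoom * t / duration)
    # Slide the enlarged clip so the visible window drifts diagonally
    moving = zoomed.set_position(
        lambda t: (-pan * w * t / duration, -pan * h * t / duration))
    return CompositeVideoClip([moving], size=(w, h)).set_duration(duration)
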
@@ -289,7 +289,7 @@ class StoryVideoGenerator:
                     method='caption',
                     size=(clip.w * 0.9, None)
                 ).set_duration(duration).set_position(('center', 'bottom')).set_margin(bottom=30)
-
+
         except Exception as e:
             print(f"Error creating MoviePy TextClip with specific font: {e}. Falling back to generic font.")
             text_clip = TextClip(
@@ -306,11 +306,11 @@ class StoryVideoGenerator:

         # Composite video with text
         final_clip = CompositeVideoClip([clip, text_clip])
-
+
         # Export video
         # Using a unique filename with PID to avoid conflicts if multiple runs happen very fast
         output_path = os.path.join(self.temp_dir, f"segment_{int(time.time())}_{os.getpid()}.mp4")
-
+
         print(f"Exporting video to {output_path}...")
         final_clip.write_videofile(
             output_path,
@@ -322,21 +322,21 @@ class StoryVideoGenerator:
             preset='medium'  # 'medium' preset for balance of speed and quality on CPU
         )
         print(f"Video exported to {output_path}")
-
+
         # Close clips to free resources, crucial for MoviePy
         clip.close()
         text_clip.close()
         final_clip.close()
-
+
         return output_path
-
+
     def cleanup(self):
         """Clean up temporary files and directories"""
         print(f"Cleaning up temporary directory: {self.temp_dir}")
         if os.path.exists(self.temp_dir):
             shutil.rmtree(self.temp_dir)
         self.temp_dir = tempfile.mkdtemp()  # Create a new temporary directory for next run
-
+
         # Also clean the 'outputs' directory for old zip files to prevent disk overuse
         output_files = os.listdir("outputs")
         for f in output_files:
@@ -361,7 +361,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
     Gradio-compatible wrapper function for the main story processing.
     Yields updates for Gradio UI components.
     """
-
+
    generator.cleanup()  # Clean up temp files from previous runs at the start of a new request

     if not story_text.strip():
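
Note: because process_story_gradio is a generator, each yield pushes an interim update to the five bound output components, and Gradio streams those updates to the browser while the per-segment loop below runs. The pattern in miniature (a sketch, not the Space's exact wiring):

import time
import gradio as gr

def slow_task(steps, progress=gr.Progress()):
    # Each yield updates the bound output component immediately.
    for i in range(int(steps)):
        progress((i + 1) / int(steps), desc=f"step {i + 1}")
        time.sleep(1)
        yield f"finished step {i + 1} of {int(steps)}"

with gr.Blocks() as demo:
    n = gr.Number(value=3, label="steps")
    status = gr.Textbox(label="status")
    gr.Button("Run").click(slow_task, inputs=n, outputs=status)

demo.queue().launch()
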
@@ -374,16 +374,16 @@ def process_story_gradio(story_text: str, character_description: str, style: str
             None  # No zip
         )
         return
-
+
     try:
         # Load model if not already loaded (this is optimized to run once per Space lifecycle)
         progress(0, desc="Initializing AI model... (This happens once after Space starts or resets)")
         generator.load_model()
-
+
         # Segment the story
         progress(0.05, desc="Analyzing story structure and preparing segments...")
         segments = generator.segment_story(story_text)
-
+
         if not segments:
             yield (
                 "Error: Could not segment the story. Please try a longer or more detailed story.",
@@ -393,10 +393,10 @@ def process_story_gradio(story_text: str, character_description: str, style: str
                 None
             )
             return
-
+
         total_segments = len(segments)
         initial_status_message = f"Story analyzed! Will generate {total_segments} video segments (approx. {total_segments * 10} seconds total)."
-
+
         # Initial yield: show segment count
         yield (
             initial_status_message,
@@ -414,21 +414,21 @@ def process_story_gradio(story_text: str, character_description: str, style: str
         # Generate a base seed for overall character consistency across segments
         # Using a hash of both character description and the story for more unique runs
         generator.current_seed = abs(hash(character_description.strip() + story_text.strip())) % (2**32 - 1)
-
+
         generated_video_paths = []
         generated_image_paths_for_gallery = []

         for i, segment_text in enumerate(segments):
             segment_idx = i + 1
-
+
             # --- Step 1: Update status and show current prompt details ---
             current_status_message = f"Processing segment {segment_idx} of {total_segments}..."
             progress(0.1 + (0.8 * (i / total_segments)), desc=current_status_message)  # Progress from 10% to 90%
-
+
             prompt, negative_prompt = generator.create_prompt(
                 segment_text, character_description, style, i
             )
-
+
             # Prepare HTML for current segment details
             segment_details_html = f"""
             <div style='background-color: #e0f7fa; padding: 15px; border-radius: 8px; margin-top: 10px;'>
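
One caveat on the seed above: Python's built-in hash() for strings is randomized per process (PYTHONHASHSEED), so "same story, same seed" only holds within one Space session; after a restart the same inputs hash differently. A digest-based seed is stable across processes (sketch):

import hashlib

def stable_seed(character_description: str, story_text: str) -> int:
    """Derive a process-independent 32-bit seed from the inputs."""
    digest = hashlib.sha256(
        (character_description.strip() + story_text.strip()).encode("utf-8")
    ).digest()
    return int.from_bytes(digest[:4], "big")  # 0 .. 2**32 - 1
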
@@ -438,7 +438,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
             <p><em>{prompt}</em></p>
             </div>
             """
-
+
             yield (
                 current_status_message,
                 segment_details_html,
@@ -451,18 +451,18 @@ def process_story_gradio(story_text: str, character_description: str, style: str
             # --- Step 2: Generate Image ---
             progress(0.1 + (0.8 * (i / total_segments)) + 0.02, desc=f"Generating image for segment {segment_idx}...")
             image = generator.generate_image(prompt, negative_prompt, i)
-
+
             # Save image for the gallery (important to save to a persistent temp path)
             img_filename = f"segment_{segment_idx}_image_{int(time.time())}.png"
             img_path = os.path.join(generator.temp_dir, img_filename)
             image.save(img_path)
             generated_image_paths_for_gallery.append(img_path)
-
+
             # --- Step 3: Create Video Clip ---
             progress(0.1 + (0.8 * (i / total_segments)) + 0.05, desc=f"Creating video for segment {segment_idx}...")
             video_path = generator.create_video_clip(image, segment_text)
             generated_video_paths.append(video_path)
-
+
             # --- Step 4: Yield current segment's video and updated gallery ---
             current_status_message = f"Segment {segment_idx} of {total_segments} completed! Video ready."
             yield (
@@ -476,12 +476,12 @@ def process_story_gradio(story_text: str, character_description: str, style: str

         # --- Final Step: Generate ZIP file and update final status ---
         progress(0.95, desc="All segments generated. Compiling into a downloadable ZIP file...")
-
+
         # Create a unique zip file name in the 'outputs' directory
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         zip_filename = f"story_videos_{timestamp}.zip"
         final_zip_path = os.path.join("outputs", zip_filename)
-
+
         with zipfile.ZipFile(final_zip_path, 'w') as zipf:
             for idx, vid_path in enumerate(generated_video_paths):
                 # Only add if file exists and is not a directory
@@ -490,10 +490,10 @@ def process_story_gradio(story_text: str, character_description: str, style: str
             for idx, img_path in enumerate(generated_image_paths_for_gallery):
                 if os.path.isfile(img_path):
                     zipf.write(img_path, os.path.basename(img_path))  # Add corresponding image to zip
-
+
         final_status_message = f"✅ Story video generation complete! All {total_segments} segments generated and available for download."
         progress(1.0, desc="Complete!")
-
+
         yield (
             final_status_message,
             "<p>All segments have been processed. Download the complete ZIP file below!</p>",
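
Note: passing os.path.basename(path) as the second argument to zipf.write stores each file at the archive root instead of under its full temp-directory path. In miniature (paths illustrative):

import os
import zipfile

with zipfile.ZipFile("story_videos.zip", "w") as zipf:
    for path in ["/tmp/abc123/segment_1.mp4", "/tmp/abc123/segment_1_image.png"]:
        if os.path.isfile(path):
            # arcname drops the /tmp/abc123/ prefix inside the archive
            zipf.write(path, arcname=os.path.basename(path))
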
@@ -501,7 +501,7 @@ def process_story_gradio(story_text: str, character_description: str, style: str
             generated_image_paths_for_gallery,  # Final state of the gallery
             final_zip_path  # Provide the path to the downloadable ZIP
         )
-
+
     except Exception as e:
         import traceback
         print(f"An unexpected error occurred: {e}")
@@ -519,15 +519,15 @@ def process_story_gradio(story_text: str, character_description: str, style: str
 # --- Gradio Interface Definition ---
 def create_interface():
     """Create the Gradio interface"""
-
+
     with gr.Blocks(title="AI Text-to-Video Story Generator", theme=gr.themes.Soft()) as interface:
-
+
         gr.Markdown("""
         # 🎬 AI Text-to-Video Story Generator
-
+
         Transform your written stories into animated video sequences! This tool breaks your story into segments
         and creates a 10-second video clip for each part, maintaining character consistency throughout.
-
+
         **Features:**
         - ✨ Converts text stories to video sequences
         - 🎭 Maintains character consistency across segments
@@ -535,7 +535,7 @@ def create_interface():
         - 📱 Optimized for free-tier CPU processing
         - 📦 Download individual clips or complete ZIP package
         """)
-
+
         with gr.Row():
             with gr.Column(scale=2):
                 story_input = gr.Textbox(
@@ -545,14 +545,14 @@ def create_interface():
                     max_lines=15,
                     info="Write your complete story here. It will be split into 10-second video segments. Keep it concise for quicker results (e.g., 3-10 sentences)."
                 )
-
+
                 character_input = gr.Textbox(
                     label="👤 Main Character Description",
                     placeholder="Describe your main character's appearance (e.g., 'a young woman with long brown hair, wearing a blue dress, kind eyes')",
                     lines=3,
                     info="Provide a detailed description of your main character to help the AI maintain their consistent appearance throughout the video. This is crucial for consistency!"
                 )
-
+
                 style_dropdown = gr.Dropdown(
                     label="🎨 Art Style",
                     choices=[
@@ -565,31 +565,31 @@ def create_interface():
                     value="digital_art",  # Default to digital art
                     info="Select the artistic style for your video segments. This affects the overall visual look."
                 )
-
+
                 generate_btn = gr.Button("🎬 Generate Story Videos", variant="primary", size="lg")
-
+
             with gr.Column(scale=1):
                 gr.Markdown("""
                 ### 💡 Tips for Best Results:
-
+
                 **Story Writing:**
                 - Aim for **3-10 sentences** in your story. Each will likely become a 10-second segment.
                 - Include **clear actions and locations** for your character (e.g., "walking in the forest").
                 - Describe **scenes vividly** to help the AI generate relevant visuals.
-
+
                 **Character Description:**
                 - Be **specific** about appearance (e.g., "blue eyes," "red cloak," "short stature").
                 - Include **clothing or distinctive features** for better consistency across videos.
-
+
                 **Processing Time:**
                 - This application runs on **free-tier CPU hardware**.
                 - Each 10-second segment can take **1-3 minutes** to generate.
                 - Please be patient! **Progress updates** will keep you informed.
                 - If it seems stuck, check the logs in the "Logs" tab of your Space.
                 """)
-
+
         gr.Markdown("---")
-
+
         # Output sections
         status_output = gr.Textbox(
             label="📊 Generation Status",
@@ -597,7 +597,7 @@ def create_interface():
             interactive=False,
             value="Enter your story and click 'Generate' to begin!"
         )
-
+
         # HTML output for detailed current segment text and AI prompt
         current_segment_details_html = gr.HTML(
             label="Current Segment Details & AI Prompt",
@@ -614,7 +614,7 @@ def create_interface():
             autoplay=True,  # Auto-play the new segment when it loads
             show_share_button=False  # Hide share button
         )
-
+
         # Gallery to show generated images cumulatively
         image_gallery = gr.Gallery(
             label="🖼️ Generated Images (Overall Story Visuals)",
@@ -641,20 +641,20 @@ def create_interface():
         generate_btn.click(
             fn=process_story_gradio,
             inputs=[
-                story_input,
-                character_input,
+                story_input,
+                character_input,
                 style_dropdown
             ],
             outputs=[
-                status_output,
-                current_segment_details_html,
-                current_video_preview,
-                image_gallery,
-                download_zip_file
-
-                api_name="generate_story_video",
+                status_output,
+                current_segment_details_html,
+                current_video_preview,
+                image_gallery,
+                download_zip_file
+            ],  # CORRECTED: This closes the 'outputs' list.
+            api_name="generate_story_video",
             concurrency_limit=1  # CRUCIAL: Ensures only one user can run at a time, for free tier
-            )
+        )  # This correctly closes the 'generate_btn.click' method call

         # Examples for quick testing
         gr.Examples(
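
The hunk above is the substantive fix in this commit: the old code never closed the outputs list, so api_name= and everything after it sat inside the list and the file could not parse. After the fix, the call has the usual shape:

generate_btn.click(
    fn=process_story_gradio,
    inputs=[story_input, character_input, style_dropdown],
    outputs=[status_output, current_segment_details_html,
             current_video_preview, image_gallery, download_zip_file],
    api_name="generate_story_video",
    concurrency_limit=1,  # one run at a time on free-tier hardware
)
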
@@ -676,12 +676,12 @@ def create_interface():
                 ]
             ],
             inputs=[
-                story_input,
-                character_input,
+                story_input,
+                character_input,
                 style_dropdown
             ],
-            label="Try these example stories!"
-            #
+            label="Try these example stories!"
+            # You can uncomment the line below if you want examples to run automatically when clicked
             # fn=process_story_gradio, outputs=[status_output, current_segment_details_html, current_video_preview, image_gallery, download_zip_file]
         )
@@ -691,6 +691,5 @@ def create_interface():
 if __name__ == "__main__":
     app = create_interface()
     # Set queue and concurrency_count to 1 for free tier to prevent overload and timeouts
-    app.queue(max_size=1, concurrency_count=1)
+    app.queue(max_size=1, concurrency_count=1)
     app.launch()
-
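
A final compatibility note: concurrency_count is a Gradio 3.x parameter of queue(); Gradio 4.x removed it in favour of default_concurrency_limit on queue() plus the per-event concurrency_limit already used in the click() call above. If this Space runs Gradio 4.x, this line raises a TypeError at startup, which may explain the runtime error the Space is showing. A 4.x-style sketch:

if __name__ == "__main__":
    app = create_interface()
    # Gradio 4.x: cap the queue and run one job at a time on free-tier CPU
    app.queue(max_size=1, default_concurrency_limit=1)
    app.launch()
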