Spaces:

Munaf1987
/

replacebg

Running

App Files Files Community

Munaf1987 committed on Jul 1

Commit

7499b76

verified ·

1 Parent(s): 9ed4fc1

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -68

app.py CHANGED Viewed

@@ -152,6 +152,27 @@ class ProfessionalCartoonFilmGenerator:
             torch.cuda.empty_cache()
             gc.collect()
     def generate_professional_script(self, user_input: str) -> Dict[str, Any]:
         """Generate a professional cartoon script with detailed character development"""
@@ -417,42 +438,71 @@ class ProfessionalCartoonFilmGenerator:
                         except Exception as e:
                             print(f"⚠️ LoRA loading failed: {e}")
-                # Professional character prompt
-                prompt = f"""
-                anime style, professional cartoon character design, {character['description']},
-                character sheet style, multiple poses reference, clean white background,
-                2D animation model sheet, Disney-Pixar quality, highly detailed,
-                consistent character design, expressive face, perfect for animation,
-                {character.get('animation_style', 'high-quality character design')}
-                """
                 negative_prompt = """
                 realistic, 3D render, dark, scary, inappropriate, low quality, blurry,
                 inconsistent, amateur, simple, crude, manga, sketch
                 """
-                # Handle different pipeline types
-                if hasattr(self.flux_pipe, 'max_sequence_length'):
-                    # FLUX pipeline
-                    image = self.flux_pipe(
-                        prompt=prompt,
-                        negative_prompt=negative_prompt,
-                        num_inference_steps=25,  # High quality steps
-                        guidance_scale=3.5,
-                        height=1024,  # High resolution
-                        width=1024,
-                        max_sequence_length=256
-                    ).images[0]
-                else:
-                    # Stable Diffusion pipeline
-                    image = self.flux_pipe(
-                        prompt=prompt,
-                        negative_prompt=negative_prompt,
-                        num_inference_steps=25,  # High quality steps
-                        guidance_scale=7.5,
-                        height=1024,  # High resolution
-                        width=1024
-                    ).images[0]
                 char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
                 image.save(char_path)
@@ -480,42 +530,73 @@ class ProfessionalCartoonFilmGenerator:
             try:
                 print(f"🏞️ Creating cinematic background for scene {scene['scene_number']}")
-                prompt = f"""
-                Professional cartoon background art, {scene['background']},
-                {scene['mood']} atmosphere, {color_palette} color palette,
-                cinematic composition, {scene.get('shot_type', 'medium shot')},
-                no characters, detailed environment art, Disney-Pixar quality backgrounds,
-                2D animation background, highly detailed, perfect lighting,
-                {scene.get('animation_notes', 'professional background art')}
-                """
                 negative_prompt = """
                 characters, people, animals, realistic, dark, scary, low quality,
                 blurry, simple, amateur, 3D render
                 """
-                # Handle different pipeline types for backgrounds
-                if hasattr(self.flux_pipe, 'max_sequence_length'):
-                    # FLUX pipeline
-                    image = self.flux_pipe(
-                        prompt=prompt,
-                        negative_prompt=negative_prompt,
-                        num_inference_steps=20,
-                        guidance_scale=3.0,
-                        height=768,   # 4:3 aspect ratio for traditional animation
-                        width=1024,
-                        max_sequence_length=256
-                    ).images[0]
-                else:
-                    # Stable Diffusion pipeline
-                    image = self.flux_pipe(
-                        prompt=prompt,
-                        negative_prompt=negative_prompt,
-                        num_inference_steps=20,
-                        guidance_scale=7.0,
-                        height=768,   # 4:3 aspect ratio for traditional animation
-                        width=1024
-                    ).images[0]
                 bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
                 image.save(bg_path)
@@ -637,14 +718,17 @@ class ProfessionalCartoonFilmGenerator:
         try:
             characters_text = ", ".join(scene['characters_present'])
-            # Professional prompt for Open-Sora
-            prompt = f"""
-            Professional 2D cartoon animation, {characters_text} in {scene['background']},
-            {scene['mood']} mood, {scene.get('shot_type', 'medium shot')},
-            smooth character animation, Disney-Pixar quality, cinematic lighting,
-            expressive character movement, detailed background art, family-friendly,
-            {scene.get('animation_notes', 'high-quality animation')}
-            """
             video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"

             torch.cuda.empty_cache()
             gc.collect()
+    def optimize_prompt_for_clip(self, prompt: str, max_tokens: int = 70) -> str:
+        """Optimize prompt to fit within CLIP token limit"""
+        try:
+            # Simple word-based token estimation (CLIP uses ~1.3 words per token)
+            words = prompt.split()
+            if len(words) <= max_tokens:
+                return prompt
+            # Truncate to fit within token limit
+            optimized_words = words[:max_tokens]
+            optimized_prompt = " ".join(optimized_words)
+            print(f"📝 Prompt optimized: {len(words)} words → {len(optimized_words)} words")
+            return optimized_prompt
+        except Exception as e:
+            print(f"⚠️ Prompt optimization failed: {e}")
+            # Fallback: return first 50 words
+            words = prompt.split()
+            return " ".join(words[:50])
     def generate_professional_script(self, user_input: str) -> Dict[str, Any]:
         """Generate a professional cartoon script with detailed character development"""
                         except Exception as e:
                             print(f"⚠️ LoRA loading failed: {e}")
+                # Professional character prompt (optimized for CLIP token limit)
+                character_desc = character['description'][:100]  # Limit description length
+                animation_style = character.get('animation_style', 'high-quality character design')[:50]
+                prompt = f"anime style, professional cartoon character, {character_desc}, character sheet, clean background, 2D animation, Disney quality, detailed, {animation_style}"
+                # Use the optimization function to ensure CLIP compatibility
+                prompt = self.optimize_prompt_for_clip(prompt)
                 negative_prompt = """
                 realistic, 3D render, dark, scary, inappropriate, low quality, blurry,
                 inconsistent, amateur, simple, crude, manga, sketch
                 """
+                # Handle different pipeline types with CLIP token error handling
+                try:
+                    if hasattr(self.flux_pipe, 'max_sequence_length'):
+                        # FLUX pipeline
+                        image = self.flux_pipe(
+                            prompt=prompt,
+                            negative_prompt=negative_prompt,
+                            num_inference_steps=25,  # High quality steps
+                            guidance_scale=3.5,
+                            height=1024,  # High resolution
+                            width=1024,
+                            max_sequence_length=256
+                        ).images[0]
+                    else:
+                        # Stable Diffusion pipeline
+                        image = self.flux_pipe(
+                            prompt=prompt,
+                            negative_prompt=negative_prompt,
+                            num_inference_steps=25,  # High quality steps
+                            guidance_scale=7.5,
+                            height=1024,  # High resolution
+                            width=1024
+                        ).images[0]
+                except Exception as e:
+                    if "CLIP" in str(e) and "token" in str(e).lower():
+                        print(f"⚠️ CLIP token error detected, using simplified prompt...")
+                        # Fallback to very simple prompt
+                        simple_prompt = f"anime character, {character['name']}, clean background"
+                        simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=30)
+                        if hasattr(self.flux_pipe, 'max_sequence_length'):
+                            image = self.flux_pipe(
+                                prompt=simple_prompt,
+                                negative_prompt="low quality, blurry",
+                                num_inference_steps=20,
+                                guidance_scale=3.0,
+                                height=1024,
+                                width=1024,
+                                max_sequence_length=128
+                            ).images[0]
+                        else:
+                            image = self.flux_pipe(
+                                prompt=simple_prompt,
+                                negative_prompt="low quality, blurry",
+                                num_inference_steps=20,
+                                guidance_scale=7.0,
+                                height=1024,
+                                width=1024
+                            ).images[0]
+                    else:
+                        raise e
                 char_path = f"{self.temp_dir}/character_{character['name'].replace(' ', '_')}.png"
                 image.save(char_path)
             try:
                 print(f"🏞️ Creating cinematic background for scene {scene['scene_number']}")
+                # Professional background prompt (optimized for CLIP token limit)
+                background_desc = scene['background'][:80]  # Limit background description
+                mood = scene['mood'][:30]
+                shot_type = scene.get('shot_type', 'medium shot')[:20]
+                animation_notes = scene.get('animation_notes', 'professional background art')[:40]
+                prompt = f"Professional cartoon background, {background_desc}, {mood} atmosphere, {color_palette} colors, {shot_type}, no characters, detailed environment, Disney quality, {animation_notes}"
+                # Use the optimization function to ensure CLIP compatibility
+                prompt = self.optimize_prompt_for_clip(prompt)
                 negative_prompt = """
                 characters, people, animals, realistic, dark, scary, low quality,
                 blurry, simple, amateur, 3D render
                 """
+                # Handle different pipeline types for backgrounds with CLIP token error handling
+                try:
+                    if hasattr(self.flux_pipe, 'max_sequence_length'):
+                        # FLUX pipeline
+                        image = self.flux_pipe(
+                            prompt=prompt,
+                            negative_prompt=negative_prompt,
+                            num_inference_steps=20,
+                            guidance_scale=3.0,
+                            height=768,   # 4:3 aspect ratio for traditional animation
+                            width=1024,
+                            max_sequence_length=256
+                        ).images[0]
+                    else:
+                        # Stable Diffusion pipeline
+                        image = self.flux_pipe(
+                            prompt=prompt,
+                            negative_prompt=negative_prompt,
+                            num_inference_steps=20,
+                            guidance_scale=7.0,
+                            height=768,   # 4:3 aspect ratio for traditional animation
+                            width=1024
+                        ).images[0]
+                except Exception as e:
+                    if "CLIP" in str(e) and "token" in str(e).lower():
+                        print(f"⚠️ CLIP token error detected for background, using simplified prompt...")
+                        # Fallback to very simple prompt
+                        simple_prompt = f"cartoon background, {scene['background'][:40]}, clean"
+                        simple_prompt = self.optimize_prompt_for_clip(simple_prompt, max_tokens=25)
+                        if hasattr(self.flux_pipe, 'max_sequence_length'):
+                            image = self.flux_pipe(
+                                prompt=simple_prompt,
+                                negative_prompt="characters, low quality",
+                                num_inference_steps=15,
+                                guidance_scale=3.0,
+                                height=768,
+                                width=1024,
+                                max_sequence_length=128
+                            ).images[0]
+                        else:
+                            image = self.flux_pipe(
+                                prompt=simple_prompt,
+                                negative_prompt="characters, low quality",
+                                num_inference_steps=15,
+                                guidance_scale=7.0,
+                                height=768,
+                                width=1024
+                            ).images[0]
+                    else:
+                        raise e
                 bg_path = f"{self.temp_dir}/background_scene_{scene['scene_number']}.png"
                 image.save(bg_path)
         try:
             characters_text = ", ".join(scene['characters_present'])
+            # Professional prompt for Open-Sora (optimized for CLIP token limit)
+            characters_text = characters_text[:60]  # Limit character text
+            background_desc = scene['background'][:60]
+            mood = scene['mood'][:20]
+            shot_type = scene.get('shot_type', 'medium shot')[:15]
+            animation_notes = scene.get('animation_notes', 'high-quality animation')[:30]
+            prompt = f"Professional 2D cartoon animation, {characters_text} in {background_desc}, {mood} mood, {shot_type}, smooth animation, Disney quality, cinematic lighting, {animation_notes}"
+            # Use the optimization function to ensure CLIP compatibility
+            prompt = self.optimize_prompt_for_clip(prompt)
             video_path = f"{self.temp_dir}/scene_{scene['scene_number']}.mp4"