TheAwakenOne committed
Commit 79b4b89 · verified · 1 Parent(s): e98097c

Update app.py

Files changed (1): app.py (+140 -197)
app.py CHANGED
@@ -3,225 +3,172 @@
 Cosmos-Predict2 for Hugging Face Spaces ZeroGPU
 """

+import subprocess
 import os
+
+# Install flash-attn for better performance
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True
+)
+
 import gradio as gr
-import torch
 import spaces
-from diffusers import DiffusionPipeline
+import torch
+from diffusers import Cosmos2TextToImagePipeline
+from transformers import AutoModelForCausalLM, SiglipProcessor
+import random
 import gc
-from typing import Optional
 import warnings

 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)

-class CosmosZeroGPUApp:
-    def __init__(self):
-        self.pipe = None
-        self.model_loaded = False
-        print("🌌 Cosmos-Predict2 ZeroGPU App Starting...")
+# Add flash_attention_2 to the safeguard model for better performance
+def patch_from_pretrained(cls):
+    orig_method = cls.from_pretrained
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("attn_implementation", "flash_attention_2")
+        kwargs.setdefault("torch_dtype", torch.bfloat16)
+        return orig_method(*args, **kwargs)
+    cls.from_pretrained = new_from_pretrained
+
+patch_from_pretrained(AutoModelForCausalLM)
+
+# Add a `use_fast` to the safeguard image processor
+def patch_processor_fast(cls):
+    orig_method = cls.from_pretrained
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("use_fast", True)
+        return orig_method(*args, **kwargs)
+    cls.from_pretrained = new_from_pretrained
+
+patch_processor_fast(SiglipProcessor)
+
+print("🌌 Loading Cosmos-Predict2 model...")
+
+# Load the model at startup
+model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
+pipe = Cosmos2TextToImagePipeline.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16
+)
+pipe.to("cuda")
+
+print("✅ Cosmos-Predict2 model loaded successfully!")
+
+# Default negative prompt for better quality
+DEFAULT_NEGATIVE_PROMPT = "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."
+
+def get_memory_info():
+    """Get current memory usage"""
+    if torch.cuda.is_available():
+        vram_used = torch.cuda.memory_allocated(0) / 1024**3
+        return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+    else:
+        return "GPU: Not allocated (ZeroGPU will assign when needed)"
+
+@spaces.GPU(duration=120) # 2 minutes for generation
+def generate_image(prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
+                   seed=-1, width=1024, height=1024, randomize_seed=True,
+                   progress=gr.Progress(track_tqdm=True)):
+    """Generate image with ZeroGPU H200"""

-    def get_memory_info(self):
-        """Get current memory usage - simplified for ZeroGPU"""
-        if torch.cuda.is_available():
-            vram_used = torch.cuda.memory_allocated(0) / 1024**3
-            return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+    try:
+        # Handle seed
+        if randomize_seed or seed == -1:
+            actual_seed = random.randint(0, 1000000)
         else:
-            return "GPU: Not allocated (ZeroGPU will assign when needed)"
-
-    @spaces.GPU(duration=300) # 5 minutes for model loading
-    def load_model(self, progress=gr.Progress()):
-        """Load model with ZeroGPU"""
-        if self.model_loaded:
-            return "✅ Model already loaded!", self.get_memory_info()
+            actual_seed = seed
+
+        generator = torch.Generator().manual_seed(actual_seed)
+
+        # Use default negative prompt if none provided
+        if not negative_prompt.strip():
+            negative_prompt = DEFAULT_NEGATIVE_PROMPT
+
+        # With 70GB VRAM, we can use much larger resolutions!
+        max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
+        current_pixels = width * height
+
+        if current_pixels > max_pixels:
+            # Scale down proportionally
+            scale = (max_pixels / current_pixels) ** 0.5
+            width = int(width * scale)
+            height = int(height * scale)
+            # Round to nearest 64 for compatibility
+            width = (width // 64) * 64
+            height = (height // 64) * 64
+            size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
+        else:
+            size_msg = f"📈 Generating at {width}x{height}"
+
+        print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}, seed: {actual_seed}")

-        try:
-            progress(0.1, desc="🔄 Initializing ZeroGPU...")
-
-            # ZeroGPU automatically handles device allocation
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            print(f"🎮 Using device: {device}")
-
-            progress(0.3, desc="📥 Loading Cosmos-Predict2 model...")
-
-            model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
-
-            # Load model - much simpler with 70GB VRAM!
-            self.pipe = DiffusionPipeline.from_pretrained(
-                model_id,
-                torch_dtype=torch.bfloat16, # Use bfloat16 for better performance
-                device_map="auto",
-                use_safetensors=True,
-                trust_remote_code=True
+        # Generate with the powerful H200!
+        with torch.inference_mode():
+            result = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                num_inference_steps=num_steps,
+                guidance_scale=guidance_scale,
+                height=height,
+                width=width,
+                generator=generator
             )
-
-            progress(0.7, desc="⚡ Optimizing for H200...")
-
-            # Move to GPU
-            if torch.cuda.is_available():
-                self.pipe = self.pipe.to(device)
-
-            # Enable optimizations (optional with 70GB VRAM, but still good for speed)
-            try:
-                self.pipe.enable_attention_slicing()
-                print("✅ Attention slicing enabled")
-            except:
-                pass
-
-            try:
-                self.pipe.enable_xformers_memory_efficient_attention()
-                print("✅ xformers enabled")
-            except:
-                print("📝 xformers not available (optional)")
-
-            # Compile model for faster inference (optional)
-            try:
-                if hasattr(self.pipe, 'unet'):
-                    self.pipe.unet = torch.compile(self.pipe.unet, mode="reduce-overhead", fullgraph=True)
-                    print("✅ Model compiled for faster inference")
-            except:
-                print("📝 Model compilation not available (optional)")
-
-            progress(0.9, desc="🏁 Finalizing...")
-
-            self.model_loaded = True
-            torch.cuda.empty_cache()
-
-            progress(1.0, desc="✅ Ready!")
-            return "✅ Model loaded successfully on ZeroGPU H200!", self.get_memory_info()
-
-        except Exception as e:
-            self.model_loaded = False
-            error_msg = str(e)
-            if "401" in error_msg or "restricted" in error_msg:
-                return "❌ Access denied. Please ensure the model is publicly accessible.", self.get_memory_info()
-            return f"❌ Error loading model: {error_msg}", self.get_memory_info()
-
-    def unload_model(self):
-        """Unload model"""
-        if self.pipe is not None:
-            del self.pipe
-            self.pipe = None

-        self.model_loaded = False
+        # Extract image
+        if hasattr(result, 'images'):
+            image = result.images[0]
+        elif isinstance(result, list):
+            image = result[0]
+        else:
+            image = result
+
+        # Cleanup
+        del result
         torch.cuda.empty_cache()
-        gc.collect()

-        return "✅ Model unloaded!", self.get_memory_info()
-
-    @spaces.GPU(duration=120) # 2 minutes for generation
-    def generate_image(self, prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
-                       seed=-1, width=1024, height=1024, progress=gr.Progress()):
-        """Generate image with ZeroGPU H200"""
-        if not self.model_loaded or self.pipe is None:
-            return None, "❌ Please load the model first!", self.get_memory_info()
+        return image, f"✅ Generated successfully! {size_msg} (Seed: {actual_seed})", get_memory_info(), actual_seed

-        try:
-            progress(0.1, desc="🎨 Preparing generation...")
-
-            # With 70GB VRAM, we can use much larger resolutions!
-            max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
-            current_pixels = width * height
-
-            if current_pixels > max_pixels:
-                # Scale down proportionally
-                scale = (max_pixels / current_pixels) ** 0.5
-                width = int(width * scale)
-                height = int(height * scale)
-                # Round to nearest 64 for compatibility
-                width = (width // 64) * 64
-                height = (height // 64) * 64
-                size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
-            else:
-                size_msg = f"📈 Generating at {width}x{height}"
-
-            # Set seed for reproducibility
-            generator = None
-            if seed != -1:
-                generator = torch.Generator(device="cuda").manual_seed(seed)
-
-            progress(0.3, desc=f"🎨 Generating {width}x{height} image...")
-
-            print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}")
-
-            # Generate with the powerful H200!
-            with torch.inference_mode():
-                result = self.pipe(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt if negative_prompt else None,
-                    num_inference_steps=num_steps,
-                    guidance_scale=guidance_scale,
-                    height=height,
-                    width=width,
-                    generator=generator,
-                    output_type="pil"
-                )
-
-            progress(0.9, desc="🏁 Finalizing...")
-
-            # Extract image
-            if hasattr(result, 'images'):
-                image = result.images[0]
-            elif isinstance(result, list):
-                image = result[0]
-            else:
-                image = result
-
-            # Cleanup
-            del result
-            torch.cuda.empty_cache()
-
-            progress(1.0, desc="✅ Complete!")
-            return image, f"✅ Generated successfully! {size_msg}", self.get_memory_info()
-
-        except Exception as e:
-            torch.cuda.empty_cache()
-            return None, f"❌ Generation failed: {str(e)}", self.get_memory_info()
-
-# Initialize app
-app = CosmosZeroGPUApp()
+    except Exception as e:
+        torch.cuda.empty_cache()
+        return None, f"❌ Generation failed: {str(e)}", get_memory_info(), seed

 # Create Gradio interface
 def create_interface():
     with gr.Blocks(title="Cosmos-Predict2 ZeroGPU", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🌌 Cosmos-Predict2 on ZeroGPU
-        **Powered by ZeroGPU • High-resolution generation • Fast inference**
+        **Powered by Huggingface Spaces • High-resolution generation • Fast inference**

-        This Space uses ZeroGPU for efficient GPU allocation. The GPU is assigned when you load the model or generate images.
+        This Space uses ZeroGPU for efficient GPU allocation. The model is pre-loaded and ready to generate!
         """)

         # Memory status
         memory_display = gr.Textbox(
             label="📊 GPU Status",
-            value=app.get_memory_info(),
+            value=get_memory_info(),
             interactive=False
         )

         with gr.Row():
             with gr.Column():
-                # Model management
-                gr.Markdown("### 🎮 Model Management")
-
-                with gr.Row():
-                    load_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
-                    unload_btn = gr.Button("🗑️ Unload", variant="secondary")
-
-                model_status = gr.Textbox(label="Model Status", interactive=False)
-
                 # Generation settings
-                gr.Markdown("### 🎨 Generation Settings")
+                gr.Markdown("### 🎨 Generate High-Quality Images")

                 prompt = gr.Textbox(
                     label="Prompt",
                     placeholder="A futuristic robot in a high-tech laboratory with holographic displays...",
-                    lines=3
+                    lines=4,
+                    value="A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface."
                 )

                 negative_prompt = gr.Textbox(
-                    label="Negative Prompt (Optional)",
-                    placeholder="blurry, low quality, distorted, ugly, deformed...",
+                    label="Negative Prompt (Optional - has smart default)",
+                    placeholder="Leave empty to use optimized default negative prompt...",
                     lines=2
                 )

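The most interesting change in this hunk is the monkey-patch on `from_pretrained`: Flash Attention 2 and bfloat16 become defaults for the safeguard models, while `kwargs.setdefault` lets explicit caller arguments still win. A minimal standalone sketch of the same pattern, assuming nothing beyond plain Python (`DummyLoader` and `patch_defaults` are hypothetical names for illustration, not part of the commit):

# Sketch of the from_pretrained monkey-patch pattern used in this commit.
class DummyLoader:
    @classmethod
    def from_pretrained(cls, name, **kwargs):
        return f"loaded {name} with {kwargs}"

def patch_defaults(target, **defaults):
    orig = target.from_pretrained
    def wrapper(*args, **kwargs):
        for key, value in defaults.items():
            kwargs.setdefault(key, value)  # explicit caller kwargs still win
        return orig(*args, **kwargs)
    target.from_pretrained = wrapper

patch_defaults(DummyLoader, attn_implementation="flash_attention_2")
print(DummyLoader.from_pretrained("demo"))
# -> loaded demo with {'attn_implementation': 'flash_attention_2'}
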
@@ -233,7 +180,9 @@ def create_interface():
                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                     height = gr.Slider(512, 2048, value=1024, step=64, label="Height")

-                seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+                with gr.Row():
+                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                    seed = gr.Number(label="Seed", value=42, precision=0)

                 generate_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")

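The `randomize_seed` checkbox and `seed` field added above feed the seed logic from the first hunk: a fresh seed is drawn when randomizing (or when the seed is -1), otherwise the user's value is reused, and the seed actually used is returned so a result can be reproduced. A compact sketch of that contract (`resolve_seed` is a hypothetical helper, not in the commit):

import random
import torch

def resolve_seed(seed: int, randomize: bool) -> tuple[torch.Generator, int]:
    # Mirrors the app's handling: returns a seeded generator plus the seed used.
    actual = random.randint(0, 1000000) if randomize or seed == -1 else seed
    return torch.Generator().manual_seed(actual), actual

gen, used = resolve_seed(42, randomize=False)
assert used == 42  # a fixed seed gives reproducible generations
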
@@ -241,37 +190,28 @@ def create_interface():
                 # Output
                 output_image = gr.Image(label="Generated Image", height=600)
                 generation_status = gr.Textbox(label="Generation Status", interactive=False)
+                seed_output = gr.Number(label="Used Seed", interactive=False)

         # ZeroGPU info
         gr.Markdown("""
         ### 💡 ZeroGPU Features:
         - **70GB VRAM**: Generate high-resolution images up to 2048x2048
-        - **Dynamic allocation**: GPU assigned only when needed
+        - **Pre-loaded Model**: No waiting for model loading
         - **H200 powered**: Latest NVIDIA architecture for fast inference
-        - **Free to use**: Available to all users (PRO users get higher priority)
-        - **Auto-optimization**: Model compilation and memory efficiency
+        - **Smart defaults**: Optimized negative prompt included
+        - **Flash Attention**: Enhanced performance optimizations
         """)

         # Event handlers
-        load_btn.click(
-            app.load_model,
-            outputs=[model_status, memory_display]
-        )
-
-        unload_btn.click(
-            app.unload_model,
-            outputs=[model_status, memory_display]
-        )
-
         generate_btn.click(
-            app.generate_image,
-            inputs=[prompt, negative_prompt, steps, guidance, seed, width, height],
-            outputs=[output_image, generation_status, memory_display]
+            generate_image,
+            inputs=[prompt, negative_prompt, steps, guidance, seed, width, height, randomize_seed],
+            outputs=[output_image, generation_status, memory_display, seed_output]
         )

         # Auto-refresh memory status
         def refresh_memory():
-            return app.get_memory_info()
+            return get_memory_info()

         # Update memory display every 10 seconds
         gr.Timer(value=10).tick(refresh_memory, outputs=[memory_display])
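The rewired click handler above follows the standard Blocks pattern: `inputs` map positionally onto the handler's parameters, and the new `seed_output` component simply extends `outputs` to receive the extra return value. A stripped-down sketch of the same wiring (the `echo` handler and its components are hypothetical):

import gradio as gr

def echo(text, repeat):
    # Parameter order must match the inputs list below;
    # the returned tuple maps onto the outputs list.
    return text * int(repeat), f"repeated {int(repeat)}x"

with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    repeat = gr.Slider(1, 5, value=2, step=1, label="Repeat")
    out = gr.Textbox(label="Output")
    status = gr.Textbox(label="Status")
    gr.Button("Run").click(echo, inputs=[text, repeat], outputs=[out, status])

if __name__ == "__main__":
    demo.launch()
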
@@ -284,7 +224,9 @@ def create_interface():
             ["A futuristic space station orbiting Earth, with solar panels and docking bays, sci-fi concept art, cinematic"],
             ["A serene Japanese garden with cherry blossoms, koi pond, and traditional architecture, peaceful atmosphere, masterpiece"],
             ["A steampunk mechanical owl with brass gears and copper pipes, intricate details, vintage engineering"],
-            ["An underwater city with bioluminescent coral and glass domes, marine life swimming around, fantasy architecture"]
+            ["A well-worn broom sweeps across a dusty wooden floor, its bristles gathering crumbs and flecks of debris in swift, rhythmic strokes"],
+            ["A robotic arm tightens a bolt beneath the hood of a car, its tool head rotating with practiced torque, precision engineering"],
+            ["A nighttime city bus terminal gradually shifts from stillness to subtle movement, urban night scene with illuminated signage"]
             ],
             inputs=[prompt],
             label="🎨 Example Prompts (optimized for high-resolution generation)"
@@ -293,11 +235,12 @@ def create_interface():
         # Usage tips
         gr.Markdown("""
         ### 🚀 Usage Tips:
-        1. **First time**: Click "Load Model" to download and initialize Cosmos-Predict2
+        1. **Ready to go**: Model is pre-loaded, just click generate!
         2. **High-res**: Try resolutions up to 2048x2048 with the powerful H200 GPU
         3. **Quality**: Use 25-30 steps for high quality, 15-20 for faster generation
         4. **Prompts**: Be descriptive and specific for best results
-        5. **Negative prompts**: Help avoid unwanted elements in your images
+        5. **Negative prompts**: Leave empty to use optimized defaults, or customize as needed
+        6. **Seeds**: Use randomize for variety, or set specific seed for reproducible results
         """)

     return interface
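The 2048x2048 ceiling in these tips corresponds to the clamp inside `generate_image`: oversized requests are scaled by `(max_pixels / current_pixels) ** 0.5` to preserve aspect ratio, then snapped down to multiples of 64. A self-contained sketch of that arithmetic:

MAX_PIXELS = 2048 * 2048  # the app's 4MP ceiling

def clamp_resolution(width: int, height: int) -> tuple[int, int]:
    # Scale oversized requests down proportionally, then round to multiples of 64.
    if width * height > MAX_PIXELS:
        scale = (MAX_PIXELS / (width * height)) ** 0.5
        width, height = int(width * scale), int(height * scale)
        width, height = (width // 64) * 64, (height // 64) * 64
    return width, height

print(clamp_resolution(4096, 2048))  # (2880, 1408), back under 4MP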
 