TheAwakenOne committed on
Commit
5a10d46
·
verified ·
1 Parent(s): efc1566

Update app.py

Files changed (1)
  1. app.py +298 -142
app.py CHANGED
@@ -1,154 +1,310 @@
- import gradio as gr
- import numpy as np
- import random
-
- # import spaces #[uncomment to use ZeroGPU]
- from diffusers import DiffusionPipeline
- import torch
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
-
- if torch.cuda.is_available():
-     torch_dtype = torch.float16
- else:
-     torch_dtype = torch.float32
-
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
- pipe = pipe.to(device)
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
-
-
- # @spaces.GPU #[uncomment to use ZeroGPU]
- def infer(
-     prompt,
-     negative_prompt,
-     seed,
-     randomize_seed,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     generator = torch.Generator().manual_seed(seed)
-
-     image = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         generator=generator,
-     ).images[0]
-
-     return image, seed
-
-
- examples = [
-     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-     "An astronaut riding a green horse",
-     "A delicious ceviche cheesecake slice",
- ]
-
- css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 640px;
- }
  """

- with gr.Blocks(css=css) as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # Text-to-Image Gradio Template")
-
-         with gr.Row():
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )
-
-             run_button = gr.Button("Run", scale=0, variant="primary")
-
-         result = gr.Image(label="Result", show_label=False)

-         with gr.Accordion("Advanced Settings", open=False):
-             negative_prompt = gr.Text(
-                 label="Negative prompt",
-                 max_lines=1,
-                 placeholder="Enter a negative prompt",
-                 visible=False,
-             )

-             seed = gr.Slider(
-                 label="Seed",
-                 minimum=0,
-                 maximum=MAX_SEED,
-                 step=1,
-                 value=0,
              )
-
-             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-             with gr.Row():
-                 width = gr.Slider(
-                     label="Width",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024, # Replace with defaults that work for your model
                  )

-                 height = gr.Slider(
-                     label="Height",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024, # Replace with defaults that work for your model
-                 )

-             with gr.Row():
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale",
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0, # Replace with defaults that work for your model
                  )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps",
-                     minimum=1,
-                     maximum=50,
-                     step=1,
-                     value=2, # Replace with defaults that work for your model
                  )
-
-         gr.Examples(examples=examples, inputs=[prompt])
-     gr.on(
-         triggers=[run_button.click, prompt.submit],
-         fn=infer,
-         inputs=[
-             prompt,
-             negative_prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-         ],
-         outputs=[result, seed],
-     )

  if __name__ == "__main__":
-     demo.launch()
+ #!/usr/bin/env python3
+ """
+ Cosmos-Predict2 for Hugging Face Spaces ZeroGPU
+ Optimized for H200 with 70GB VRAM - much simpler than the RTX 5080 version!
  """

+ import os
+ import gradio as gr
+ import torch
+ import spaces
+ from diffusers import DiffusionPipeline
+ import gc
+ from typing import Optional
+ import warnings

+ # Suppress warnings for cleaner output
+ warnings.filterwarnings("ignore", category=UserWarning)
+ warnings.filterwarnings("ignore", category=FutureWarning)

+ class CosmosZeroGPUApp:
+     def __init__(self):
+         self.pipe = None
+         self.model_loaded = False
+         print("🌌 Cosmos-Predict2 ZeroGPU App Starting...")
+
+     def get_memory_info(self):
+         """Get current memory usage - simplified for ZeroGPU"""
+         if torch.cuda.is_available():
+             vram_used = torch.cuda.memory_allocated(0) / 1024**3
+             return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+         else:
+             return "GPU: Not allocated (ZeroGPU will assign when needed)"
+
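+     # NOTE: On ZeroGPU a GPU is attached only while a @spaces.GPU-decorated
+     # function is running; `duration` is the upper bound, in seconds, on how
+     # long a single call may hold the allocation.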
+     @spaces.GPU(duration=300) # 5 minutes for model loading
+     def load_model(self, progress=gr.Progress()):
+         """Load model with ZeroGPU"""
+         if self.model_loaded:
+             return "✅ Model already loaded!", self.get_memory_info()
+
+         try:
+             progress(0.1, desc="🔄 Initializing ZeroGPU...")
+
+             # ZeroGPU automatically handles device allocation
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+             print(f"🎮 Using device: {device}")
+
+             progress(0.3, desc="📥 Loading Cosmos-Predict2 model...")
+
+             model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
+
+             # Load model - much simpler with 70GB VRAM!
+             # (No device_map here: DiffusionPipeline does not accept
+             # device_map="auto", and a device-mapped pipeline cannot be
+             # moved with .to(); the pipeline is moved to the GPU below.)
+             self.pipe = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 torch_dtype=torch.bfloat16, # Use bfloat16 for better performance
+                 use_safetensors=True,
+                 trust_remote_code=True
              )
+
+             progress(0.7, desc="⚡ Optimizing for H200...")
+
+             # Move to GPU
+             if torch.cuda.is_available():
+                 self.pipe = self.pipe.to(device)
+
+             # Enable optimizations (optional with 70GB VRAM, but still good for speed)
+             try:
+                 self.pipe.enable_attention_slicing()
+                 print("✅ Attention slicing enabled")
+             except Exception:
+                 pass
+
+             try:
+                 self.pipe.enable_xformers_memory_efficient_attention()
+                 print("✅ xformers enabled")
+             except Exception:
+                 print("📝 xformers not available (optional)")
+
+             # Compile the denoiser for faster inference (optional).
+             # Cosmos-Predict2 is transformer-based, so check `transformer`
+             # before falling back to `unet`.
+             try:
+                 if getattr(self.pipe, "transformer", None) is not None:
+                     self.pipe.transformer = torch.compile(self.pipe.transformer, mode="reduce-overhead", fullgraph=True)
+                     print("✅ Model compiled for faster inference")
+                 elif getattr(self.pipe, "unet", None) is not None:
+                     self.pipe.unet = torch.compile(self.pipe.unet, mode="reduce-overhead", fullgraph=True)
+                     print("✅ Model compiled for faster inference")
+             except Exception:
+                 print("📝 Model compilation not available (optional)")
+
+             progress(0.9, desc="🏁 Finalizing...")
+
+             self.model_loaded = True
+             torch.cuda.empty_cache()
+
+             progress(1.0, desc="✅ Ready!")
+             return "✅ Model loaded successfully on ZeroGPU H200!", self.get_memory_info()
+
+         except Exception as e:
+             self.model_loaded = False
+             error_msg = str(e)
+             if "401" in error_msg or "restricted" in error_msg:
+                 return "❌ Access denied. Please ensure the model is publicly accessible.", self.get_memory_info()
+             return f"❌ Error loading model: {error_msg}", self.get_memory_info()
+
+     def unload_model(self):
+         """Unload model"""
+         if self.pipe is not None:
+             del self.pipe
+             self.pipe = None
+
+         self.model_loaded = False
+         torch.cuda.empty_cache()
+         gc.collect()
+
+         return "✅ Model unloaded!", self.get_memory_info()
+
+     @spaces.GPU(duration=120) # 2 minutes for generation
+     def generate_image(self, prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
+                        seed=-1, width=1024, height=1024, progress=gr.Progress()):
+         """Generate image with ZeroGPU H200"""
+         if not self.model_loaded or self.pipe is None:
+             return None, "❌ Please load the model first!", self.get_memory_info()
+
+         try:
+             progress(0.1, desc="🎨 Preparing generation...")
+
+             # With 70GB VRAM, we can use much larger resolutions!
+             max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
+             current_pixels = width * height
+
+             if current_pixels > max_pixels:
+                 # Scale down proportionally
+                 scale = (max_pixels / current_pixels) ** 0.5
+                 width = int(width * scale)
+                 height = int(height * scale)
+                 # Round down to a multiple of 64 for compatibility
+                 width = (width // 64) * 64
+                 height = (height // 64) * 64
+                 size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
+             else:
+                 size_msg = f"📈 Generating at {width}x{height}"
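+             # Worked example of the clamp above (hypothetical 2048x3072
+             # request, e.g. via the API): ~6.3MP scales by
+             # (4194304 / 6291456) ** 0.5 ~= 0.816 to 1672x2508, which
+             # rounds down to 1664x2496 (~4.15MP).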
+
+             # Set seed for reproducibility
+             generator = None
+             if seed != -1:
+                 generator = torch.Generator(device="cuda").manual_seed(seed)
+
+             progress(0.3, desc=f"🎨 Generating {width}x{height} image...")
+
+             print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}")
+
+             # Generate with the powerful H200!
+             with torch.inference_mode():
+                 result = self.pipe(
+                     prompt=prompt,
+                     negative_prompt=negative_prompt if negative_prompt else None,
+                     num_inference_steps=num_steps,
+                     guidance_scale=guidance_scale,
+                     height=height,
+                     width=width,
+                     generator=generator,
+                     output_type="pil"
                  )
+
+             progress(0.9, desc="🏁 Finalizing...")
+
+             # Extract image
+             if hasattr(result, 'images'):
+                 image = result.images[0]
+             elif isinstance(result, list):
+                 image = result[0]
+             else:
+                 image = result
+
+             # Cleanup
+             del result
+             torch.cuda.empty_cache()
+
+             progress(1.0, desc="✅ Complete!")
+             return image, f"✅ Generated successfully! {size_msg}", self.get_memory_info()
+
+         except Exception as e:
+             torch.cuda.empty_cache()
+             return None, f"❌ Generation failed: {str(e)}", self.get_memory_info()

+ # Initialize app
+ app = CosmosZeroGPUApp()

+ # Create Gradio interface
+ def create_interface():
+     with gr.Blocks(title="Cosmos-Predict2 ZeroGPU", theme=gr.themes.Soft()) as interface:
+         gr.Markdown("""
+         # 🌌 Cosmos-Predict2 on ZeroGPU
+         **Powered by NVIDIA H200 with 70GB VRAM • High-resolution generation • Fast inference**
+
+         This Space uses ZeroGPU for efficient GPU allocation. The GPU is assigned when you load the model or generate images.
+         """)
+
+         # Memory status
+         memory_display = gr.Textbox(
+             label="📊 GPU Status",
+             value=app.get_memory_info(),
+             interactive=False
+         )
+
+         with gr.Row():
+             with gr.Column():
+                 # Model management
+                 gr.Markdown("### 🎮 Model Management")
+
+                 with gr.Row():
+                     load_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
+                     unload_btn = gr.Button("🗑️ Unload", variant="secondary")
+
+                 model_status = gr.Textbox(label="Model Status", interactive=False)
+
+                 # Generation settings
+                 gr.Markdown("### 🎨 Generation Settings")
+
+                 prompt = gr.Textbox(
+                     label="Prompt",
+                     placeholder="A futuristic robot in a high-tech laboratory with holographic displays...",
+                     lines=3
                  )
+
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt (Optional)",
+                     placeholder="blurry, low quality, distorted, ugly, deformed...",
+                     lines=2
                  )
+
+                 with gr.Row():
+                     steps = gr.Slider(10, 50, value=25, step=5, label="Inference Steps")
+                     guidance = gr.Slider(1, 15, value=7.5, step=0.5, label="Guidance Scale")
+
+                 with gr.Row():
+                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
+                     height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
+
+                 seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+
+                 generate_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")
+
+             with gr.Column():
+                 # Output
+                 output_image = gr.Image(label="Generated Image", height=600)
+                 generation_status = gr.Textbox(label="Generation Status", interactive=False)
+
+                 # ZeroGPU info
+                 gr.Markdown("""
+                 ### 💡 ZeroGPU Features:
+                 - **70GB VRAM**: Generate high-resolution images up to 2048x2048
+                 - **Dynamic allocation**: GPU assigned only when needed
+                 - **H200 powered**: Latest NVIDIA architecture for fast inference
+                 - **Free to use**: Available to all users (PRO users get higher priority)
+                 - **Auto-optimization**: Model compilation and memory efficiency
+                 """)
+
+         # Event handlers
+         load_btn.click(
+             app.load_model,
+             outputs=[model_status, memory_display]
+         )
+
+         unload_btn.click(
+             app.unload_model,
+             outputs=[model_status, memory_display]
+         )
+
+         generate_btn.click(
+             app.generate_image,
+             inputs=[prompt, negative_prompt, steps, guidance, seed, width, height],
+             outputs=[output_image, generation_status, memory_display]
+         )
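+         # (The click inputs above map positionally onto generate_image:
+         # steps -> num_steps, guidance -> guidance_scale.)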
+
+         # Auto-refresh memory status
+         def refresh_memory():
+             return app.get_memory_info()
+
+         # Update memory display every 10 seconds
+         gr.Timer(value=10).tick(refresh_memory, outputs=[memory_display])
+
+         # Examples optimized for high-resolution
+         gr.Examples(
+             examples=[
+                 ["A detailed cyberpunk cityscape at night with neon signs, flying cars, and holographic advertisements, highly detailed, 8k resolution"],
+                 ["A majestic dragon soaring through storm clouds with lightning, fantasy art, dramatic lighting, ultra detailed"],
+                 ["A futuristic space station orbiting Earth, with solar panels and docking bays, sci-fi concept art, cinematic"],
+                 ["A serene Japanese garden with cherry blossoms, koi pond, and traditional architecture, peaceful atmosphere, masterpiece"],
+                 ["A steampunk mechanical owl with brass gears and copper pipes, intricate details, vintage engineering"],
+                 ["An underwater city with bioluminescent coral and glass domes, marine life swimming around, fantasy architecture"]
+             ],
+             inputs=[prompt],
+             label="🎨 Example Prompts (optimized for high-resolution generation)"
+         )
+
+         # Usage tips
+         gr.Markdown("""
+         ### 🚀 Usage Tips:
+         1. **First time**: Click "Load Model" to download and initialize Cosmos-Predict2
+         2. **High-res**: Try resolutions up to 2048x2048 with the powerful H200 GPU
+         3. **Quality**: Use 25-30 steps for high quality, 15-20 for faster generation
+         4. **Prompts**: Be descriptive and specific for best results
+         5. **Negative prompts**: Help avoid unwanted elements in your images
+         """)
+
+     return interface

  if __name__ == "__main__":
+     print("🚀 Starting Cosmos-Predict2 ZeroGPU Space...")
+
+     interface = create_interface()
+     interface.launch()
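
A note on running the new app.py outside Spaces: `import spaces` only resolves on Hugging Face infrastructure. A minimal shim can be installed before importing app.py for local smoke-testing; the no-op `GPU` decorator below is a hypothetical sketch, not part of this commit:

import sys
import types

try:
    import spaces  # real package, present on Hugging Face Spaces
except ImportError:
    # Hypothetical stand-in so `import spaces` and @spaces.GPU(duration=...)
    # resolve on machines without the package.
    spaces = types.ModuleType("spaces")

    def GPU(duration=60):
        def wrap(fn):
            return fn  # no-op: return the function undecorated
        return wrap

    spaces.GPU = GPU
    sys.modules["spaces"] = spaces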