TheAwakenOne committed
Commit 79b4b89 · verified · 1 Parent(s): e98097c

Update app.py

Files changed (1): app.py (+140 -197)
app.py CHANGED
@@ -3,225 +3,172 @@
 Cosmos-Predict2 for Hugging Face Spaces ZeroGPU
 """

+import subprocess
 import os
+
+# Install flash-attn for better performance
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True
+)
+
 import gradio as gr
-import torch
 import spaces
-from diffusers import DiffusionPipeline
+import torch
+from diffusers import Cosmos2TextToImagePipeline
+from transformers import AutoModelForCausalLM, SiglipProcessor
+import random
 import gc
-from typing import Optional
 import warnings

 # Suppress warnings for cleaner output
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)

-class CosmosZeroGPUApp:
-    def __init__(self):
-        self.pipe = None
-        self.model_loaded = False
-        print("🌌 Cosmos-Predict2 ZeroGPU App Starting...")
+# Add flash_attention_2 to the safeguard model for better performance
+def patch_from_pretrained(cls):
+    orig_method = cls.from_pretrained
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("attn_implementation", "flash_attention_2")
+        kwargs.setdefault("torch_dtype", torch.bfloat16)
+        return orig_method(*args, **kwargs)
+    cls.from_pretrained = new_from_pretrained
+
+patch_from_pretrained(AutoModelForCausalLM)
+
+# Add a `use_fast` to the safeguard image processor
+def patch_processor_fast(cls):
+    orig_method = cls.from_pretrained
+    def new_from_pretrained(*args, **kwargs):
+        kwargs.setdefault("use_fast", True)
+        return orig_method(*args, **kwargs)
+    cls.from_pretrained = new_from_pretrained
+
+patch_processor_fast(SiglipProcessor)
+
+print("🌌 Loading Cosmos-Predict2 model...")
+
+# Load the model at startup
+model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
+pipe = Cosmos2TextToImagePipeline.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16
+)
+pipe.to("cuda")
+
+print("✅ Cosmos-Predict2 model loaded successfully!")
+
+# Default negative prompt for better quality
+DEFAULT_NEGATIVE_PROMPT = "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality."
+
+def get_memory_info():
+    """Get current memory usage"""
+    if torch.cuda.is_available():
+        vram_used = torch.cuda.memory_allocated(0) / 1024**3
+        return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+    else:
+        return "GPU: Not allocated (ZeroGPU will assign when needed)"
+
+@spaces.GPU(duration=120) # 2 minutes for generation
+def generate_image(prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
+                   seed=-1, width=1024, height=1024, randomize_seed=True,
+                   progress=gr.Progress(track_tqdm=True)):
+    """Generate image with ZeroGPU H200"""

-    def get_memory_info(self):
-        """Get current memory usage - simplified for ZeroGPU"""
-        if torch.cuda.is_available():
-            vram_used = torch.cuda.memory_allocated(0) / 1024**3
-            return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+    try:
+        # Handle seed
+        if randomize_seed or seed == -1:
+            actual_seed = random.randint(0, 1000000)
         else:
-            return "GPU: Not allocated (ZeroGPU will assign when needed)"
-
-    @spaces.GPU(duration=300) # 5 minutes for model loading
-    def load_model(self, progress=gr.Progress()):
-        """Load model with ZeroGPU"""
-        if self.model_loaded:
-            return "✅ Model already loaded!", self.get_memory_info()
+            actual_seed = seed
+
+        generator = torch.Generator().manual_seed(actual_seed)
+
+        # Use default negative prompt if none provided
+        if not negative_prompt.strip():
+            negative_prompt = DEFAULT_NEGATIVE_PROMPT
+
+        # With 70GB VRAM, we can use much larger resolutions!
+        max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
+        current_pixels = width * height
+
+        if current_pixels > max_pixels:
+            # Scale down proportionally
+            scale = (max_pixels / current_pixels) ** 0.5
+            width = int(width * scale)
+            height = int(height * scale)
+            # Round to nearest 64 for compatibility
+            width = (width // 64) * 64
+            height = (height // 64) * 64
+            size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
+        else:
+            size_msg = f"📈 Generating at {width}x{height}"
+
+        print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}, seed: {actual_seed}")

-        try:
-            progress(0.1, desc="🔄 Initializing ZeroGPU...")
-
-            # ZeroGPU automatically handles device allocation
-            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            print(f"🎮 Using device: {device}")
-
-            progress(0.3, desc="📥 Loading Cosmos-Predict2 model...")
-
-            model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
-
-            # Load model - much simpler with 70GB VRAM!
-            self.pipe = DiffusionPipeline.from_pretrained(
-                model_id,
-                torch_dtype=torch.bfloat16, # Use bfloat16 for better performance
-                device_map="auto",
-                use_safetensors=True,
-                trust_remote_code=True
+        # Generate with the powerful H200!
+        with torch.inference_mode():
+            result = pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                num_inference_steps=num_steps,
+                guidance_scale=guidance_scale,
+                height=height,
+                width=width,
+                generator=generator
             )
-
-            progress(0.7, desc="⚡ Optimizing for H200...")
-
-            # Move to GPU
-            if torch.cuda.is_available():
-                self.pipe = self.pipe.to(device)
-
-            # Enable optimizations (optional with 70GB VRAM, but still good for speed)
-            try:
-                self.pipe.enable_attention_slicing()
-                print("✅ Attention slicing enabled")
-            except:
-                pass
-
-            try:
-                self.pipe.enable_xformers_memory_efficient_attention()
-                print("✅ xformers enabled")
-            except:
-                print("📝 xformers not available (optional)")
-
-            # Compile model for faster inference (optional)
-            try:
-                if hasattr(self.pipe, 'unet'):
-                    self.pipe.unet = torch.compile(self.pipe.unet, mode="reduce-overhead", fullgraph=True)
-                    print("✅ Model compiled for faster inference")
-            except:
-                print("📝 Model compilation not available (optional)")
-
-            progress(0.9, desc="🏁 Finalizing...")
-
-            self.model_loaded = True
-            torch.cuda.empty_cache()
-
-            progress(1.0, desc="✅ Ready!")
-            return "✅ Model loaded successfully on ZeroGPU H200!", self.get_memory_info()
-
-        except Exception as e:
-            self.model_loaded = False
-            error_msg = str(e)
-            if "401" in error_msg or "restricted" in error_msg:
-                return "❌ Access denied. Please ensure the model is publicly accessible.", self.get_memory_info()
-            return f"❌ Error loading model: {error_msg}", self.get_memory_info()
-
-    def unload_model(self):
-        """Unload model"""
-        if self.pipe is not None:
-            del self.pipe
-            self.pipe = None

-        self.model_loaded = False
+        # Extract image
+        if hasattr(result, 'images'):
+            image = result.images[0]
+        elif isinstance(result, list):
+            image = result[0]
+        else:
+            image = result
+
+        # Cleanup
+        del result
         torch.cuda.empty_cache()
-        gc.collect()

-        return "✅ Model unloaded!", self.get_memory_info()
-
-    @spaces.GPU(duration=120) # 2 minutes for generation
-    def generate_image(self, prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
-                       seed=-1, width=1024, height=1024, progress=gr.Progress()):
-        """Generate image with ZeroGPU H200"""
-        if not self.model_loaded or self.pipe is None:
-            return None, "❌ Please load the model first!", self.get_memory_info()
+        return image, f"✅ Generated successfully! {size_msg} (Seed: {actual_seed})", get_memory_info(), actual_seed

-        try:
-            progress(0.1, desc="🎨 Preparing generation...")
-
-            # With 70GB VRAM, we can use much larger resolutions!
-            max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
-            current_pixels = width * height
-
-            if current_pixels > max_pixels:
-                # Scale down proportionally
-                scale = (max_pixels / current_pixels) ** 0.5
-                width = int(width * scale)
-                height = int(height * scale)
-                # Round to nearest 64 for compatibility
-                width = (width // 64) * 64
-                height = (height // 64) * 64
-                size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
-            else:
-                size_msg = f"📈 Generating at {width}x{height}"
-
-            # Set seed for reproducibility
-            generator = None
-            if seed != -1:
-                generator = torch.Generator(device="cuda").manual_seed(seed)
-
-            progress(0.3, desc=f"🎨 Generating {width}x{height} image...")
-
-            print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}")
-
-            # Generate with the powerful H200!
-            with torch.inference_mode():
-                result = self.pipe(
-                    prompt=prompt,
-                    negative_prompt=negative_prompt if negative_prompt else None,
-                    num_inference_steps=num_steps,
-                    guidance_scale=guidance_scale,
-                    height=height,
-                    width=width,
-                    generator=generator,
-                    output_type="pil"
-                )
-
-            progress(0.9, desc="🏁 Finalizing...")
-
-            # Extract image
-            if hasattr(result, 'images'):
-                image = result.images[0]
-            elif isinstance(result, list):
-                image = result[0]
-            else:
-                image = result
-
-            # Cleanup
-            del result
-            torch.cuda.empty_cache()
-
-            progress(1.0, desc="✅ Complete!")
-            return image, f"✅ Generated successfully! {size_msg}", self.get_memory_info()
-
-        except Exception as e:
-            torch.cuda.empty_cache()
-            return None, f"❌ Generation failed: {str(e)}", self.get_memory_info()
-
-# Initialize app
-app = CosmosZeroGPUApp()
+    except Exception as e:
+        torch.cuda.empty_cache()
+        return None, f"❌ Generation failed: {str(e)}", get_memory_info(), seed

 # Create Gradio interface
 def create_interface():
     with gr.Blocks(title="Cosmos-Predict2 ZeroGPU", theme=gr.themes.Soft()) as interface:
         gr.Markdown("""
         # 🌌 Cosmos-Predict2 on ZeroGPU
-        **Powered by ZeroGPU • High-resolution generation • Fast inference**
+        **Powered by Huggingface Spaces • High-resolution generation • Fast inference**

-        This Space uses ZeroGPU for efficient GPU allocation. The GPU is assigned when you load the model or generate images.
+        This Space uses ZeroGPU for efficient GPU allocation. The model is pre-loaded and ready to generate!
         """)

         # Memory status
         memory_display = gr.Textbox(
             label="📊 GPU Status",
-            value=app.get_memory_info(),
+            value=get_memory_info(),
             interactive=False
         )

         with gr.Row():
             with gr.Column():
-                # Model management
-                gr.Markdown("### 🎮 Model Management")
-
-                with gr.Row():
-                    load_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
-                    unload_btn = gr.Button("🗑️ Unload", variant="secondary")
-
-                model_status = gr.Textbox(label="Model Status", interactive=False)
-
                 # Generation settings
-                gr.Markdown("### 🎨 Generation Settings")
+                gr.Markdown("### 🎨 Generate High-Quality Images")

                 prompt = gr.Textbox(
                     label="Prompt",
                     placeholder="A futuristic robot in a high-tech laboratory with holographic displays...",
-                    lines=3
+                    lines=4,
+                    value="A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface."
                 )

                 negative_prompt = gr.Textbox(
-                    label="Negative Prompt (Optional)",
-                    placeholder="blurry, low quality, distorted, ugly, deformed...",
+                    label="Negative Prompt (Optional - has smart default)",
+                    placeholder="Leave empty to use optimized default negative prompt...",
                     lines=2
                 )

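The most interesting change in this hunk is the monkey-patch on `from_pretrained`: Flash Attention 2 and bfloat16 become defaults for the safeguard models, while `kwargs.setdefault` lets explicit caller arguments still win. A minimal standalone sketch of the same pattern, assuming nothing beyond plain Python (`DummyLoader` and `patch_defaults` are hypothetical names for illustration, not part of the commit):

# Sketch of the from_pretrained monkey-patch pattern used in this commit.
class DummyLoader:
    @classmethod
    def from_pretrained(cls, name, **kwargs):
        return f"loaded {name} with {kwargs}"

def patch_defaults(target, **defaults):
    orig = target.from_pretrained
    def wrapper(*args, **kwargs):
        for key, value in defaults.items():
            kwargs.setdefault(key, value)  # explicit caller kwargs still win
        return orig(*args, **kwargs)
    target.from_pretrained = wrapper

patch_defaults(DummyLoader, attn_implementation="flash_attention_2")
print(DummyLoader.from_pretrained("demo"))
# -> loaded demo with {'attn_implementation': 'flash_attention_2'}
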
@@ -233,7 +180,9 @@ def create_interface():
                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
                     height = gr.Slider(512, 2048, value=1024, step=64, label="Height")

-                seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+                with gr.Row():
+                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                    seed = gr.Number(label="Seed", value=42, precision=0)

                 generate_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")

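The `randomize_seed` checkbox and `seed` field added above feed the seed logic from the first hunk: a fresh seed is drawn when randomizing (or when the seed is -1), otherwise the user's value is reused, and the seed actually used is returned so a result can be reproduced. A compact sketch of that contract (`resolve_seed` is a hypothetical helper, not in the commit):

import random
import torch

def resolve_seed(seed: int, randomize: bool) -> tuple[torch.Generator, int]:
    # Mirrors the app's handling: returns a seeded generator plus the seed used.
    actual = random.randint(0, 1000000) if randomize or seed == -1 else seed
    return torch.Generator().manual_seed(actual), actual

gen, used = resolve_seed(42, randomize=False)
assert used == 42  # a fixed seed gives reproducible generations
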
@@ -241,37 +190,28 @@ def create_interface():
                 # Output
                 output_image = gr.Image(label="Generated Image", height=600)
                 generation_status = gr.Textbox(label="Generation Status", interactive=False)
+                seed_output = gr.Number(label="Used Seed", interactive=False)

         # ZeroGPU info
         gr.Markdown("""
         ### 💡 ZeroGPU Features:
         - **70GB VRAM**: Generate high-resolution images up to 2048x2048
-        - **Dynamic allocation**: GPU assigned only when needed
+        - **Pre-loaded Model**: No waiting for model loading
         - **H200 powered**: Latest NVIDIA architecture for fast inference
-        - **Free to use**: Available to all users (PRO users get higher priority)
-        - **Auto-optimization**: Model compilation and memory efficiency
+        - **Smart defaults**: Optimized negative prompt included
+        - **Flash Attention**: Enhanced performance optimizations
         """)

         # Event handlers
-        load_btn.click(
-            app.load_model,
-            outputs=[model_status, memory_display]
-        )
-
-        unload_btn.click(
-            app.unload_model,
-            outputs=[model_status, memory_display]
-        )
-
         generate_btn.click(
-            app.generate_image,
-            inputs=[prompt, negative_prompt, steps, guidance, seed, width, height],
-            outputs=[output_image, generation_status, memory_display]
+            generate_image,
+            inputs=[prompt, negative_prompt, steps, guidance, seed, width, height, randomize_seed],
+            outputs=[output_image, generation_status, memory_display, seed_output]
         )

         # Auto-refresh memory status
         def refresh_memory():
-            return app.get_memory_info()
+            return get_memory_info()

         # Update memory display every 10 seconds
         gr.Timer(value=10).tick(refresh_memory, outputs=[memory_display])
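The rewired click handler above follows the standard Blocks pattern: `inputs` map positionally onto the handler's parameters, and the new `seed_output` component simply extends `outputs` to receive the extra return value. A stripped-down sketch of the same wiring (the `echo` handler and its components are hypothetical):

import gradio as gr

def echo(text, repeat):
    # Parameter order must match the inputs list below;
    # the returned tuple maps onto the outputs list.
    return text * int(repeat), f"repeated {int(repeat)}x"

with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    repeat = gr.Slider(1, 5, value=2, step=1, label="Repeat")
    out = gr.Textbox(label="Output")
    status = gr.Textbox(label="Status")
    gr.Button("Run").click(echo, inputs=[text, repeat], outputs=[out, status])

if __name__ == "__main__":
    demo.launch()
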
@@ -284,7 +224,9 @@ def create_interface():
             ["A futuristic space station orbiting Earth, with solar panels and docking bays, sci-fi concept art, cinematic"],
             ["A serene Japanese garden with cherry blossoms, koi pond, and traditional architecture, peaceful atmosphere, masterpiece"],
             ["A steampunk mechanical owl with brass gears and copper pipes, intricate details, vintage engineering"],
-            ["An underwater city with bioluminescent coral and glass domes, marine life swimming around, fantasy architecture"]
+            ["A well-worn broom sweeps across a dusty wooden floor, its bristles gathering crumbs and flecks of debris in swift, rhythmic strokes"],
+            ["A robotic arm tightens a bolt beneath the hood of a car, its tool head rotating with practiced torque, precision engineering"],
+            ["A nighttime city bus terminal gradually shifts from stillness to subtle movement, urban night scene with illuminated signage"]
             ],
             inputs=[prompt],
             label="🎨 Example Prompts (optimized for high-resolution generation)"
@@ -293,11 +235,12 @@ def create_interface():
         # Usage tips
         gr.Markdown("""
         ### 🚀 Usage Tips:
-        1. **First time**: Click "Load Model" to download and initialize Cosmos-Predict2
+        1. **Ready to go**: Model is pre-loaded, just click generate!
         2. **High-res**: Try resolutions up to 2048x2048 with the powerful H200 GPU
         3. **Quality**: Use 25-30 steps for high quality, 15-20 for faster generation
         4. **Prompts**: Be descriptive and specific for best results
-        5. **Negative prompts**: Help avoid unwanted elements in your images
+        5. **Negative prompts**: Leave empty to use optimized defaults, or customize as needed
+        6. **Seeds**: Use randomize for variety, or set specific seed for reproducible results
         """)

     return interface
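The 2048x2048 ceiling in these tips corresponds to the clamp inside `generate_image`: oversized requests are scaled by `(max_pixels / current_pixels) ** 0.5` to preserve aspect ratio, then snapped down to multiples of 64. A self-contained sketch of that arithmetic:

MAX_PIXELS = 2048 * 2048  # the app's 4MP ceiling

def clamp_resolution(width: int, height: int) -> tuple[int, int]:
    # Scale oversized requests down proportionally, then round to multiples of 64.
    if width * height > MAX_PIXELS:
        scale = (MAX_PIXELS / (width * height)) ** 0.5
        width, height = int(width * scale), int(height * scale)
        width, height = (width // 64) * 64, (height // 64) * 64
    return width, height

print(clamp_resolution(4096, 2048))  # (2880, 1408), back under 4MP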
 