TheAwakenOne committed on
Commit
5a10d46
·
verified ·
1 Parent(s): efc1566

Update app.py

Files changed (1)
  1. app.py +298 -142
app.py CHANGED
@@ -1,154 +1,310 @@
- import gradio as gr
- import numpy as np
- import random
-
- # import spaces #[uncomment to use ZeroGPU]
- from diffusers import DiffusionPipeline
- import torch
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
-
- if torch.cuda.is_available():
-     torch_dtype = torch.float16
- else:
-     torch_dtype = torch.float32
-
- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
- pipe = pipe.to(device)
-
- MAX_SEED = np.iinfo(np.int32).max
- MAX_IMAGE_SIZE = 1024
-
-
- # @spaces.GPU #[uncomment to use ZeroGPU]
- def infer(
-     prompt,
-     negative_prompt,
-     seed,
-     randomize_seed,
-     width,
-     height,
-     guidance_scale,
-     num_inference_steps,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     generator = torch.Generator().manual_seed(seed)
-
-     image = pipe(
-         prompt=prompt,
-         negative_prompt=negative_prompt,
-         guidance_scale=guidance_scale,
-         num_inference_steps=num_inference_steps,
-         width=width,
-         height=height,
-         generator=generator,
-     ).images[0]
-
-     return image, seed
-
-
- examples = [
-     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-     "An astronaut riding a green horse",
-     "A delicious ceviche cheesecake slice",
- ]
-
- css = """
- #col-container {
-     margin: 0 auto;
-     max-width: 640px;
- }
  """

- with gr.Blocks(css=css) as demo:
-     with gr.Column(elem_id="col-container"):
-         gr.Markdown(" # Text-to-Image Gradio Template")
-
-         with gr.Row():
-             prompt = gr.Text(
-                 label="Prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your prompt",
-                 container=False,
-             )
-
-             run_button = gr.Button("Run", scale=0, variant="primary")
-
-         result = gr.Image(label="Result", show_label=False)

-         with gr.Accordion("Advanced Settings", open=False):
-             negative_prompt = gr.Text(
-                 label="Negative prompt",
-                 max_lines=1,
-                 placeholder="Enter a negative prompt",
-                 visible=False,
-             )

-             seed = gr.Slider(
-                 label="Seed",
-                 minimum=0,
-                 maximum=MAX_SEED,
-                 step=1,
-                 value=0,
              )
-
-             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-             with gr.Row():
-                 width = gr.Slider(
-                     label="Width",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024, # Replace with defaults that work for your model
                  )

-                 height = gr.Slider(
-                     label="Height",
-                     minimum=256,
-                     maximum=MAX_IMAGE_SIZE,
-                     step=32,
-                     value=1024, # Replace with defaults that work for your model
-                 )

-             with gr.Row():
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale",
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=0.0, # Replace with defaults that work for your model
                  )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of inference steps",
-                     minimum=1,
-                     maximum=50,
-                     step=1,
-                     value=2, # Replace with defaults that work for your model
                  )
-
-         gr.Examples(examples=examples, inputs=[prompt])
-     gr.on(
-         triggers=[run_button.click, prompt.submit],
-         fn=infer,
-         inputs=[
-             prompt,
-             negative_prompt,
-             seed,
-             randomize_seed,
-             width,
-             height,
-             guidance_scale,
-             num_inference_steps,
-         ],
-         outputs=[result, seed],
-     )

  if __name__ == "__main__":
-     demo.launch()
+ #!/usr/bin/env python3
+ """
+ Cosmos-Predict2 for Hugging Face Spaces ZeroGPU
+ Optimized for H200 with 70GB VRAM - much simpler than the RTX 5080 version!
  """

+ import os
+ import gradio as gr
+ import torch
+ import spaces
+ from diffusers import DiffusionPipeline
+ import gc
+ from typing import Optional
+ import warnings

+ # Suppress warnings for cleaner output
+ warnings.filterwarnings("ignore", category=UserWarning)
+ warnings.filterwarnings("ignore", category=FutureWarning)

+ class CosmosZeroGPUApp:
+     def __init__(self):
+         self.pipe = None
+         self.model_loaded = False
+         print("🌌 Cosmos-Predict2 ZeroGPU App Starting...")
+
+     def get_memory_info(self):
+         """Get current memory usage - simplified for ZeroGPU"""
+         if torch.cuda.is_available():
+             vram_used = torch.cuda.memory_allocated(0) / 1024**3
+             return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
+         else:
+             return "GPU: Not allocated (ZeroGPU will assign when needed)"
+
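+     # NOTE: On ZeroGPU a GPU is attached only while a @spaces.GPU-decorated
+     # function is running; `duration` is the upper bound, in seconds, on how
+     # long a single call may hold the allocation.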
+     @spaces.GPU(duration=300) # 5 minutes for model loading
+     def load_model(self, progress=gr.Progress()):
+         """Load model with ZeroGPU"""
+         if self.model_loaded:
+             return "✅ Model already loaded!", self.get_memory_info()
+
+         try:
+             progress(0.1, desc="🔄 Initializing ZeroGPU...")
+
+             # ZeroGPU automatically handles device allocation
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+             print(f"🎮 Using device: {device}")
+
+             progress(0.3, desc="📥 Loading Cosmos-Predict2 model...")
+
+             model_id = "nvidia/Cosmos-Predict2-2B-Text2Image"
+
+             # Load model - much simpler with 70GB VRAM!
+             # (No device_map here: DiffusionPipeline does not accept
+             # device_map="auto", and a device-mapped pipeline cannot be
+             # moved with .to(); the pipeline is moved to the GPU below.)
+             self.pipe = DiffusionPipeline.from_pretrained(
+                 model_id,
+                 torch_dtype=torch.bfloat16, # Use bfloat16 for better performance
+                 use_safetensors=True,
+                 trust_remote_code=True
              )
+
+             progress(0.7, desc="⚡ Optimizing for H200...")
+
+             # Move to GPU
+             if torch.cuda.is_available():
+                 self.pipe = self.pipe.to(device)
+
+             # Enable optimizations (optional with 70GB VRAM, but still good for speed)
+             try:
+                 self.pipe.enable_attention_slicing()
+                 print("✅ Attention slicing enabled")
+             except Exception:
+                 pass
+
+             try:
+                 self.pipe.enable_xformers_memory_efficient_attention()
+                 print("✅ xformers enabled")
+             except Exception:
+                 print("📝 xformers not available (optional)")
+
+             # Compile the denoiser for faster inference (optional).
+             # Cosmos-Predict2 is transformer-based, so check `transformer`
+             # before falling back to `unet`.
+             try:
+                 if getattr(self.pipe, "transformer", None) is not None:
+                     self.pipe.transformer = torch.compile(self.pipe.transformer, mode="reduce-overhead", fullgraph=True)
+                     print("✅ Model compiled for faster inference")
+                 elif getattr(self.pipe, "unet", None) is not None:
+                     self.pipe.unet = torch.compile(self.pipe.unet, mode="reduce-overhead", fullgraph=True)
+                     print("✅ Model compiled for faster inference")
+             except Exception:
+                 print("📝 Model compilation not available (optional)")
+
+             progress(0.9, desc="🏁 Finalizing...")
+
+             self.model_loaded = True
+             torch.cuda.empty_cache()
+
+             progress(1.0, desc="✅ Ready!")
+             return "✅ Model loaded successfully on ZeroGPU H200!", self.get_memory_info()
+
+         except Exception as e:
+             self.model_loaded = False
+             error_msg = str(e)
+             if "401" in error_msg or "restricted" in error_msg:
+                 return "❌ Access denied. Please ensure the model is publicly accessible.", self.get_memory_info()
+             return f"❌ Error loading model: {error_msg}", self.get_memory_info()
+
+     def unload_model(self):
+         """Unload model"""
+         if self.pipe is not None:
+             del self.pipe
+             self.pipe = None
+
+         self.model_loaded = False
+         torch.cuda.empty_cache()
+         gc.collect()
+
+         return "✅ Model unloaded!", self.get_memory_info()
+
+     @spaces.GPU(duration=120) # 2 minutes for generation
+     def generate_image(self, prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
+                        seed=-1, width=1024, height=1024, progress=gr.Progress()):
+         """Generate image with ZeroGPU H200"""
+         if not self.model_loaded or self.pipe is None:
+             return None, "❌ Please load the model first!", self.get_memory_info()
+
+         try:
+             progress(0.1, desc="🎨 Preparing generation...")
+
+             # With 70GB VRAM, we can use much larger resolutions!
+             max_pixels = 2048 * 2048 # 4MP max for reasonable generation times
+             current_pixels = width * height
+
+             if current_pixels > max_pixels:
+                 # Scale down proportionally
+                 scale = (max_pixels / current_pixels) ** 0.5
+                 width = int(width * scale)
+                 height = int(height * scale)
+                 # Round down to a multiple of 64 for compatibility
+                 width = (width // 64) * 64
+                 height = (height // 64) * 64
+                 size_msg = f"📉 Scaled to {width}x{height} for optimal performance"
+             else:
+                 size_msg = f"📈 Generating at {width}x{height}"
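+             # Worked example of the clamp above (hypothetical 2048x3072
+             # request, e.g. via the API): ~6.3MP scales by
+             # (4194304 / 6291456) ** 0.5 ~= 0.816 to 1672x2508, which
+             # rounds down to 1664x2496 (~4.15MP).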
+
+             # Set seed for reproducibility
+             generator = None
+             if seed != -1:
+                 generator = torch.Generator(device="cuda").manual_seed(seed)
+
+             progress(0.3, desc=f"🎨 Generating {width}x{height} image...")
+
+             print(f"🎨 Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}")
+
+             # Generate with the powerful H200!
+             with torch.inference_mode():
+                 result = self.pipe(
+                     prompt=prompt,
+                     negative_prompt=negative_prompt if negative_prompt else None,
+                     num_inference_steps=num_steps,
+                     guidance_scale=guidance_scale,
+                     height=height,
+                     width=width,
+                     generator=generator,
+                     output_type="pil"
                  )
+
+             progress(0.9, desc="🏁 Finalizing...")
+
+             # Extract image
+             if hasattr(result, 'images'):
+                 image = result.images[0]
+             elif isinstance(result, list):
+                 image = result[0]
+             else:
+                 image = result
+
+             # Cleanup
+             del result
+             torch.cuda.empty_cache()
+
+             progress(1.0, desc="✅ Complete!")
+             return image, f"✅ Generated successfully! {size_msg}", self.get_memory_info()
+
+         except Exception as e:
+             torch.cuda.empty_cache()
+             return None, f"❌ Generation failed: {str(e)}", self.get_memory_info()

+ # Initialize app
+ app = CosmosZeroGPUApp()

+ # Create Gradio interface
+ def create_interface():
+     with gr.Blocks(title="Cosmos-Predict2 ZeroGPU", theme=gr.themes.Soft()) as interface:
+         gr.Markdown("""
+         # 🌌 Cosmos-Predict2 on ZeroGPU
+         **Powered by NVIDIA H200 with 70GB VRAM • High-resolution generation • Fast inference**
+
+         This Space uses ZeroGPU for efficient GPU allocation. The GPU is assigned when you load the model or generate images.
+         """)
+
+         # Memory status
+         memory_display = gr.Textbox(
+             label="📊 GPU Status",
+             value=app.get_memory_info(),
+             interactive=False
+         )
+
+         with gr.Row():
+             with gr.Column():
+                 # Model management
+                 gr.Markdown("### 🎮 Model Management")
+
+                 with gr.Row():
+                     load_btn = gr.Button("🔄 Load Model", variant="primary", size="lg")
+                     unload_btn = gr.Button("🗑️ Unload", variant="secondary")
+
+                 model_status = gr.Textbox(label="Model Status", interactive=False)
+
+                 # Generation settings
+                 gr.Markdown("### 🎨 Generation Settings")
+
+                 prompt = gr.Textbox(
+                     label="Prompt",
+                     placeholder="A futuristic robot in a high-tech laboratory with holographic displays...",
+                     lines=3
                  )
+
+                 negative_prompt = gr.Textbox(
+                     label="Negative Prompt (Optional)",
+                     placeholder="blurry, low quality, distorted, ugly, deformed...",
+                     lines=2
                  )
+
+                 with gr.Row():
+                     steps = gr.Slider(10, 50, value=25, step=5, label="Inference Steps")
+                     guidance = gr.Slider(1, 15, value=7.5, step=0.5, label="Guidance Scale")
+
+                 with gr.Row():
+                     width = gr.Slider(512, 2048, value=1024, step=64, label="Width")
+                     height = gr.Slider(512, 2048, value=1024, step=64, label="Height")
+
+                 seed = gr.Number(label="Seed (-1 = random)", value=-1, precision=0)
+
+                 generate_btn = gr.Button("🎨 Generate Image", variant="primary", size="lg")
+
+             with gr.Column():
+                 # Output
+                 output_image = gr.Image(label="Generated Image", height=600)
+                 generation_status = gr.Textbox(label="Generation Status", interactive=False)
+
+                 # ZeroGPU info
+                 gr.Markdown("""
+                 ### 💡 ZeroGPU Features:
+                 - **70GB VRAM**: Generate high-resolution images up to 2048x2048
+                 - **Dynamic allocation**: GPU assigned only when needed
+                 - **H200 powered**: Latest NVIDIA architecture for fast inference
+                 - **Free to use**: Available to all users (PRO users get higher priority)
+                 - **Auto-optimization**: Model compilation and memory efficiency
+                 """)
+
+         # Event handlers
+         load_btn.click(
+             app.load_model,
+             outputs=[model_status, memory_display]
+         )
+
+         unload_btn.click(
+             app.unload_model,
+             outputs=[model_status, memory_display]
+         )
+
+         generate_btn.click(
+             app.generate_image,
+             inputs=[prompt, negative_prompt, steps, guidance, seed, width, height],
+             outputs=[output_image, generation_status, memory_display]
+         )
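+         # (The click inputs above map positionally onto generate_image:
+         # steps -> num_steps, guidance -> guidance_scale.)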
+
+         # Auto-refresh memory status
+         def refresh_memory():
+             return app.get_memory_info()
+
+         # Update memory display every 10 seconds
+         gr.Timer(value=10).tick(refresh_memory, outputs=[memory_display])
+
+         # Examples optimized for high-resolution
+         gr.Examples(
+             examples=[
+                 ["A detailed cyberpunk cityscape at night with neon signs, flying cars, and holographic advertisements, highly detailed, 8k resolution"],
+                 ["A majestic dragon soaring through storm clouds with lightning, fantasy art, dramatic lighting, ultra detailed"],
+                 ["A futuristic space station orbiting Earth, with solar panels and docking bays, sci-fi concept art, cinematic"],
+                 ["A serene Japanese garden with cherry blossoms, koi pond, and traditional architecture, peaceful atmosphere, masterpiece"],
+                 ["A steampunk mechanical owl with brass gears and copper pipes, intricate details, vintage engineering"],
+                 ["An underwater city with bioluminescent coral and glass domes, marine life swimming around, fantasy architecture"]
+             ],
+             inputs=[prompt],
+             label="🎨 Example Prompts (optimized for high-resolution generation)"
+         )
+
+         # Usage tips
+         gr.Markdown("""
+         ### 🚀 Usage Tips:
+         1. **First time**: Click "Load Model" to download and initialize Cosmos-Predict2
+         2. **High-res**: Try resolutions up to 2048x2048 with the powerful H200 GPU
+         3. **Quality**: Use 25-30 steps for high quality, 15-20 for faster generation
+         4. **Prompts**: Be descriptive and specific for best results
+         5. **Negative prompts**: Help avoid unwanted elements in your images
+         """)
+
+     return interface

  if __name__ == "__main__":
+     print("🚀 Starting Cosmos-Predict2 ZeroGPU Space...")
+
+     interface = create_interface()
+     interface.launch()
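
A note on running the new app.py outside Spaces: `import spaces` only resolves on Hugging Face infrastructure. A minimal shim can be installed before importing app.py for local smoke-testing; the no-op `GPU` decorator below is a hypothetical sketch, not part of this commit:

import sys
import types

try:
    import spaces  # real package, present on Hugging Face Spaces
except ImportError:
    # Hypothetical stand-in so `import spaces` and @spaces.GPU(duration=...)
    # resolve on machines without the package.
    spaces = types.ModuleType("spaces")

    def GPU(duration=60):
        def wrap(fn):
            return fn  # no-op: return the function undecorated
        return wrap

    spaces.GPU = GPU
    sys.modules["spaces"] = spaces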