ozilion committed on
Commit
05424ef
·
verified ·
1 Parent(s): 7a8e438

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +292 -349
app.py CHANGED
@@ -6,124 +6,87 @@ import numpy as np
6
  import tempfile
7
  from typing import Optional, Tuple
8
  import time
9
- import subprocess
10
- import sys
11
 
12
- # ZeroGPU with H200
13
  try:
14
  import spaces
15
  SPACES_AVAILABLE = True
16
- print("βœ… Spaces library loaded - H200 ready!")
17
  except ImportError:
18
  SPACES_AVAILABLE = False
19
  class spaces:
20
  @staticmethod
21
- def GPU(duration=300):
22
  def decorator(func): return func
23
  return decorator
24
 
25
- # Environment check
26
  IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
27
  IS_SPACES = os.environ.get("SPACE_ID") is not None
28
  HAS_CUDA = torch.cuda.is_available()
29
 
30
- print(f"πŸš€ Environment: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
31
 
32
- def install_missing_packages():
33
- """Install any missing packages"""
34
- try:
35
- print("πŸ”„ Checking and installing packages...")
36
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "diffusers>=0.31.0"])
37
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "transformers>=4.36.0"])
38
- subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "accelerate"])
39
- print("βœ… Packages updated successfully")
40
- return True
41
- except Exception as e:
42
- print(f"❌ Package installation failed: {e}")
43
- return False
44
-
45
- def check_available_pipelines():
46
- """Check what pipelines are actually available"""
47
- available = {}
48
-
49
- try:
50
- from diffusers import DiffusionPipeline
51
- available['DiffusionPipeline'] = True
52
- except ImportError:
53
- available['DiffusionPipeline'] = False
54
-
55
- try:
56
- from diffusers import LTXVideoPipeline
57
- available['LTXVideoPipeline'] = True
58
- except ImportError:
59
- available['LTXVideoPipeline'] = False
60
-
61
- try:
62
- from diffusers import HunyuanVideoPipeline
63
- available['HunyuanVideoPipeline'] = True
64
- except ImportError:
65
- available['HunyuanVideoPipeline'] = False
66
-
67
- try:
68
- from diffusers import CogVideoXPipeline
69
- available['CogVideoXPipeline'] = True
70
- except ImportError:
71
- available['CogVideoXPipeline'] = False
72
-
73
- return available
74
-
75
- # Simplified working models - confirmed to work
76
  WORKING_MODELS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  {
78
  "id": "cerspense/zeroscope_v2_576w",
79
  "name": "Zeroscope V2",
80
- "pipeline": "DiffusionPipeline",
 
 
81
  "resolution": (576, 320),
82
  "max_frames": 24,
83
  "dtype": torch.float16,
84
- "description": "Fast and reliable video generation"
 
85
  },
86
  {
87
  "id": "damo-vilab/text-to-video-ms-1.7b",
88
  "name": "ModelScope T2V",
89
- "pipeline": "DiffusionPipeline",
 
 
90
  "resolution": (256, 256),
91
- "max_frames": 16,
92
  "dtype": torch.float16,
93
- "description": "Stable text-to-video model"
 
94
  },
95
  {
96
  "id": "ali-vilab/text-to-video-ms-1.7b",
97
  "name": "AliVilab T2V",
98
- "pipeline": "DiffusionPipeline",
99
- "resolution": (256, 256),
 
 
100
  "max_frames": 16,
101
  "dtype": torch.float16,
102
- "description": "Alternative ModelScope version"
103
- }
104
- ]
105
-
106
- # Try premium models but with fallbacks
107
- PREMIUM_MODELS = [
108
- {
109
- "id": "Lightricks/LTX-Video",
110
- "name": "LTX-Video",
111
- "pipeline": "LTXVideoPipeline",
112
- "fallback_pipeline": "DiffusionPipeline",
113
- "resolution": (512, 512),
114
- "max_frames": 50,
115
- "dtype": torch.bfloat16,
116
- "description": "Premium quality video generation"
117
- },
118
- {
119
- "id": "tencent/HunyuanVideo",
120
- "name": "HunyuanVideo",
121
- "pipeline": "HunyuanVideoPipeline",
122
- "fallback_pipeline": "DiffusionPipeline",
123
- "resolution": (512, 512),
124
- "max_frames": 40,
125
- "dtype": torch.bfloat16,
126
- "description": "Advanced video model"
127
  }
128
  ]
129
 
@@ -133,151 +96,144 @@ MODEL_INFO = None
133
  LOADING_LOGS = []
134
 
135
  def log_loading(message):
136
- """Log loading attempts"""
137
  global LOADING_LOGS
138
  print(message)
139
- LOADING_LOGS.append(message)
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- def load_any_working_model():
142
- """Load any working model - premium first, then fallbacks"""
143
  global MODEL, MODEL_INFO, LOADING_LOGS
144
 
145
  if MODEL is not None:
146
  return True
147
 
148
  LOADING_LOGS = []
149
- log_loading("πŸš€ Starting H200 model loading...")
150
 
151
- # Install packages first
152
- if not install_missing_packages():
153
- log_loading("❌ Package installation failed")
154
 
155
- # Check available pipelines
156
- available_pipelines = check_available_pipelines()
157
- log_loading(f"πŸ“‹ Available pipelines: {available_pipelines}")
158
 
159
- # Try premium models first
160
- log_loading("🎯 Attempting premium models...")
161
- for model_config in PREMIUM_MODELS:
162
- if try_load_model(model_config, available_pipelines):
163
  return True
164
 
165
- # Fallback to working models
166
- log_loading("πŸ”„ Falling back to reliable models...")
167
- for model_config in WORKING_MODELS:
168
- if try_load_model(model_config, available_pipelines):
169
- return True
170
-
171
- log_loading("❌ All models failed to load")
172
  return False
173
 
174
- def try_load_model(model_config, available_pipelines):
175
- """Try to load a specific model with fallbacks"""
176
  global MODEL, MODEL_INFO
177
 
178
- model_id = model_config["id"]
179
- model_name = model_config["name"]
180
 
181
- log_loading(f"πŸ”„ Trying {model_name}...")
 
182
 
183
  try:
184
- from diffusers import DiffusionPipeline
185
-
186
- # Strategy 1: Try specific pipeline if available
187
- primary_pipeline = model_config.get("pipeline", "DiffusionPipeline")
188
- if available_pipelines.get(primary_pipeline, False):
189
- try:
190
- log_loading(f" πŸ“₯ Loading with {primary_pipeline}...")
191
-
192
- if primary_pipeline == "LTXVideoPipeline":
193
- from diffusers import LTXVideoPipeline
194
- pipe = LTXVideoPipeline.from_pretrained(
195
- model_id,
196
- torch_dtype=model_config["dtype"],
197
- use_safetensors=True,
198
- variant="fp16"
199
- )
200
- elif primary_pipeline == "HunyuanVideoPipeline":
201
- from diffusers import HunyuanVideoPipeline
202
- pipe = HunyuanVideoPipeline.from_pretrained(
203
- model_id,
204
- torch_dtype=model_config["dtype"],
205
- use_safetensors=True,
206
- variant="fp16"
207
- )
208
- else:
209
- pipe = DiffusionPipeline.from_pretrained(
210
- model_id,
211
- torch_dtype=model_config["dtype"],
212
- use_safetensors=True,
213
- variant="fp16"
214
- )
215
-
216
- log_loading(f" βœ… Loaded with {primary_pipeline}")
217
-
218
- except Exception as e:
219
- log_loading(f" ❌ {primary_pipeline} failed: {e}")
220
- raise e
221
 
222
- # Strategy 2: Fallback to DiffusionPipeline
 
 
 
223
  else:
224
- log_loading(f" πŸ”„ Using DiffusionPipeline fallback...")
225
- pipe = DiffusionPipeline.from_pretrained(
226
- model_id,
227
- torch_dtype=model_config["dtype"],
228
- use_safetensors=True,
229
- variant="fp16",
230
- trust_remote_code=True
231
- )
232
-
233
- # Move to H200 GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  if HAS_CUDA:
235
  pipe = pipe.to("cuda")
236
- log_loading(f" πŸ“± Moved to H200 CUDA")
237
 
238
- # Enable optimizations
239
- if hasattr(pipe, 'enable_sequential_cpu_offload'):
240
- pipe.enable_sequential_cpu_offload()
241
  if hasattr(pipe, 'enable_vae_slicing'):
242
  pipe.enable_vae_slicing()
 
 
243
  if hasattr(pipe, 'enable_vae_tiling'):
244
  pipe.enable_vae_tiling()
 
245
 
246
- log_loading(f" ⚑ Optimizations enabled")
 
 
247
 
248
- # Test generation
249
- log_loading(f" πŸ§ͺ Testing {model_name}...")
 
 
 
 
250
 
251
  MODEL = pipe
252
- MODEL_INFO = model_config
253
 
254
- log_loading(f"βœ… {model_name} loaded and ready!")
255
  return True
256
 
257
  except Exception as e:
258
  log_loading(f"❌ {model_name} failed: {str(e)}")
259
- # Clear memory before trying next
260
  if HAS_CUDA:
261
  torch.cuda.empty_cache()
262
  gc.collect()
263
  return False
264
 
265
- @spaces.GPU(duration=180) if SPACES_AVAILABLE else lambda x: x
266
  def generate_video(
267
  prompt: str,
268
  negative_prompt: str = "",
269
- num_frames: int = 16,
270
- num_inference_steps: int = 20,
271
  guidance_scale: float = 7.5,
272
  seed: int = -1
273
  ) -> Tuple[Optional[str], str]:
274
- """Generate video with loaded model"""
275
 
276
  global MODEL, MODEL_INFO
277
 
278
  # Load model if needed
279
- if not load_any_working_model():
280
- return None, f"❌ No models could be loaded. Check logs for details."
 
281
 
282
  # Input validation
283
  if not prompt.strip():
@@ -287,14 +243,12 @@ def generate_video(
287
  max_frames = MODEL_INFO["max_frames"]
288
  width, height = MODEL_INFO["resolution"]
289
 
290
- # Limit parameters to model capabilities
291
  num_frames = min(max(num_frames, 8), max_frames)
292
 
293
  try:
294
- # Clear H200 memory
295
- if HAS_CUDA:
296
- torch.cuda.empty_cache()
297
- gc.collect()
298
 
299
  # Set seed
300
  if seed == -1:
@@ -303,165 +257,195 @@ def generate_video(
303
  device = "cuda" if HAS_CUDA else "cpu"
304
  generator = torch.Generator(device=device).manual_seed(seed)
305
 
306
- print(f"🎬 H200 Generation: {MODEL_INFO['name']} - {prompt[:50]}...")
 
 
307
  start_time = time.time()
308
 
309
- # Generate with autocast
310
  with torch.autocast(device, dtype=MODEL_INFO["dtype"]):
311
- result = MODEL(
312
- prompt=prompt,
313
- negative_prompt=negative_prompt if negative_prompt.strip() else None,
314
- num_frames=num_frames,
315
- height=height,
316
- width=width,
317
- num_inference_steps=num_inference_steps,
318
- guidance_scale=guidance_scale,
319
- generator=generator
320
- )
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  end_time = time.time()
323
  generation_time = end_time - start_time
324
 
325
- # Export video
326
- video_frames = result.frames[0]
 
 
 
 
 
327
 
 
328
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
329
  from diffusers.utils import export_to_video
330
- export_to_video(video_frames, tmp_file.name, fps=8)
 
331
  video_path = tmp_file.name
332
 
333
- # Clear memory
334
- if HAS_CUDA:
335
- torch.cuda.empty_cache()
336
- gc.collect()
337
 
338
- success_msg = f"""βœ… **H200 Video Generated!**
339
 
340
  πŸ€– **Model:** {MODEL_INFO['name']}
341
  πŸ“ **Prompt:** {prompt}
342
- 🎬 **Frames:** {num_frames}
343
  πŸ“ **Resolution:** {width}x{height}
344
  βš™οΈ **Inference Steps:** {num_inference_steps}
345
- 🎯 **Guidance:** {guidance_scale}
346
  🎲 **Seed:** {seed}
347
- ⏱️ **Time:** {generation_time:.1f}s
348
- πŸ–₯️ **Device:** H200 CUDA
349
- πŸ’‘ **Notes:** {MODEL_INFO['description']}"""
 
 
 
 
350
 
351
  return video_path, success_msg
352
 
 
 
 
 
 
353
  except Exception as e:
354
  if HAS_CUDA:
355
  torch.cuda.empty_cache()
356
  gc.collect()
357
- return None, f"❌ Generation failed: {str(e)}"
358
 
359
  def get_loading_logs():
360
- """Get detailed loading logs"""
361
  global LOADING_LOGS
362
-
363
  if not LOADING_LOGS:
364
- return "No loading attempts yet. Click 'Load Model' to start."
365
-
366
  return "\n".join(LOADING_LOGS)
367
 
368
- def get_system_diagnostic():
369
- """Comprehensive system diagnostic"""
370
-
371
- diagnostic = []
372
 
373
- # Environment check
374
- diagnostic.append("## πŸ–₯️ H200 System Diagnostic")
375
- diagnostic.append(f"- ZeroGPU: {'βœ…' if IS_ZERO_GPU else '❌'}")
376
- diagnostic.append(f"- HF Spaces: {'βœ…' if IS_SPACES else '❌'}")
377
- diagnostic.append(f"- CUDA: {'βœ…' if HAS_CUDA else '❌'}")
378
-
379
- # GPU info
380
- if HAS_CUDA:
381
- try:
382
- gpu_name = torch.cuda.get_device_name(0)
383
- total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
384
- diagnostic.append(f"- GPU: {gpu_name}")
385
- diagnostic.append(f"- Memory: {total_memory:.1f} GB")
386
- except Exception as e:
387
- diagnostic.append(f"- GPU Error: {e}")
388
-
389
- # Package versions
390
- try:
391
- import diffusers
392
- diagnostic.append(f"- Diffusers: {diffusers.__version__}")
393
- except ImportError:
394
- diagnostic.append("- Diffusers: ❌ Not installed")
395
-
396
- try:
397
- import transformers
398
- diagnostic.append(f"- Transformers: {transformers.__version__}")
399
- except ImportError:
400
- diagnostic.append("- Transformers: ❌ Not installed")
401
-
402
- # Available pipelines
403
- available = check_available_pipelines()
404
- diagnostic.append("\n## πŸ“‹ Available Pipelines")
405
- for pipeline, status in available.items():
406
- diagnostic.append(f"- {pipeline}: {'βœ…' if status else '❌'}")
407
 
408
- # Model status
409
- diagnostic.append("\n## πŸ€– Model Status")
410
  if MODEL is not None:
411
- diagnostic.append(f"- Loaded: βœ… {MODEL_INFO['name']}")
412
- diagnostic.append(f"- Resolution: {MODEL_INFO['resolution']}")
413
- diagnostic.append(f"- Max Frames: {MODEL_INFO['max_frames']}")
 
 
 
414
  else:
415
- diagnostic.append("- Loaded: ❌ No model loaded")
 
 
416
 
417
- return "\n".join(diagnostic)
418
 
419
- def force_load_model():
420
- """Force reload model"""
 
 
 
 
 
 
 
 
421
  global MODEL, MODEL_INFO
422
  MODEL = None
423
  MODEL_INFO = None
 
 
 
 
 
424
 
425
- success = load_any_working_model()
426
- return f"πŸ”„ Force reload: {'βœ… Success' if success else '❌ Failed'}"
427
 
428
- # Create diagnostic interface
429
- with gr.Blocks(title="H200 Video Generator - Debug Mode", theme=gr.themes.Soft()) as demo:
430
 
431
  gr.Markdown("""
432
- # πŸ”§ H200 Video Generator - Debug Mode
433
 
434
- **Systematic model loading with full diagnostics**
435
  """)
436
 
 
 
 
 
 
 
 
 
437
  with gr.Tab("πŸŽ₯ Generate Video"):
438
  with gr.Row():
439
  with gr.Column(scale=1):
440
  prompt_input = gr.Textbox(
441
  label="πŸ“ Video Prompt",
442
- placeholder="A cat playing with a ball in a sunny garden...",
443
- lines=3
444
  )
445
 
446
  negative_prompt_input = gr.Textbox(
447
  label="🚫 Negative Prompt",
448
- placeholder="blurry, low quality, distorted...",
449
  lines=2
450
  )
451
 
452
- with gr.Row():
453
- num_frames = gr.Slider(8, 50, value=16, step=1, label="🎬 Frames")
454
- num_steps = gr.Slider(10, 50, value=20, step=1, label="βš™οΈ Steps")
 
 
 
 
 
455
 
456
- with gr.Row():
457
- guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="🎯 Guidance")
458
- seed = gr.Number(value=-1, precision=0, label="🎲 Seed")
459
 
460
- generate_btn = gr.Button("πŸš€ Generate Video", variant="primary", size="lg")
 
 
 
 
461
 
462
  with gr.Column(scale=1):
463
- video_output = gr.Video(label="πŸŽ₯ Generated Video", height=400)
464
- result_text = gr.Textbox(label="πŸ“‹ Results", lines=8, show_copy_button=True)
465
 
466
  generate_btn.click(
467
  fn=generate_video,
@@ -469,93 +453,52 @@ with gr.Blocks(title="H200 Video Generator - Debug Mode", theme=gr.themes.Soft()
469
  outputs=[video_output, result_text]
470
  )
471
 
472
- # Simple examples
473
  gr.Examples(
474
  examples=[
475
- ["A peaceful cat sleeping in a sunny garden", "", 16, 20, 7.5, 42],
476
- ["Ocean waves gently washing the shore", "blurry", 20, 25, 8.0, 123],
477
- ["A butterfly landing on a flower", "", 16, 20, 7.0, 456]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  ],
479
  inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
480
  )
481
 
482
- with gr.Tab("πŸ”§ Debug & Diagnostics"):
483
  with gr.Row():
484
- diagnostic_btn = gr.Button("πŸ” System Diagnostic", variant="secondary")
485
- logs_btn = gr.Button("πŸ“‹ Loading Logs", variant="secondary")
486
- reload_btn = gr.Button("πŸ”„ Force Reload Model", variant="secondary")
487
-
488
- diagnostic_output = gr.Markdown()
489
- logs_output = gr.Textbox(label="Loading Logs", lines=15, show_copy_button=True)
490
- reload_output = gr.Textbox(label="Reload Result", lines=2)
491
-
492
- diagnostic_btn.click(fn=get_system_diagnostic, outputs=diagnostic_output)
493
- logs_btn.click(fn=get_loading_logs, outputs=logs_output)
494
- reload_btn.click(fn=force_load_model, outputs=reload_output)
495
-
496
- # Auto-load diagnostic
497
- demo.load(fn=get_system_diagnostic, outputs=diagnostic_output)
498
-
499
- with gr.Tab("πŸ’‘ Troubleshooting"):
500
- gr.Markdown("""
501
- ## 🔧 H200 Troubleshooting Guide
502
-
503
- ### 🚨 Common Issues & Solutions:
504
-
505
- **❌ "All premium models failed to load"**
506
-
507
- **Possible Causes:**
508
- 1. **Pipeline not available:** LTXVideoPipeline, HunyuanVideoPipeline may not be in stable diffusers
509
- 2. **Model access:** Some models may be gated or require authentication
510
- 3. **Memory issues:** Even H200 can have limits during loading
511
- 4. **Network timeouts:** Large model downloads can timeout
512
-
513
- **Solutions:**
514
- 1. **Check System Diagnostic tab** - see what pipelines are available
515
- 2. **View Loading Logs** - detailed error messages
516
- 3. **Force Reload Model** - retry with fresh state
517
- 4. **Wait and retry** - sometimes it's just a temporary issue
518
-
519
- ### 🎯 Step-by-Step Debugging:
520
 
521
- **Step 1: Check Environment**
522
- - Click "System Diagnostic"
523
- - Verify H200 GPU is detected
524
- - Check if diffusers/transformers are installed
525
 
526
- **Step 2: Check Available Pipelines**
527
- - Look for ✅ next to DiffusionPipeline (minimum required)
528
- - LTXVideoPipeline/HunyuanVideoPipeline may be ❌ (that's ok)
529
-
530
- **Step 3: Check Loading Logs**
531
- - Click "Loading Logs" to see detailed attempt logs
532
- - Look for specific error messages
533
- - Note which models were tried
534
-
535
- **Step 4: Force Reload**
536
- - Click "Force Reload Model" if needed
537
- - This clears cache and retries
538
-
539
- ### 🔄 Fallback Strategy:
540
-
541
- This app tries models in this order:
542
- 1. **LTX-Video** (premium)
543
- 2. **HunyuanVideo** (premium)
544
- 3. **Zeroscope V2** (reliable fallback)
545
- 4. **ModelScope T2V** (backup)
546
- 5. **AliVilab T2V** (final fallback)
547
-
548
- At least one should work!
549
 
550
- ### 💡 Tips:
551
- - First run always takes longer (model download)
552
- - H200 has plenty of memory, so memory errors are rare
553
- - Check HuggingFace status if all models fail
554
- - Some models may need authentication tokens
555
- """)
556
 
557
  if __name__ == "__main__":
558
- demo.queue(max_size=5)
559
  demo.launch(
560
  share=False,
561
  server_name="0.0.0.0",
 
6
  import tempfile
7
  from typing import Optional, Tuple
8
  import time
 
 
9
 
10
+ # ZeroGPU support (even without detection)
11
  try:
12
  import spaces
13
  SPACES_AVAILABLE = True
 
14
  except ImportError:
15
  SPACES_AVAILABLE = False
16
  class spaces:
17
  @staticmethod
18
+ def GPU(duration=240):
19
  def decorator(func): return func
20
  return decorator
21
 
22
+ # Environment
23
  IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
24
  IS_SPACES = os.environ.get("SPACE_ID") is not None
25
  HAS_CUDA = torch.cuda.is_available()
26
 
27
+ print(f"πŸš€ H200 MIG Environment: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
28
 
29
+ # Working models based on your diagnostic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  WORKING_MODELS = [
31
+ {
32
+ "id": "Lightricks/LTX-Video",
33
+ "name": "LTX-Video",
34
+ "pipeline_class": "DiffusionPipeline",
35
+ "variant": None, # No fp16 variant available
36
+ "use_safetensors": False, # Use .bin files
37
+ "resolution": (512, 512),
38
+ "max_frames": 50,
39
+ "dtype": torch.bfloat16,
40
+ "priority": 1,
41
+ "description": "LTX-Video via DiffusionPipeline (no variant)"
42
+ },
43
+ {
44
+ "id": "THUDM/CogVideoX-5b",
45
+ "name": "CogVideoX-5B",
46
+ "pipeline_class": "CogVideoXPipeline",
47
+ "variant": None,
48
+ "use_safetensors": True,
49
+ "resolution": (720, 480),
50
+ "max_frames": 49,
51
+ "dtype": torch.bfloat16,
52
+ "priority": 2,
53
+ "description": "CogVideo 5B model - proven to work"
54
+ },
55
  {
56
  "id": "cerspense/zeroscope_v2_576w",
57
  "name": "Zeroscope V2",
58
+ "pipeline_class": "DiffusionPipeline",
59
+ "variant": None, # No fp16 variant
60
+ "use_safetensors": False, # Use .bin files
61
  "resolution": (576, 320),
62
  "max_frames": 24,
63
  "dtype": torch.float16,
64
+ "priority": 3,
65
+ "description": "Zeroscope without safetensors"
66
  },
67
  {
68
  "id": "damo-vilab/text-to-video-ms-1.7b",
69
  "name": "ModelScope T2V",
70
+ "pipeline_class": "DiffusionPipeline",
71
+ "variant": None,
72
+ "use_safetensors": False,
73
  "resolution": (256, 256),
74
+ "max_frames": 16,
75
  "dtype": torch.float16,
76
+ "priority": 4,
77
+ "description": "ModelScope reliable fallback"
78
  },
79
  {
80
  "id": "ali-vilab/text-to-video-ms-1.7b",
81
  "name": "AliVilab T2V",
82
+ "pipeline_class": "DiffusionPipeline",
83
+ "variant": None,
84
+ "use_safetensors": False,
85
+ "resolution": (256, 256),
86
  "max_frames": 16,
87
  "dtype": torch.float16,
88
+ "priority": 5,
89
+ "description": "AliVilab alternative"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  }
91
  ]
92
 
 
96
  LOADING_LOGS = []
97
 
98
  def log_loading(message):
99
+ """Enhanced logging"""
100
  global LOADING_LOGS
101
  print(message)
102
+ LOADING_LOGS.append(f"{time.strftime('%H:%M:%S')} - {message}")
103
+
104
+ def get_h200_memory():
105
+ """Get H200 MIG memory stats"""
106
+ if HAS_CUDA:
107
+ try:
108
+ total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
109
+ allocated = torch.cuda.memory_allocated(0) / (1024**3)
110
+ return total, allocated
111
+ except:
112
+ return 0, 0
113
+ return 0, 0
114
 
115
+ def load_working_model():
116
+ """Load first working model with H200 MIG optimizations"""
117
  global MODEL, MODEL_INFO, LOADING_LOGS
118
 
119
  if MODEL is not None:
120
  return True
121
 
122
  LOADING_LOGS = []
123
+ log_loading("πŸš€ H200 MIG (69.5GB) model loading started...")
124
 
125
+ total_mem, allocated_mem = get_h200_memory()
126
+ log_loading(f"πŸ’Ύ Initial H200 memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB used")
 
127
 
128
+ # Sort by priority
129
+ sorted_models = sorted(WORKING_MODELS, key=lambda x: x["priority"])
 
130
 
131
+ for model_config in sorted_models:
132
+ if try_load_specific_model(model_config):
 
 
133
  return True
134
 
135
+ log_loading("❌ All models failed on H200 MIG")
 
 
 
 
 
 
136
  return False
137
 
138
+ def try_load_specific_model(config):
139
+ """Try loading a specific model with exact configuration"""
140
  global MODEL, MODEL_INFO
141
 
142
+ model_id = config["id"]
143
+ model_name = config["name"]
144
 
145
+ log_loading(f"πŸ”„ Attempting {model_name}...")
146
+ log_loading(f" πŸ“‹ Config: {config['pipeline_class']}, variant={config['variant']}, safetensors={config['use_safetensors']}")
147
 
148
  try:
149
+ # Clear memory first
150
+ if HAS_CUDA:
151
+ torch.cuda.empty_cache()
152
+ gc.collect()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
+ # Import appropriate pipeline
155
+ if config["pipeline_class"] == "CogVideoXPipeline":
156
+ from diffusers import CogVideoXPipeline
157
+ PipelineClass = CogVideoXPipeline
158
  else:
159
+ from diffusers import DiffusionPipeline
160
+ PipelineClass = DiffusionPipeline
161
+
162
+ # Prepare loading parameters
163
+ load_params = {
164
+ "torch_dtype": config["dtype"],
165
+ "trust_remote_code": True
166
+ }
167
+
168
+ # Add variant only if specified
169
+ if config["variant"]:
170
+ load_params["variant"] = config["variant"]
171
+
172
+ # Add safetensors setting
173
+ if config["use_safetensors"]:
174
+ load_params["use_safetensors"] = True
175
+
176
+ log_loading(f" πŸ“₯ Loading with params: {load_params}")
177
+
178
+ # Load model
179
+ pipe = PipelineClass.from_pretrained(model_id, **load_params)
180
+
181
+ # Move to H200 MIG GPU
182
  if HAS_CUDA:
183
  pipe = pipe.to("cuda")
184
+ log_loading(f" πŸ“± Moved to H200 MIG CUDA")
185
 
186
+ # H200 MIG optimizations (69.5GB is plenty!)
 
 
187
  if hasattr(pipe, 'enable_vae_slicing'):
188
  pipe.enable_vae_slicing()
189
+ log_loading(f" ⚑ VAE slicing enabled")
190
+
191
  if hasattr(pipe, 'enable_vae_tiling'):
192
  pipe.enable_vae_tiling()
193
+ log_loading(f" ⚑ VAE tiling enabled")
194
 
195
+ if hasattr(pipe, 'enable_memory_efficient_attention'):
196
+ pipe.enable_memory_efficient_attention()
197
+ log_loading(f" ⚑ Memory efficient attention enabled")
198
 
199
+ # Don't use CPU offload on H200 - keep everything in GPU
200
+ log_loading(f" πŸš€ Keeping model fully in H200 GPU memory")
201
+
202
+ # Memory check after loading
203
+ total_mem, allocated_mem = get_h200_memory()
204
+ log_loading(f" πŸ’Ύ Post-load memory: {allocated_mem:.1f}GB used / {total_mem:.1f}GB total")
205
 
206
  MODEL = pipe
207
+ MODEL_INFO = config
208
 
209
+ log_loading(f"βœ… {model_name} loaded successfully on H200 MIG!")
210
  return True
211
 
212
  except Exception as e:
213
  log_loading(f"❌ {model_name} failed: {str(e)}")
214
+ # Clear memory before next attempt
215
  if HAS_CUDA:
216
  torch.cuda.empty_cache()
217
  gc.collect()
218
  return False
219
 
220
+ @spaces.GPU(duration=240) if SPACES_AVAILABLE else lambda x: x
221
  def generate_video(
222
  prompt: str,
223
  negative_prompt: str = "",
224
+ num_frames: int = 25,
225
+ num_inference_steps: int = 25,
226
  guidance_scale: float = 7.5,
227
  seed: int = -1
228
  ) -> Tuple[Optional[str], str]:
229
+ """Generate video with H200 MIG power"""
230
 
231
  global MODEL, MODEL_INFO
232
 
233
  # Load model if needed
234
+ if not load_working_model():
235
+ logs = "\n".join(LOADING_LOGS[-10:]) # Last 10 log entries
236
+ return None, f"❌ Model loading failed on H200 MIG\n\nRecent logs:\n{logs}"
237
 
238
  # Input validation
239
  if not prompt.strip():
 
243
  max_frames = MODEL_INFO["max_frames"]
244
  width, height = MODEL_INFO["resolution"]
245
 
246
+ # Adjust parameters for model
247
  num_frames = min(max(num_frames, 8), max_frames)
248
 
249
  try:
250
+ # H200 MIG memory management
251
+ start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
 
 
252
 
253
  # Set seed
254
  if seed == -1:
 
257
  device = "cuda" if HAS_CUDA else "cpu"
258
  generator = torch.Generator(device=device).manual_seed(seed)
259
 
260
+ log_loading(f"🎬 H200 MIG Generation: {MODEL_INFO['name']}")
261
+ log_loading(f"πŸ“ {width}x{height}, {num_frames} frames, {num_inference_steps} steps")
262
+
263
  start_time = time.time()
264
 
265
+ # Generate with H200 MIG autocast
266
  with torch.autocast(device, dtype=MODEL_INFO["dtype"]):
267
+ # Prepare generation arguments
268
+ gen_kwargs = {
269
+ "prompt": prompt,
270
+ "num_frames": num_frames,
271
+ "height": height,
272
+ "width": width,
273
+ "num_inference_steps": num_inference_steps,
274
+ "guidance_scale": guidance_scale,
275
+ "generator": generator
276
+ }
277
+
278
+ # Add negative prompt if provided
279
+ if negative_prompt.strip():
280
+ gen_kwargs["negative_prompt"] = negative_prompt
281
+
282
+ # Model-specific adjustments
283
+ if MODEL_INFO["name"] == "CogVideoX-5B":
284
+ gen_kwargs["num_videos_per_prompt"] = 1
285
+
286
+ log_loading(f"πŸš€ Starting H200 MIG generation...")
287
+ result = MODEL(**gen_kwargs)
288
 
289
  end_time = time.time()
290
  generation_time = end_time - start_time
291
 
292
+ # Extract video frames
293
+ if hasattr(result, 'frames'):
294
+ video_frames = result.frames[0]
295
+ elif hasattr(result, 'videos'):
296
+ video_frames = result.videos[0]
297
+ else:
298
+ return None, "❌ Could not extract video frames"
299
 
300
+ # Export video
301
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
302
  from diffusers.utils import export_to_video
303
+ fps = 8
304
+ export_to_video(video_frames, tmp_file.name, fps=fps)
305
  video_path = tmp_file.name
306
 
307
+ # Memory stats
308
+ end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
309
+ memory_used = end_memory - start_memory
 
310
 
311
+ success_msg = f"""βœ… **H200 MIG Video Generated!**
312
 
313
  πŸ€– **Model:** {MODEL_INFO['name']}
314
  πŸ“ **Prompt:** {prompt}
315
+ 🎬 **Frames:** {num_frames} @ {fps} FPS
316
  πŸ“ **Resolution:** {width}x{height}
317
  βš™οΈ **Inference Steps:** {num_inference_steps}
318
+ 🎯 **Guidance Scale:** {guidance_scale}
319
  🎲 **Seed:** {seed}
320
+ ⏱️ **Generation Time:** {generation_time:.1f}s
321
+ πŸ–₯️ **Device:** H200 MIG (69.5GB)
322
+ πŸ’Ύ **Memory Used:** {memory_used:.1f}GB
323
+ πŸŽ₯ **Video Length:** {num_frames/fps:.1f}s
324
+ πŸ“‹ **Notes:** {MODEL_INFO['description']}"""
325
+
326
+ log_loading(f"βœ… Generation completed in {generation_time:.1f}s")
327
 
328
  return video_path, success_msg
329
 
330
+ except torch.cuda.OutOfMemoryError:
331
+ torch.cuda.empty_cache()
332
+ gc.collect()
333
+ return None, "❌ H200 MIG memory exceeded (rare!). Try reducing parameters."
334
+
335
  except Exception as e:
336
  if HAS_CUDA:
337
  torch.cuda.empty_cache()
338
  gc.collect()
339
+ return None, f"❌ H200 MIG generation failed: {str(e)}"
340
 
341
  def get_loading_logs():
342
+ """Return formatted loading logs"""
343
  global LOADING_LOGS
 
344
  if not LOADING_LOGS:
345
+ return "No loading attempts yet."
 
346
  return "\n".join(LOADING_LOGS)
347
 
348
+ def get_h200_status():
349
+ """Get H200 MIG specific status"""
350
+ total_mem, allocated_mem = get_h200_memory()
 
351
 
352
+ status = f"""## πŸš€ H200 MIG Status
353
+
354
+ **πŸ–₯️ Hardware:**
355
+ - GPU: NVIDIA H200 MIG 3g.71gb
356
+ - Total Memory: {total_mem:.1f} GB
357
+ - Allocated: {allocated_mem:.1f} GB
358
+ - Free: {total_mem - allocated_mem:.1f} GB
359
+
360
+ **πŸ€– Current Model:**"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
 
 
362
  if MODEL is not None:
363
+ status += f"""
364
+ - βœ… **{MODEL_INFO['name']}** loaded and ready
365
+ - πŸ“ Resolution: {MODEL_INFO['resolution']}
366
+ - 🎬 Max Frames: {MODEL_INFO['max_frames']}
367
+ - πŸ’Ύ Memory Usage: {allocated_mem:.1f}GB
368
+ - πŸ“‹ Details: {MODEL_INFO['description']}"""
369
  else:
370
+ status += f"""
371
+ - ⏳ No model loaded yet
372
+ - πŸ”„ Will auto-load on first generation"""
373
 
374
+ status += f"""
375
 
376
+ **πŸ’‘ H200 MIG Advantages:**
377
+ - 69.5GB dedicated memory
378
+ - Isolated GPU partition
379
+ - Consistent performance
380
+ - No interference from other workloads"""
381
+
382
+ return status
383
+
384
+ def force_reload():
385
+ """Force model reload"""
386
  global MODEL, MODEL_INFO
387
  MODEL = None
388
  MODEL_INFO = None
389
+ torch.cuda.empty_cache()
390
+ gc.collect()
391
+
392
+ success = load_working_model()
393
+ logs = "\n".join(LOADING_LOGS[-5:]) # Last 5 entries
394
 
395
+ return f"πŸ”„ **Force Reload Result:** {'βœ… Success' if success else '❌ Failed'}\n\nRecent logs:\n{logs}"
 
396
 
397
+ # Create H200 MIG optimized interface
398
+ with gr.Blocks(title="H200 MIG Video Generator", theme=gr.themes.Glass()) as demo:
399
 
400
  gr.Markdown("""
401
+ # πŸš€ H200 MIG Video Generator
402
 
403
+ **NVIDIA H200 MIG 3g.71gb** β€’ **69.5GB Memory** β€’ **Working Models**
404
  """)
405
 
406
+ # Status bar
407
+ with gr.Row():
408
+ gr.Markdown("""
409
+ <div style="background: linear-gradient(45deg, #FF6B6B, #4ECDC4); padding: 10px; border-radius: 10px; text-align: center; color: white; font-weight: bold;">
410
+ πŸ”₯ H200 MIG ACTIVE - OPTIMIZED FOR YOUR SETUP πŸ”₯
411
+ </div>
412
+ """)
413
+
414
  with gr.Tab("πŸŽ₯ Generate Video"):
415
  with gr.Row():
416
  with gr.Column(scale=1):
417
  prompt_input = gr.Textbox(
418
  label="πŸ“ Video Prompt",
419
+ placeholder="A majestic eagle soaring through mountain peaks at golden hour, cinematic shot with dramatic lighting...",
420
+ lines=4
421
  )
422
 
423
  negative_prompt_input = gr.Textbox(
424
  label="🚫 Negative Prompt",
425
+ placeholder="blurry, low quality, distorted, pixelated, static...",
426
  lines=2
427
  )
428
 
429
+ with gr.Accordion("βš™οΈ H200 MIG Settings", open=True):
430
+ with gr.Row():
431
+ num_frames = gr.Slider(8, 50, value=25, step=1, label="🎬 Frames")
432
+ num_steps = gr.Slider(15, 50, value=25, step=1, label="βš™οΈ Steps")
433
+
434
+ with gr.Row():
435
+ guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="🎯 Guidance")
436
+ seed = gr.Number(value=-1, precision=0, label="🎲 Seed")
437
 
438
+ generate_btn = gr.Button("πŸš€ Generate on H200 MIG", variant="primary", size="lg")
 
 
439
 
440
+ gr.Markdown("""
441
+ **⏱️ Generation:** 1-3 minutes on H200 MIG
442
+
443
+ **πŸ’‘ Auto-detects:** Best working model for your setup
444
+ """)
445
 
446
  with gr.Column(scale=1):
447
+ video_output = gr.Video(label="πŸŽ₯ H200 MIG Generated Video", height=400)
448
+ result_text = gr.Textbox(label="πŸ“‹ Generation Report", lines=10, show_copy_button=True)
449
 
450
  generate_btn.click(
451
  fn=generate_video,
 
453
  outputs=[video_output, result_text]
454
  )
455
 
456
+ # H200 MIG optimized examples
457
  gr.Examples(
458
  examples=[
459
+ [
460
+ "A majestic golden eagle soaring through misty mountain peaks at sunrise",
461
+ "blurry, low quality, static",
462
+ 25, 25, 7.5, 42
463
+ ],
464
+ [
465
+ "Ocean waves crashing against rocks during sunset, cinematic view",
466
+ "pixelated, distorted, watermark",
467
+ 30, 30, 8.0, 123
468
+ ],
469
+ [
470
+ "A peaceful cat sleeping in a sunny garden with flowers",
471
+ "dark, gloomy, low quality",
472
+ 20, 20, 7.0, 456
473
+ ],
474
+ [
475
+ "Time-lapse of clouds moving over a mountain landscape",
476
+ "static, boring, blurry",
477
+ 35, 35, 7.5, 789
478
+ ]
479
  ],
480
  inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
481
  )
482
 
483
+ with gr.Tab("πŸ”§ H200 MIG Status"):
484
  with gr.Row():
485
+ status_btn = gr.Button("πŸ” Check H200 Status", variant="secondary")
486
+ logs_btn = gr.Button("πŸ“‹ View Loading Logs", variant="secondary")
487
+ reload_btn = gr.Button("πŸ”„ Force Reload", variant="secondary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
 
489
+ status_output = gr.Markdown()
490
+ logs_output = gr.Textbox(label="Detailed Loading Logs", lines=15, show_copy_button=True)
491
+ reload_output = gr.Markdown()
 
492
 
493
+ status_btn.click(fn=get_h200_status, outputs=status_output)
494
+ logs_btn.click(fn=get_loading_logs, outputs=logs_output)
495
+ reload_btn.click(fn=force_reload, outputs=reload_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
 
497
+ # Auto-load status
498
+ demo.load(fn=get_h200_status, outputs=status_output)
 
 
 
 
499
 
500
  if __name__ == "__main__":
501
+ demo.queue(max_size=3)
502
  demo.launch(
503
  share=False,
504
  server_name="0.0.0.0",