text2video

Sleeping

App Files Files Community

ozilion committed on Jun 18

Commit

05424ef

verified ·

1 Parent(s): 7a8e438

Update app.py

Browse files

Files changed (1) hide show

app.py +292 -349

app.py CHANGED Viewed

@@ -6,124 +6,87 @@ import numpy as np
 import tempfile
 from typing import Optional, Tuple
 import time
-import subprocess
-import sys
-# ZeroGPU with H200
 try:
     import spaces
     SPACES_AVAILABLE = True
-    print("✅ Spaces library loaded - H200 ready!")
 except ImportError:
     SPACES_AVAILABLE = False
     class spaces:
         @staticmethod
-        def GPU(duration=300):
             def decorator(func): return func
             return decorator
-# Environment check
 IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
 IS_SPACES = os.environ.get("SPACE_ID") is not None
 HAS_CUDA = torch.cuda.is_available()
-print(f"🚀 Environment: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
-def install_missing_packages():
-    """Install any missing packages"""
-    try:
-        print("🔄 Checking and installing packages...")
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "diffusers>=0.31.0"])
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "transformers>=4.36.0"])
-        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "accelerate"])
-        print("✅ Packages updated successfully")
-        return True
-    except Exception as e:
-        print(f"❌ Package installation failed: {e}")
-        return False
-def check_available_pipelines():
-    """Check what pipelines are actually available"""
-    available = {}
-    try:
-        from diffusers import DiffusionPipeline
-        available['DiffusionPipeline'] = True
-    except ImportError:
-        available['DiffusionPipeline'] = False
-    try:
-        from diffusers import LTXVideoPipeline
-        available['LTXVideoPipeline'] = True
-    except ImportError:
-        available['LTXVideoPipeline'] = False
-    try:
-        from diffusers import HunyuanVideoPipeline
-        available['HunyuanVideoPipeline'] = True
-    except ImportError:
-        available['HunyuanVideoPipeline'] = False
-    try:
-        from diffusers import CogVideoXPipeline
-        available['CogVideoXPipeline'] = True
-    except ImportError:
-        available['CogVideoXPipeline'] = False
-    return available
-# Simplified working models - confirmed to work
 WORKING_MODELS = [
     {
         "id": "cerspense/zeroscope_v2_576w",
         "name": "Zeroscope V2",
-        "pipeline": "DiffusionPipeline",
         "resolution": (576, 320),
         "max_frames": 24,
         "dtype": torch.float16,
-        "description": "Fast and reliable video generation"
     },
     {
         "id": "damo-vilab/text-to-video-ms-1.7b",
         "name": "ModelScope T2V",
-        "pipeline": "DiffusionPipeline",
         "resolution": (256, 256),
-        "max_frames": 16,
         "dtype": torch.float16,
-        "description": "Stable text-to-video model"
     },
     {
         "id": "ali-vilab/text-to-video-ms-1.7b",
         "name": "AliVilab T2V",
-        "pipeline": "DiffusionPipeline",
-        "resolution": (256, 256),
         "max_frames": 16,
         "dtype": torch.float16,
-        "description": "Alternative ModelScope version"
-    }
-]
-# Try premium models but with fallbacks
-PREMIUM_MODELS = [
-    {
-        "id": "Lightricks/LTX-Video",
-        "name": "LTX-Video",
-        "pipeline": "LTXVideoPipeline",
-        "fallback_pipeline": "DiffusionPipeline",
-        "resolution": (512, 512),
-        "max_frames": 50,
-        "dtype": torch.bfloat16,
-        "description": "Premium quality video generation"
-    },
-    {
-        "id": "tencent/HunyuanVideo",
-        "name": "HunyuanVideo",
-        "pipeline": "HunyuanVideoPipeline",
-        "fallback_pipeline": "DiffusionPipeline",
-        "resolution": (512, 512),
-        "max_frames": 40,
-        "dtype": torch.bfloat16,
-        "description": "Advanced video model"
     }
 ]
@@ -133,151 +96,144 @@ MODEL_INFO = None
 LOADING_LOGS = []
 def log_loading(message):
-    """Log loading attempts"""
     global LOADING_LOGS
     print(message)
-    LOADING_LOGS.append(message)
-def load_any_working_model():
-    """Load any working model - premium first, then fallbacks"""
     global MODEL, MODEL_INFO, LOADING_LOGS
     if MODEL is not None:
         return True
     LOADING_LOGS = []
-    log_loading("🚀 Starting H200 model loading...")
-    # Install packages first
-    if not install_missing_packages():
-        log_loading("❌ Package installation failed")
-    # Check available pipelines
-    available_pipelines = check_available_pipelines()
-    log_loading(f"📋 Available pipelines: {available_pipelines}")
-    # Try premium models first
-    log_loading("🎯 Attempting premium models...")
-    for model_config in PREMIUM_MODELS:
-        if try_load_model(model_config, available_pipelines):
             return True
-    # Fallback to working models
-    log_loading("🔄 Falling back to reliable models...")
-    for model_config in WORKING_MODELS:
-        if try_load_model(model_config, available_pipelines):
-            return True
-    log_loading("❌ All models failed to load")
     return False
-def try_load_model(model_config, available_pipelines):
-    """Try to load a specific model with fallbacks"""
     global MODEL, MODEL_INFO
-    model_id = model_config["id"]
-    model_name = model_config["name"]
-    log_loading(f"🔄 Trying {model_name}...")
     try:
-        from diffusers import DiffusionPipeline
-        # Strategy 1: Try specific pipeline if available
-        primary_pipeline = model_config.get("pipeline", "DiffusionPipeline")
-        if available_pipelines.get(primary_pipeline, False):
-            try:
-                log_loading(f"  📥 Loading with {primary_pipeline}...")
-                if primary_pipeline == "LTXVideoPipeline":
-                    from diffusers import LTXVideoPipeline
-                    pipe = LTXVideoPipeline.from_pretrained(
-                        model_id,
-                        torch_dtype=model_config["dtype"],
-                        use_safetensors=True,
-                        variant="fp16"
-                    )
-                elif primary_pipeline == "HunyuanVideoPipeline":
-                    from diffusers import HunyuanVideoPipeline
-                    pipe = HunyuanVideoPipeline.from_pretrained(
-                        model_id,
-                        torch_dtype=model_config["dtype"],
-                        use_safetensors=True,
-                        variant="fp16"
-                    )
-                else:
-                    pipe = DiffusionPipeline.from_pretrained(
-                        model_id,
-                        torch_dtype=model_config["dtype"],
-                        use_safetensors=True,
-                        variant="fp16"
-                    )
-                log_loading(f"  ✅ Loaded with {primary_pipeline}")
-            except Exception as e:
-                log_loading(f"  ❌ {primary_pipeline} failed: {e}")
-                raise e
-        # Strategy 2: Fallback to DiffusionPipeline
         else:
-            log_loading(f"  🔄 Using DiffusionPipeline fallback...")
-            pipe = DiffusionPipeline.from_pretrained(
-                model_id,
-                torch_dtype=model_config["dtype"],
-                use_safetensors=True,
-                variant="fp16",
-                trust_remote_code=True
-            )
-        # Move to H200 GPU
         if HAS_CUDA:
             pipe = pipe.to("cuda")
-            log_loading(f"  📱 Moved to H200 CUDA")
-        # Enable optimizations
-        if hasattr(pipe, 'enable_sequential_cpu_offload'):
-            pipe.enable_sequential_cpu_offload()
         if hasattr(pipe, 'enable_vae_slicing'):
             pipe.enable_vae_slicing()
         if hasattr(pipe, 'enable_vae_tiling'):
             pipe.enable_vae_tiling()
-        log_loading(f"  ⚡ Optimizations enabled")
-        # Test generation
-        log_loading(f"  🧪 Testing {model_name}...")
         MODEL = pipe
-        MODEL_INFO = model_config
-        log_loading(f"✅ {model_name} loaded and ready!")
         return True
     except Exception as e:
         log_loading(f"❌ {model_name} failed: {str(e)}")
-        # Clear memory before trying next
         if HAS_CUDA:
             torch.cuda.empty_cache()
         gc.collect()
         return False
-@spaces.GPU(duration=180) if SPACES_AVAILABLE else lambda x: x
 def generate_video(
     prompt: str,
     negative_prompt: str = "",
-    num_frames: int = 16,
-    num_inference_steps: int = 20,
     guidance_scale: float = 7.5,
     seed: int = -1
 ) -> Tuple[Optional[str], str]:
-    """Generate video with loaded model"""
     global MODEL, MODEL_INFO
     # Load model if needed
-    if not load_any_working_model():
-        return None, f"❌ No models could be loaded. Check logs for details."
     # Input validation
     if not prompt.strip():
@@ -287,14 +243,12 @@ def generate_video(
     max_frames = MODEL_INFO["max_frames"]
     width, height = MODEL_INFO["resolution"]
-    # Limit parameters to model capabilities
     num_frames = min(max(num_frames, 8), max_frames)
     try:
-        # Clear H200 memory
-        if HAS_CUDA:
-            torch.cuda.empty_cache()
-        gc.collect()
         # Set seed
         if seed == -1:
@@ -303,165 +257,195 @@ def generate_video(
         device = "cuda" if HAS_CUDA else "cpu"
         generator = torch.Generator(device=device).manual_seed(seed)
-        print(f"🎬 H200 Generation: {MODEL_INFO['name']} - {prompt[:50]}...")
         start_time = time.time()
-        # Generate with autocast
         with torch.autocast(device, dtype=MODEL_INFO["dtype"]):
-            result = MODEL(
-                prompt=prompt,
-                negative_prompt=negative_prompt if negative_prompt.strip() else None,
-                num_frames=num_frames,
-                height=height,
-                width=width,
-                num_inference_steps=num_inference_steps,
-                guidance_scale=guidance_scale,
-                generator=generator
-            )
         end_time = time.time()
         generation_time = end_time - start_time
-        # Export video
-        video_frames = result.frames[0]
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
             from diffusers.utils import export_to_video
-            export_to_video(video_frames, tmp_file.name, fps=8)
             video_path = tmp_file.name
-        # Clear memory
-        if HAS_CUDA:
-            torch.cuda.empty_cache()
-        gc.collect()
-        success_msg = f"""✅ **H200 Video Generated!**
 🤖 **Model:** {MODEL_INFO['name']}
 📝 **Prompt:** {prompt}
-🎬 **Frames:** {num_frames}
 📐 **Resolution:** {width}x{height}
 ⚙️ **Inference Steps:** {num_inference_steps}
-🎯 **Guidance:** {guidance_scale}
 🎲 **Seed:** {seed}
-⏱️ **Time:** {generation_time:.1f}s
-🖥️ **Device:** H200 CUDA
-💡 **Notes:** {MODEL_INFO['description']}"""
         return video_path, success_msg
     except Exception as e:
         if HAS_CUDA:
             torch.cuda.empty_cache()
         gc.collect()
-        return None, f"❌ Generation failed: {str(e)}"
 def get_loading_logs():
-    """Get detailed loading logs"""
     global LOADING_LOGS
     if not LOADING_LOGS:
-        return "No loading attempts yet. Click 'Load Model' to start."
     return "\n".join(LOADING_LOGS)
-def get_system_diagnostic():
-    """Comprehensive system diagnostic"""
-    diagnostic = []
-    # Environment check
-    diagnostic.append("## 🖥️ H200 System Diagnostic")
-    diagnostic.append(f"- ZeroGPU: {'✅' if IS_ZERO_GPU else '❌'}")
-    diagnostic.append(f"- HF Spaces: {'✅' if IS_SPACES else '❌'}")
-    diagnostic.append(f"- CUDA: {'✅' if HAS_CUDA else '❌'}")
-    # GPU info
-    if HAS_CUDA:
-        try:
-            gpu_name = torch.cuda.get_device_name(0)
-            total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
-            diagnostic.append(f"- GPU: {gpu_name}")
-            diagnostic.append(f"- Memory: {total_memory:.1f} GB")
-        except Exception as e:
-            diagnostic.append(f"- GPU Error: {e}")
-    # Package versions
-    try:
-        import diffusers
-        diagnostic.append(f"- Diffusers: {diffusers.__version__}")
-    except ImportError:
-        diagnostic.append("- Diffusers: ❌ Not installed")
-    try:
-        import transformers
-        diagnostic.append(f"- Transformers: {transformers.__version__}")
-    except ImportError:
-        diagnostic.append("- Transformers: ❌ Not installed")
-    # Available pipelines
-    available = check_available_pipelines()
-    diagnostic.append("\n## 📋 Available Pipelines")
-    for pipeline, status in available.items():
-        diagnostic.append(f"- {pipeline}: {'✅' if status else '❌'}")
-    # Model status
-    diagnostic.append("\n## 🤖 Model Status")
     if MODEL is not None:
-        diagnostic.append(f"- Loaded: ✅ {MODEL_INFO['name']}")
-        diagnostic.append(f"- Resolution: {MODEL_INFO['resolution']}")
-        diagnostic.append(f"- Max Frames: {MODEL_INFO['max_frames']}")
     else:
-        diagnostic.append("- Loaded: ❌ No model loaded")
-    return "\n".join(diagnostic)
-def force_load_model():
-    """Force reload model"""
     global MODEL, MODEL_INFO
     MODEL = None
     MODEL_INFO = None
-    success = load_any_working_model()
-    return f"🔄 Force reload: {'✅ Success' if success else '❌ Failed'}"
-# Create diagnostic interface
-with gr.Blocks(title="H200 Video Generator - Debug Mode", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🔧 H200 Video Generator - Debug Mode
-    **Systematic model loading with full diagnostics**
     """)
     with gr.Tab("🎥 Generate Video"):
         with gr.Row():
             with gr.Column(scale=1):
                 prompt_input = gr.Textbox(
                     label="📝 Video Prompt",
-                    placeholder="A cat playing with a ball in a sunny garden...",
-                    lines=3
                 )
                 negative_prompt_input = gr.Textbox(
                     label="🚫 Negative Prompt",
-                    placeholder="blurry, low quality, distorted...",
                     lines=2
                 )
-                with gr.Row():
-                    num_frames = gr.Slider(8, 50, value=16, step=1, label="🎬 Frames")
-                    num_steps = gr.Slider(10, 50, value=20, step=1, label="⚙️ Steps")
-                with gr.Row():
-                    guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="🎯 Guidance")
-                    seed = gr.Number(value=-1, precision=0, label="🎲 Seed")
-                generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
             with gr.Column(scale=1):
-                video_output = gr.Video(label="🎥 Generated Video", height=400)
-                result_text = gr.Textbox(label="📋 Results", lines=8, show_copy_button=True)
         generate_btn.click(
             fn=generate_video,
@@ -469,93 +453,52 @@ with gr.Blocks(title="H200 Video Generator - Debug Mode", theme=gr.themes.Soft()
             outputs=[video_output, result_text]
         )
-        # Simple examples
         gr.Examples(
             examples=[
-                ["A peaceful cat sleeping in a sunny garden", "", 16, 20, 7.5, 42],
-                ["Ocean waves gently washing the shore", "blurry", 20, 25, 8.0, 123],
-                ["A butterfly landing on a flower", "", 16, 20, 7.0, 456]
             ],
             inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
         )
-    with gr.Tab("🔧 Debug & Diagnostics"):
         with gr.Row():
-            diagnostic_btn = gr.Button("🔍 System Diagnostic", variant="secondary")
-            logs_btn = gr.Button("📋 Loading Logs", variant="secondary")
-            reload_btn = gr.Button("🔄 Force Reload Model", variant="secondary")
-        diagnostic_output = gr.Markdown()
-        logs_output = gr.Textbox(label="Loading Logs", lines=15, show_copy_button=True)
-        reload_output = gr.Textbox(label="Reload Result", lines=2)
-        diagnostic_btn.click(fn=get_system_diagnostic, outputs=diagnostic_output)
-        logs_btn.click(fn=get_loading_logs, outputs=logs_output)
-        reload_btn.click(fn=force_load_model, outputs=reload_output)
-        # Auto-load diagnostic
-        demo.load(fn=get_system_diagnostic, outputs=diagnostic_output)
-    with gr.Tab("💡 Troubleshooting"):
-        gr.Markdown("""
-        ## 🔧 H200 Troubleshooting Guide
-        ### 🚨 Common Issues & Solutions:
-        **❌ "All premium models failed to load"**
-        **Possible Causes:**
-        1. **Pipeline not available:** LTXVideoPipeline, HunyuanVideoPipeline may not be in stable diffusers
-        2. **Model access:** Some models may be gated or require authentication
-        3. **Memory issues:** Even H200 can have limits during loading
-        4. **Network timeouts:** Large model downloads can timeout
-        **Solutions:**
-        1. **Check System Diagnostic tab** - see what pipelines are available
-        2. **View Loading Logs** - detailed error messages
-        3. **Force Reload Model** - retry with fresh state
-        4. **Wait and retry** - sometimes it's just a temporary issue
-        ### 🎯 Step-by-Step Debugging:
-        **Step 1: Check Environment**
-        - Click "System Diagnostic"
-        - Verify H200 GPU is detected
-        - Check if diffusers/transformers are installed
-        **Step 2: Check Available Pipelines**
-        - Look for ✅ next to DiffusionPipeline (minimum required)
-        - LTXVideoPipeline/HunyuanVideoPipeline may be ❌ (that's ok)
-        **Step 3: Check Loading Logs**
-        - Click "Loading Logs" to see detailed attempt logs
-        - Look for specific error messages
-        - Note which models were tried
-        **Step 4: Force Reload**
-        - Click "Force Reload Model" if needed
-        - This clears cache and retries
-        ### 🔄 Fallback Strategy:
-        This app tries models in this order:
-        1. **LTX-Video** (premium)
-        2. **HunyuanVideo** (premium)
-        3. **Zeroscope V2** (reliable fallback)
-        4. **ModelScope T2V** (backup)
-        5. **AliVilab T2V** (final fallback)
-        At least one should work!
-        ### 💡 Tips:
-        - First run always takes longer (model download)
-        - H200 has plenty of memory, so memory errors are rare
-        - Check HuggingFace status if all models fail
-        - Some models may need authentication tokens
-        """)
 if __name__ == "__main__":
-    demo.queue(max_size=5)
     demo.launch(
         share=False,
         server_name="0.0.0.0",

 import tempfile
 from typing import Optional, Tuple
 import time
+# ZeroGPU support (even without detection)
 try:
     import spaces
     SPACES_AVAILABLE = True
 except ImportError:
     SPACES_AVAILABLE = False
     class spaces:
         @staticmethod
+        def GPU(duration=240):
             def decorator(func): return func
             return decorator
+# Environment
 IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
 IS_SPACES = os.environ.get("SPACE_ID") is not None
 HAS_CUDA = torch.cuda.is_available()
+print(f"🚀 H200 MIG Environment: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
+# Working models based on your diagnostic
 WORKING_MODELS = [
+    {
+        "id": "Lightricks/LTX-Video",
+        "name": "LTX-Video",
+        "pipeline_class": "DiffusionPipeline",
+        "variant": None,  # No fp16 variant available
+        "use_safetensors": False,  # Use .bin files
+        "resolution": (512, 512),
+        "max_frames": 50,
+        "dtype": torch.bfloat16,
+        "priority": 1,
+        "description": "LTX-Video via DiffusionPipeline (no variant)"
+    },
+    {
+        "id": "THUDM/CogVideoX-5b",
+        "name": "CogVideoX-5B",
+        "pipeline_class": "CogVideoXPipeline",
+        "variant": None,
+        "use_safetensors": True,
+        "resolution": (720, 480),
+        "max_frames": 49,
+        "dtype": torch.bfloat16,
+        "priority": 2,
+        "description": "CogVideo 5B model - proven to work"
+    },
     {
         "id": "cerspense/zeroscope_v2_576w",
         "name": "Zeroscope V2",
+        "pipeline_class": "DiffusionPipeline",
+        "variant": None,  # No fp16 variant
+        "use_safetensors": False,  # Use .bin files
         "resolution": (576, 320),
         "max_frames": 24,
         "dtype": torch.float16,
+        "priority": 3,
+        "description": "Zeroscope without safetensors"
     },
     {
         "id": "damo-vilab/text-to-video-ms-1.7b",
         "name": "ModelScope T2V",
+        "pipeline_class": "DiffusionPipeline",
+        "variant": None,
+        "use_safetensors": False,
         "resolution": (256, 256),
+        "max_frames": 16,
         "dtype": torch.float16,
+        "priority": 4,
+        "description": "ModelScope reliable fallback"
     },
     {
         "id": "ali-vilab/text-to-video-ms-1.7b",
         "name": "AliVilab T2V",
+        "pipeline_class": "DiffusionPipeline",
+        "variant": None,
+        "use_safetensors": False,
+        "resolution": (256, 256),
         "max_frames": 16,
         "dtype": torch.float16,
+        "priority": 5,
+        "description": "AliVilab alternative"
     }
 ]
 LOADING_LOGS = []
 def log_loading(message):
+    """Print ``message`` and append it to LOADING_LOGS with an HH:MM:SS timestamp prefix."""
     global LOADING_LOGS
     print(message)
+    # Stored entries look like "12:34:56 - <message>"; the printed copy has no timestamp.
+    LOADING_LOGS.append(f"{time.strftime('%H:%M:%S')} - {message}")
+def get_h200_memory():
+    """Return ``(total_gb, allocated_gb)`` for CUDA device 0.
+
+    Both values are in GiB (bytes / 1024**3). ``total_gb`` comes from the
+    device properties and ``allocated_gb`` from ``torch.cuda.memory_allocated``.
+    Returns ``(0, 0)`` when CUDA is unavailable or the query fails, so callers
+    can always format the result without extra checks.
+    """
+    if not HAS_CUDA:
+        return 0, 0
+    try:
+        total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
+        allocated = torch.cuda.memory_allocated(0) / (1024**3)
+    except Exception:
+        # Best-effort stats only: a failed query must not break loading or the
+        # status page. ``Exception`` (not a bare ``except``) lets
+        # KeyboardInterrupt/SystemExit propagate.
+        return 0, 0
+    return total, allocated
+def load_working_model():
+    """Ensure a pipeline is loaded into the module-level MODEL.
+
+    Returns True immediately if MODEL is already set. Otherwise resets
+    LOADING_LOGS, then tries each WORKING_MODELS entry in ascending
+    ``priority`` order via try_load_specific_model() and returns True on the
+    first success. Returns False if every candidate fails.
+    """
     global MODEL, MODEL_INFO, LOADING_LOGS
     if MODEL is not None:
         return True
+    # Start a fresh log for this loading attempt (previous entries are discarded).
     LOADING_LOGS = []
+    # NOTE: "69.5GB" is a literal in the message; measured values are logged on the next line.
+    log_loading("🚀 H200 MIG (69.5GB) model loading started...")
+    total_mem, allocated_mem = get_h200_memory()
+    log_loading(f"💾 Initial H200 memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB used")
+    # Lowest "priority" value is tried first
+    sorted_models = sorted(WORKING_MODELS, key=lambda x: x["priority"])
+    for model_config in sorted_models:
+        if try_load_specific_model(model_config):
             return True
+    log_loading("❌ All models failed on H200 MIG")
     return False
+def try_load_specific_model(config):
+    """Try to load one WORKING_MODELS entry and install it as the active model.
+
+    ``config`` is a dict read for the keys "id", "name", "pipeline_class",
+    "variant", "use_safetensors" and "dtype". On success the pipeline is
+    stored in the module-level MODEL, ``config`` in MODEL_INFO, and True is
+    returned. Any exception is logged via log_loading(), memory is cleaned
+    up, and False is returned so the caller can try the next candidate.
+    """
     global MODEL, MODEL_INFO
+    model_id = config["id"]
+    model_name = config["name"]
+    log_loading(f"🔄 Attempting {model_name}...")
+    log_loading(f"  📋 Config: {config['pipeline_class']}, variant={config['variant']}, safetensors={config['use_safetensors']}")
     try:
+        # Free leftovers from a previous failed attempt before loading
+        if HAS_CUDA:
+            torch.cuda.empty_cache()
+        gc.collect()
+        # Pick the pipeline class; anything other than "CogVideoXPipeline"
+        # falls back to the generic DiffusionPipeline
+        if config["pipeline_class"] == "CogVideoXPipeline":
+            from diffusers import CogVideoXPipeline
+            PipelineClass = CogVideoXPipeline
         else:
+            from diffusers import DiffusionPipeline
+            PipelineClass = DiffusionPipeline
+        # Keyword arguments forwarded to from_pretrained()
+        load_params = {
+            "torch_dtype": config["dtype"],
+            "trust_remote_code": True
+        }
+        # Add variant only if specified (None/empty is not forwarded)
+        if config["variant"]:
+            load_params["variant"] = config["variant"]
+        # use_safetensors is forwarded only when True; a False value is NOT
+        # passed, so from_pretrained() uses its own default.
+        # NOTE(review): WORKING_MODELS comments say False means "Use .bin
+        # files" - confirm whether False should be forwarded explicitly.
+        if config["use_safetensors"]:
+            load_params["use_safetensors"] = True
+        log_loading(f"  📥 Loading with params: {load_params}")
+        # Load model
+        pipe = PipelineClass.from_pretrained(model_id, **load_params)
+        # Move to GPU when CUDA is available; otherwise the pipeline is left where it was loaded
         if HAS_CUDA:
             pipe = pipe.to("cuda")
+            log_loading(f"  📱 Moved to H200 MIG CUDA")
+        # Optional memory optimizations, each enabled only if the pipeline exposes the method
         if hasattr(pipe, 'enable_vae_slicing'):
             pipe.enable_vae_slicing()
+            log_loading(f"  ⚡ VAE slicing enabled")
         if hasattr(pipe, 'enable_vae_tiling'):
             pipe.enable_vae_tiling()
+            log_loading(f"  ⚡ VAE tiling enabled")
+        # NOTE(review): diffusers documents `enable_xformers_memory_efficient_attention`;
+        # this attribute name may never exist, making the branch a no-op - confirm.
+        if hasattr(pipe, 'enable_memory_efficient_attention'):
+            pipe.enable_memory_efficient_attention()
+            log_loading(f"  ⚡ Memory efficient attention enabled")
+        # No CPU offload is enabled here; the message below is informational only
+        log_loading(f"  🚀 Keeping model fully in H200 GPU memory")
+        # Memory check after loading
+        total_mem, allocated_mem = get_h200_memory()
+        log_loading(f"  💾 Post-load memory: {allocated_mem:.1f}GB used / {total_mem:.1f}GB total")
         MODEL = pipe
+        MODEL_INFO = config
+        log_loading(f"✅ {model_name} loaded successfully on H200 MIG!")
         return True
     except Exception as e:
         log_loading(f"❌ {model_name} failed: {str(e)}")
+        # Clear memory before next attempt
         if HAS_CUDA:
             torch.cuda.empty_cache()
         gc.collect()
         return False
+@spaces.GPU(duration=240) if SPACES_AVAILABLE else lambda x: x
 def generate_video(
     prompt: str,
     negative_prompt: str = "",
+    num_frames: int = 25,
+    num_inference_steps: int = 25,
     guidance_scale: float = 7.5,
     seed: int = -1
 ) -> Tuple[Optional[str], str]:
+    """Generate video with H200 MIG power"""
     global MODEL, MODEL_INFO
     # Load model if needed
+    if not load_working_model():
+        logs = "\n".join(LOADING_LOGS[-10:])  # Last 10 log entries
+        return None, f"❌ Model loading failed on H200 MIG\n\nRecent logs:\n{logs}"
     # Input validation
     if not prompt.strip():
     max_frames = MODEL_INFO["max_frames"]
     width, height = MODEL_INFO["resolution"]
+    # Adjust parameters for model
     num_frames = min(max(num_frames, 8), max_frames)
     try:
+        # H200 MIG memory management
+        start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
         # Set seed
         if seed == -1:
         device = "cuda" if HAS_CUDA else "cpu"
         generator = torch.Generator(device=device).manual_seed(seed)
+        log_loading(f"🎬 H200 MIG Generation: {MODEL_INFO['name']}")
+        log_loading(f"📐 {width}x{height}, {num_frames} frames, {num_inference_steps} steps")
         start_time = time.time()
+        # Generate with H200 MIG autocast
         with torch.autocast(device, dtype=MODEL_INFO["dtype"]):
+            # Prepare generation arguments
+            gen_kwargs = {
+                "prompt": prompt,
+                "num_frames": num_frames,
+                "height": height,
+                "width": width,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "generator": generator
+            }
+            # Add negative prompt if provided
+            if negative_prompt.strip():
+                gen_kwargs["negative_prompt"] = negative_prompt
+            # Model-specific adjustments
+            if MODEL_INFO["name"] == "CogVideoX-5B":
+                gen_kwargs["num_videos_per_prompt"] = 1
+            log_loading(f"🚀 Starting H200 MIG generation...")
+            result = MODEL(**gen_kwargs)
         end_time = time.time()
         generation_time = end_time - start_time
+        # Extract video frames
+        if hasattr(result, 'frames'):
+            video_frames = result.frames[0]
+        elif hasattr(result, 'videos'):
+            video_frames = result.videos[0]
+        else:
+            return None, "❌ Could not extract video frames"
+        # Export video
         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
             from diffusers.utils import export_to_video
+            fps = 8
+            export_to_video(video_frames, tmp_file.name, fps=fps)
             video_path = tmp_file.name
+        # Memory stats
+        end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
+        memory_used = end_memory - start_memory
+        success_msg = f"""✅ **H200 MIG Video Generated!**
 🤖 **Model:** {MODEL_INFO['name']}
 📝 **Prompt:** {prompt}
+🎬 **Frames:** {num_frames} @ {fps} FPS
 📐 **Resolution:** {width}x{height}
 ⚙️ **Inference Steps:** {num_inference_steps}
+🎯 **Guidance Scale:** {guidance_scale}
 🎲 **Seed:** {seed}
+⏱️ **Generation Time:** {generation_time:.1f}s
+🖥️ **Device:** H200 MIG (69.5GB)
+💾 **Memory Used:** {memory_used:.1f}GB
+🎥 **Video Length:** {num_frames/fps:.1f}s
+📋 **Notes:** {MODEL_INFO['description']}"""
+        log_loading(f"✅ Generation completed in {generation_time:.1f}s")
         return video_path, success_msg
+    except torch.cuda.OutOfMemoryError:
+        torch.cuda.empty_cache()
+        gc.collect()
+        return None, "❌ H200 MIG memory exceeded (rare!). Try reducing parameters."
     except Exception as e:
         if HAS_CUDA:
             torch.cuda.empty_cache()
         gc.collect()
+        return None, f"❌ H200 MIG generation failed: {str(e)}"
 def get_loading_logs():
+    """Return every LOADING_LOGS entry joined by newlines, or a placeholder string if the log is empty."""
     global LOADING_LOGS
     if not LOADING_LOGS:
+        return "No loading attempts yet."
     return "\n".join(LOADING_LOGS)
+def get_h200_status():
+    """Build a Markdown status report for the status tab.
+
+    Memory figures come from get_h200_memory() (zeros when CUDA is
+    unavailable or the query fails). The GPU name and the "69.5GB" figure
+    in the text are literals, not values queried from the device. The model
+    section reflects the module-level MODEL / MODEL_INFO.
+    """
+    total_mem, allocated_mem = get_h200_memory()
+    status = f"""## 🚀 H200 MIG Status
+**🖥️ Hardware:**
+- GPU: NVIDIA H200 MIG 3g.71gb
+- Total Memory: {total_mem:.1f} GB
+- Allocated: {allocated_mem:.1f} GB
+- Free: {total_mem - allocated_mem:.1f} GB
+**🤖 Current Model:**"""
     if MODEL is not None:
+        # "Memory Usage" reuses allocated_mem from above, i.e. the
+        # torch.cuda.memory_allocated(0) figure, not a per-model measurement.
+        status += f"""
+- ✅ **{MODEL_INFO['name']}** loaded and ready
+- 📐 Resolution: {MODEL_INFO['resolution']}
+- 🎬 Max Frames: {MODEL_INFO['max_frames']}
+- 💾 Memory Usage: {allocated_mem:.1f}GB
+- 📋 Details: {MODEL_INFO['description']}"""
     else:
+        status += f"""
+- ⏳ No model loaded yet
+- 🔄 Will auto-load on first generation"""
+    status += f"""
+**💡 H200 MIG Advantages:**
+- 69.5GB dedicated memory
+- Isolated GPU partition
+- Consistent performance
+- No interference from other workloads"""
+    return status
+def force_reload():
+    """Discard the current model and run load_working_model() again.
+
+    Returns a Markdown string with the success/failure result followed by
+    the last five LOADING_LOGS entries.
+    """
     global MODEL, MODEL_INFO
+    # Clear the cached pipeline first so load_working_model() does not short-circuit
     MODEL = None
     MODEL_INFO = None
+    # NOTE(review): unlike other call sites, this empty_cache() is not guarded by HAS_CUDA - confirm intended
+    torch.cuda.empty_cache()
+    gc.collect()
+    success = load_working_model()
+    logs = "\n".join(LOADING_LOGS[-5:])  # Last 5 entries
+    return f"🔄 **Force Reload Result:** {'✅ Success' if success else '❌ Failed'}\n\nRecent logs:\n{logs}"
+# Create H200 MIG optimized interface
+with gr.Blocks(title="H200 MIG Video Generator", theme=gr.themes.Glass()) as demo:
     gr.Markdown("""
+    # 🚀 H200 MIG Video Generator
+    **NVIDIA H200 MIG 3g.71gb** • **69.5GB Memory** • **Working Models**
     """)
+    # Status bar
+    with gr.Row():
+        gr.Markdown("""
+        <div style="background: linear-gradient(45deg, #FF6B6B, #4ECDC4); padding: 10px; border-radius: 10px; text-align: center; color: white; font-weight: bold;">
+        🔥 H200 MIG ACTIVE - OPTIMIZED FOR YOUR SETUP 🔥
+        </div>
+        """)
     with gr.Tab("🎥 Generate Video"):
         with gr.Row():
             with gr.Column(scale=1):
                 prompt_input = gr.Textbox(
                     label="📝 Video Prompt",
+                    placeholder="A majestic eagle soaring through mountain peaks at golden hour, cinematic shot with dramatic lighting...",
+                    lines=4
                 )
                 negative_prompt_input = gr.Textbox(
                     label="🚫 Negative Prompt",
+                    placeholder="blurry, low quality, distorted, pixelated, static...",
                     lines=2
                 )
+                with gr.Accordion("⚙️ H200 MIG Settings", open=True):
+                    with gr.Row():
+                        num_frames = gr.Slider(8, 50, value=25, step=1, label="🎬 Frames")
+                        num_steps = gr.Slider(15, 50, value=25, step=1, label="⚙️ Steps")
+                    with gr.Row():
+                        guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="🎯 Guidance")
+                        seed = gr.Number(value=-1, precision=0, label="🎲 Seed")
+                generate_btn = gr.Button("🚀 Generate on H200 MIG", variant="primary", size="lg")
+                gr.Markdown("""
+                **⏱️ Generation:** 1-3 minutes on H200 MIG
+                **💡 Auto-detects:** Best working model for your setup
+                """)
             with gr.Column(scale=1):
+                video_output = gr.Video(label="🎥 H200 MIG Generated Video", height=400)
+                result_text = gr.Textbox(label="📋 Generation Report", lines=10, show_copy_button=True)
         generate_btn.click(
             fn=generate_video,
             outputs=[video_output, result_text]
         )
+        # H200 MIG optimized examples
         gr.Examples(
             examples=[
+                [
+                    "A majestic golden eagle soaring through misty mountain peaks at sunrise",
+                    "blurry, low quality, static",
+                    25, 25, 7.5, 42
+                ],
+                [
+                    "Ocean waves crashing against rocks during sunset, cinematic view",
+                    "pixelated, distorted, watermark",
+                    30, 30, 8.0, 123
+                ],
+                [
+                    "A peaceful cat sleeping in a sunny garden with flowers",
+                    "dark, gloomy, low quality",
+                    20, 20, 7.0, 456
+                ],
+                [
+                    "Time-lapse of clouds moving over a mountain landscape",
+                    "static, boring, blurry",
+                    35, 35, 7.5, 789
+                ]
             ],
             inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
         )
+    with gr.Tab("🔧 H200 MIG Status"):
         with gr.Row():
+            status_btn = gr.Button("🔍 Check H200 Status", variant="secondary")
+            logs_btn = gr.Button("📋 View Loading Logs", variant="secondary")
+            reload_btn = gr.Button("🔄 Force Reload", variant="secondary")
+        status_output = gr.Markdown()
+        logs_output = gr.Textbox(label="Detailed Loading Logs", lines=15, show_copy_button=True)
+        reload_output = gr.Markdown()
+        status_btn.click(fn=get_h200_status, outputs=status_output)
+        logs_btn.click(fn=get_loading_logs, outputs=logs_output)
+        reload_btn.click(fn=force_reload, outputs=reload_output)
+        # Auto-load status
+        demo.load(fn=get_h200_status, outputs=status_output)
 if __name__ == "__main__":
+    demo.queue(max_size=3)
     demo.launch(
         share=False,
         server_name="0.0.0.0",