SkyReels_B

Paused

App Files Files Community

1inkusFace committed on Mar 6

Commit

eca6bdb

verified ·

1 Parent(s): 7e82a5e

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -74

app.py CHANGED Viewed

@@ -1,19 +1,20 @@
 import spaces
 import gradio as gr
-import argparse
 import sys
 import time
 import os
 import random
-#sys.path.append("..")
 from skyreelsinfer import TaskType
 from skyreelsinfer.offload import OffloadConfig
 from skyreelsinfer.skyreels_video_infer import SkyReelsVideoInfer
 from diffusers.utils import export_to_video
 from diffusers.utils import load_image
-import torch
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -24,44 +25,57 @@ torch.backends.cudnn.benchmark = False
 torch.set_float32_matmul_precision("highest")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-predictor = None
-task_type = None
-def get_transformer_model_id(task_type:str) -> str:
     return "Skywork/SkyReels-V1-Hunyuan-I2V" if task_type == "i2v" else "Skywork/SkyReels-V1-Hunyuan-T2V"
-@spaces.GPU()
-def init_predictor(task_type:str, gpu_num:int=1):
     global predictor
-    predictor = SkyReelsVideoInfer(
-        task_type= TaskType.I2V if task_type == "i2v" else TaskType.T2V,
-        model_id=get_transformer_model_id(task_type),
-        quant_model=True,
-        world_size=gpu_num,
-        is_offload=True,
-        offload_config=OffloadConfig(
-            high_cpu_memory=True,
-            parameters_level=True,
-            compiler_transformer=False,
         )
-    )
 @spaces.GPU(duration=90)
-def generate_video(prompt, seed, image=None):
-    global task_type
-    print(f"image:{type(image)}")
     if seed == -1:
         random.seed(time.time())
         seed = int(random.randrange(4294967294))
     kwargs = {
         "prompt": prompt,
-        "height": 512,
-        "width": 512,
-        "num_frames": 97,
-        "num_inference_steps": 30,
-        "seed": seed,
         "guidance_scale": 6.0,
         "embedded_guidance_scale": 1.0,
         "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
@@ -69,57 +83,62 @@ def generate_video(prompt, seed, image=None):
     }
     if task_type == "i2v":
-        assert image is not None, "please input image"
-        kwargs["image"] = load_image(image=image)
-    global predictor
-    output = predictor.inference(kwargs)
-    save_dir = f"./result/{task_type}"
-    os.makedirs(save_dir, exist_ok=True)
-    video_out_file = f"{save_dir}/{prompt[:100].replace('/','')}_{seed}.mp4"
-    print(f"generate video, local path: {video_out_file}")
-    export_to_video(output, video_out_file, fps=24)
-    return video_out_file, kwargs
-def create_gradio_interface(task_type):
-    """Create a Gradio interface based on the task type."""
-    if task_type == "i2v":
-        with gr.Blocks() as demo:
-            with gr.Row():
-                image = gr.Image(label="Upload Image", type="filepath")
-                prompt = gr.Textbox(label="Input Prompt")
-                seed = gr.Number(label="Random Seed", value=-1)
-            submit_button = gr.Button("Generate Video")
-            output_video = gr.Video(label="Generated Video")
-            output_params = gr.Textbox(label="Output Parameters")
-            # Submit button logic
-            submit_button.click(
-                fn=generate_video,
-                inputs=[prompt, seed, image],
-                outputs=[output_video, output_params],
-            )
-    elif task_type == "t2v":
-        with gr.Blocks() as demo:
-            with gr.Row():
-                prompt = gr.Textbox(label="Input Prompt")
-                seed = gr.Number(label="Random Seed", value=-1)
             submit_button = gr.Button("Generate Video")
             output_video = gr.Video(label="Generated Video")
             output_params = gr.Textbox(label="Output Parameters")
-            # Submit button logic
-            submit_button.click(
-                fn=generate_video,
-                inputs=[prompt, seed],
-                outputs=[output_video, output_params],  # Pass task_type as additional input
-            )
-    return demo
-if __name__ == "__main__":
-    # Parse command-line arguments
-    init_predictor(task_type="i2v", gpu_num=1)
-    demo = create_gradio_interface("i2v")
-    demo.launch()

 import spaces
 import gradio as gr
+import argparse  # Retained import; nothing in the code shown here calls argparse
 import sys
 import time
 import os
 import random
+# VERY IMPORTANT: Add the SkyReels-V1 root directory to the Python path
+# Assuming your app.py is in the root of your cloned/forked repo.
+sys.path.append(".")  # Correct path for Hugging Face Space
 from skyreelsinfer import TaskType
 from skyreelsinfer.offload import OffloadConfig
 from skyreelsinfer.skyreels_video_infer import SkyReelsVideoInfer
 from diffusers.utils import export_to_video
 from diffusers.utils import load_image
+import torch # Import Torch
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 torch.set_float32_matmul_precision("highest")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# --- Model Loading (CRITICAL CHANGES) ---
+predictor = None  # Global predictor, BUT loaded inside a function
+def get_transformer_model_id(task_type: str) -> str:
     return "Skywork/SkyReels-V1-Hunyuan-I2V" if task_type == "i2v" else "Skywork/SkyReels-V1-Hunyuan-T2V"
+@spaces.GPU(duration=90)
+def init_predictor(task_type: str):
     global predictor
+    try:
+        predictor = SkyReelsVideoInfer(
+            task_type=TaskType.I2V if task_type == "i2v" else TaskType.T2V,
+            model_id=get_transformer_model_id(task_type),
+            quant_model=True,  # Keep quantization for smaller model size
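+            world_size=1,  # Hard-coded to 1 (the previous version passed gpu_num, default 1); author's note says this is for CPU, though this function is decorated with @spaces.GPU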
+            is_offload=True,  # Keep offload for CPU
+            offload_config=OffloadConfig(
+                high_cpu_memory=True,
+                parameters_level=True,
+                compiler_transformer=False,  # Consider setting to True if compatible
+            )
         )
+        # Explicitly move the predictor to CPU (CRUCIAL)
+        if hasattr(predictor, 'pipe') and hasattr(predictor.pipe, 'to'): #check to make sure the predictor has a pipe and to() method
+            predictor.pipe.to("cpu")
+        return "Model loaded successfully!"
+    except Exception as e:
+        return f"Error loading model: {e}"
 @spaces.GPU(duration=90)
+def generate_video(prompt, seed, image=None, task_type=None):
+    global predictor
+    # Input Type Validation
+    if task_type == "i2v" and not isinstance(image, str):
+        return "Error: For i2v, please provide a valid image file path.", "{}"
+    if not isinstance(prompt, str) or not isinstance(seed, (int, float)):
+        return "Error: Invalid input types for prompt or seed.", "{}"
     if seed == -1:
         random.seed(time.time())
         seed = int(random.randrange(4294967294))
     kwargs = {
         "prompt": prompt,
+        "height": 512,  # Consider reducing for faster processing on CPU
+        "width": 512,  # Consider reducing for faster processing on CPU
+        "num_frames": 97,  # Consider reducing for faster processing on CPU
+        "num_inference_steps": 30,  # Consider reducing for faster processing
+        "seed": int(seed), #make sure seed is int
         "guidance_scale": 6.0,
         "embedded_guidance_scale": 1.0,
         "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
     }
     if task_type == "i2v":
+        if image is None or not os.path.exists(image):
+            return "Error: Image not provided or not found.", "{}"
+        try:
+            kwargs["image"] = load_image(image=image)
+        except Exception as e:
+          return f"Error loading image: {e}", "{}"
+    try:
+        #Ensure Predictor is Loaded
+        if predictor is None:
+            return "Error: Model not initialized. Please reload the Space.", "{}"
+        output = predictor.inference(kwargs)
+        save_dir = f"./result/{task_type}"
+        os.makedirs(save_dir, exist_ok=True)
+        video_out_file = f"{save_dir}/{prompt[:100].replace('/','')}_{int(seed)}.mp4"  # Ensure seed is an integer
+        print(f"Generating video, local path: {video_out_file}")
+        export_to_video(output, video_out_file, fps=24)
+        return video_out_file, str(kwargs)  # Return kwargs as a string
+    except Exception as e:
+        return f"Error during video generation: {e}", "{}"
+# --- Gradio Interface ---
+# We'll define a single interface that handles BOTH i2v and t2v
+with gr.Blocks() as demo:
+    with gr.Row():
+        task_type_dropdown = gr.Dropdown(
+            choices=["i2v", "t2v"], label="Task Type", value="t2v"
+        )  # Default to t2v
+        load_model_button = gr.Button("Load Model")
+        model_status = gr.Textbox(label="Model Status")
+    with gr.Row():
+        with gr.Column():  # Use Columns for better layout
+            prompt = gr.Textbox(label="Input Prompt")
+            seed = gr.Number(label="Random Seed", value=-1)
+            image = gr.Image(label="Upload Image (for i2v)", type="filepath")
             submit_button = gr.Button("Generate Video")
+        with gr.Column():
             output_video = gr.Video(label="Generated Video")
             output_params = gr.Textbox(label="Output Parameters")
+    # Load Model Button Logic
+    load_model_button.click(
+        fn=init_predictor,
+        inputs=[task_type_dropdown],
+        outputs=[model_status]
+    )
+    # Submit Button Logic (Handles both i2v and t2v)
+    submit_button.click(
+        fn=generate_video,
+        inputs=[prompt, seed, image, task_type_dropdown],  # Include task_type
+        outputs=[output_video, output_params],
+    )
+# --- Launch the App ---
+# No need for argparse in app.py for Hugging Face Spaces
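+# demo.launch()  # Left disabled by the author for Hugging Face Spaces. NOTE(review): the previous version called demo.launch() under __main__; confirm the Space still starts without it.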