
YoussefAnso committed
Commit 1452c34 · Parent: b948a73

fixed api naming

Files changed (1)
  1. app.py +179 -153
app.py CHANGED
@@ -20,7 +20,66 @@ import argparse
 from model import CRM
 from inference import generate3d
 
+# Move model initialization into a function that will be called by workers
+def init_model():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--stage1_config",
+        type=str,
+        default="configs/nf7_v3_SNR_rd_size_stroke.yaml",
+        help="config for stage1",
+    )
+    parser.add_argument(
+        "--stage2_config",
+        type=str,
+        default="configs/stage2-v2-snr.yaml",
+        help="config for stage2",
+    )
+    parser.add_argument("--device", type=str, default="cuda")
+    args = parser.parse_args()
+
+    # Download model files
+    crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
+    specs = json.load(open("configs/specs_objaverse_total.json"))
+    model = CRM(specs)
+    model.load_state_dict(torch.load(crm_path, map_location="cpu"), strict=False)
+    model = model.to(args.device)
+
+    # Load configs
+    stage1_config = OmegaConf.load(args.stage1_config).config
+    stage2_config = OmegaConf.load(args.stage2_config).config
+    stage2_sampler_config = stage2_config.sampler
+    stage1_sampler_config = stage1_config.sampler
+
+    stage1_model_config = stage1_config.models
+    stage2_model_config = stage2_config.models
+
+    xyz_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="ccm-diffusion.pth")
+    pixel_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth")
+    stage1_model_config.resume = pixel_path
+    stage2_model_config.resume = xyz_path
+
+    pipeline = TwoStagePipeline(
+        stage1_model_config,
+        stage2_model_config,
+        stage1_sampler_config,
+        stage2_sampler_config,
+        device=args.device,
+        dtype=torch.float32
+    )
+
+    return model, pipeline
+
+# Global variables to store model and pipeline
+model = None
 pipeline = None
+
+def get_model():
+    global model, pipeline
+    if model is None or pipeline is None:
+        model, pipeline = init_model()
+    return model, pipeline
+
 rembg_session = rembg.new_session()
 
 
@@ -36,9 +95,10 @@ def expand_to_square(image, bg_color=(0, 0, 0, 0)):
     return new_image
 
 def check_input_image(input_image):
+    """Check if the input image is valid"""
     if input_image is None:
         raise gr.Error("No image uploaded!")
-
+    return input_image
 
 def remove_background(
     image: PIL.Image.Image,
@@ -78,100 +138,63 @@ def add_background(image, bg_color=(255, 255, 255)):
     background = Image.new("RGBA", image.size, bg_color)
     return Image.alpha_composite(background, image)
 
+def add_random_background(image, color):
+    # Add a random background to the image
+    width, height = image.size
+    background = Image.new("RGBA", image.size, color)
+    return Image.alpha_composite(background, image)
 
-def preprocess_image(image, background_choice, foreground_ratio, backgroud_color):
-    """
-    input image is a pil image in RGBA, return RGB image
-    """
-    print(background_choice)
-    if background_choice == "Alpha as mask":
-        background = Image.new("RGBA", image.size, (0, 0, 0, 0))
-        image = Image.alpha_composite(background, image)
-    else:
-        image = remove_background(image, rembg_session, force=True)
-    image = do_resize_content(image, foreground_ratio)
-    image = expand_to_square(image)
-    image = add_background(image, backgroud_color)
-    return image.convert("RGB")
-
-@spaces.GPU
-def gen_image(input_image, seed, scale, step):
-    global pipeline, model, args
-    pipeline.set_seed(seed)
-    rt_dict = pipeline(input_image, scale=scale, step=step)
-    stage1_images = rt_dict["stage1_images"]
-    stage2_images = rt_dict["stage2_images"]
-    np_imgs = np.concatenate(stage1_images, 1)
-    np_xyzs = np.concatenate(stage2_images, 1)
-
-    glb_path = generate3d(model, np_imgs, np_xyzs, args.device)
-    return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path#, obj_path
-
-
-def process_and_generate(input_image, background_choice, foreground_ratio, backgroud_color, seed, scale, step):
-    """Process the input image and generate the 3D model in a single function"""
-    if input_image is None:
-        raise gr.Error("No image uploaded!")
-
-    # Preprocess the image
-    processed = preprocess_image(input_image, background_choice, foreground_ratio, backgroud_color)
-
-    # Generate the 3D model
-    pipeline.set_seed(seed)
-    rt_dict = pipeline(processed, scale=scale, step=step)
-    stage1_images = rt_dict["stage1_images"]
-    stage2_images = rt_dict["stage2_images"]
-    np_imgs = np.concatenate(stage1_images, 1)
-    np_xyzs = np.concatenate(stage2_images, 1)
-
-    glb_path = generate3d(model, np_imgs, np_xyzs, args.device)
-    return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path
-
-# Model initialization code
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "--stage1_config",
-    type=str,
-    default="configs/nf7_v3_SNR_rd_size_stroke.yaml",
-    help="config for stage1",
-)
-parser.add_argument(
-    "--stage2_config",
-    type=str,
-    default="configs/stage2-v2-snr.yaml",
-    help="config for stage2",
-)
-
-parser.add_argument("--device", type=str, default="cuda")
-args = parser.parse_args()
-
-crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
-specs = json.load(open("configs/specs_objaverse_total.json"))
-model = CRM(specs)
-model.load_state_dict(torch.load(crm_path, map_location="cpu"), strict=False)
-model = model.to(args.device)
-
-stage1_config = OmegaConf.load(args.stage1_config).config
-stage2_config = OmegaConf.load(args.stage2_config).config
-stage2_sampler_config = stage2_config.sampler
-stage1_sampler_config = stage1_config.sampler
-
-stage1_model_config = stage1_config.models
-stage2_model_config = stage2_config.models
-
-xyz_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="ccm-diffusion.pth")
-pixel_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth")
-stage1_model_config.resume = pixel_path
-stage2_model_config.resume = xyz_path
-
-pipeline = TwoStagePipeline(
-    stage1_model_config,
-    stage2_model_config,
-    stage1_sampler_config,
-    stage2_sampler_config,
-    device=args.device,
-    dtype=torch.float32
-)
+def preprocess_image(input_image, background_choice, foreground_ratio, back_groud_color):
+    """Preprocess the input image"""
+    try:
+        # Get model and pipeline when needed
+        model, pipeline = get_model()
+
+        # Convert to numpy array
+        np_image = np.array(input_image)
+
+        # Process background
+        if background_choice == "Remove Background":
+            np_image = rembg.remove(np_image, session=rembg_session)
+        elif background_choice == "Custom Background":
+            np_image = add_random_background(np_image, back_groud_color)
+
+        # Resize content if needed
+        if foreground_ratio != 1.0:
+            np_image = do_resize_content(Image.fromarray(np_image), foreground_ratio)
+            np_image = np.array(np_image)
+
+        return Image.fromarray(np_image)
+    except Exception as e:
+        print(f"Error in preprocess_image: {str(e)}")
+        raise e
+
+def gen_image(processed_image, seed, scale, step):
+    """Generate the 3D model"""
+    try:
+        # Get model and pipeline when needed
+        model, pipeline = get_model()
+
+        # Convert to numpy array
+        np_image = np.array(processed_image)
+
+        # Set random seed
+        torch.manual_seed(seed)
+        np.random.seed(seed)
+
+        # Generate images
+        np_imgs, np_xyzs = pipeline.generate(
+            np_image,
+            guidance_scale=scale,
+            num_inference_steps=step
+        )
+
+        # Generate 3D model
+        glb_path = generate3d(model, np_imgs, np_xyzs, args.device)
+        return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path
+    except Exception as e:
+        print(f"Error in gen_image: {str(e)}")
+        raise e
 
 _DESCRIPTION = '''
 * Our [official implementation](https://github.com/thu-ml/CRM) uses UV texture instead of vertex color. It has better texture than this online demo.
@@ -179,72 +202,75 @@ _DESCRIPTION = '''
 * If you find the output unsatisfying, try using different seeds:)
 '''
 
-# Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# CRM: Single Image to 3D Textured Mesh with Convolutional Reconstruction Model")
+# Create Gradio interface
+with gr.Blocks(title="CRM: 3D Character Generation from Single Image") as demo:
     gr.Markdown(_DESCRIPTION)
 
     with gr.Row():
         with gr.Column():
-            image_input = gr.Image(
-                label="Image input",
-                type="pil",
-                image_mode="RGBA",
-                sources=["upload"]
+            input_image = gr.Image(label="Input Image", type="pil")
+            background_choice = gr.Radio(
+                choices=["Remove Background", "Custom Background"],
+                value="Remove Background",
+                label="Background Option"
             )
-
-            with gr.Row():
-                background_choice = gr.Radio(
-                    choices=["Alpha as mask", "Auto Remove background"],
-                    value="Auto Remove background",
-                    label="Background choice"
-                )
-
-            with gr.Row():
-                seed = gr.Number(value=1234, label="Seed", precision=0)
-                guidance_scale = gr.Number(value=5.5, minimum=3, maximum=10, label="Guidance scale")
-                step = gr.Number(value=30, minimum=30, maximum=100, label="Sample steps", precision=0)
-
-            generate_btn = gr.Button("Generate 3D shape")
-
-            gr.Examples(
-                examples=[os.path.join("examples", i) for i in os.listdir("examples")],
-                inputs=[image_input],
-                examples_per_page=20
+            foreground_ratio = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=1.0,
+                step=0.1,
+                label="Foreground Ratio"
             )
-
-        with gr.Column():
-            image_output = gr.Image(label="Output RGB image", type="pil")
-            xyz_output = gr.Image(label="Output CCM image", type="pil")
-            output_model = gr.Model3D(label="Output 3D Model")
+            back_groud_color = gr.ColorPicker(
+                label="Background Color",
+                value="#FFFFFF"
+            )
+            seed = gr.Number(
+                label="Seed",
+                value=42,
+                precision=0
+            )
+            scale = gr.Slider(
+                minimum=1.0,
+                maximum=20.0,
+                value=7.5,
+                step=0.1,
+                label="Guidance Scale"
+            )
+            step = gr.Slider(
+                minimum=1,
+                maximum=100,
+                value=50,
+                step=1,
+                label="Steps"
+            )
+            generate_btn = gr.Button("Generate 3D Model")
 
-    gr.Markdown("Note: Ensure that the input image is correctly pre-processed into a grey background, otherwise the results will be unpredictable.")
-
-    def process_and_generate_simple(image, seed, scale, step):
-        if image is None:
-            raise gr.Error("No image uploaded!")
-
-        # Use default values for background processing
-        processed = preprocess_image(image, "Auto Remove background", 1.0, "#7F7F7F")
-
-        # Generate the 3D model
-        pipeline.set_seed(seed)
-        rt_dict = pipeline(processed, scale=scale, step=step)
-        stage1_images = rt_dict["stage1_images"]
-        stage2_images = rt_dict["stage2_images"]
-        np_imgs = np.concatenate(stage1_images, 1)
-        np_xyzs = np.concatenate(stage2_images, 1)
-
-        glb_path = generate3d(model, np_imgs, np_xyzs, args.device)
-        return Image.fromarray(np_imgs), Image.fromarray(np_xyzs), glb_path
-
+        with gr.Column():
+            processed_image = gr.Image(label="Processed Image", type="pil")
+            output_image = gr.Image(label="Generated Image", type="pil")
+            output_xyz = gr.Image(label="Generated XYZ", type="pil")
+            output_glb = gr.Model3D(label="Generated 3D Model")
+
+    # Connect the functions with explicit API names
     generate_btn.click(
-        fn=process_and_generate_simple,
-        inputs=[image_input, seed, guidance_scale, step],
-        outputs=[image_output, xyz_output, output_model]
+        fn=check_input_image,
+        inputs=[input_image],
+        outputs=[input_image],
+        api_name="check_input_image"
+    ).success(
+        fn=preprocess_image,
+        inputs=[input_image, background_choice, foreground_ratio, back_groud_color],
+        outputs=[processed_image],
+        api_name="preprocess_image"
+    ).success(
+        fn=gen_image,
+        inputs=[processed_image, seed, scale, step],
+        outputs=[output_image, output_xyz, output_glb],
+        api_name="gen_image"
    )
 
-    # For Hugging Face Spaces, use minimal configuration
-    demo.queue().launch(
-        show_error=True  # Only keep error display for debugging
-    )
+# For Hugging Face Spaces, use minimal configuration
+demo.queue().launch(
+    show_error=True  # Only keep error display for debugging
+)
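
With explicit api_name="..." on each event in the chain, the three stages are exposed as stable /check_input_image, /preprocess_image, and /gen_image endpoints instead of Gradio's auto-generated names, so external callers no longer break when the UI layout changes. Below is a minimal sketch of driving these endpoints with a recent gradio_client (one that provides handle_file). The Space id is truncated in the page header, so "mashroo/CRM" is a placeholder, not the real id; the argument order simply mirrors the inputs=[...] lists in the diff above.

    # Sketch only: "mashroo/CRM" is a hypothetical Space id, and input.png
    # is any local image; argument order follows the inputs= lists above.
    from gradio_client import Client, handle_file

    client = Client("mashroo/CRM")  # placeholder Space id

    # Stage 1: the validation endpoint named in this commit
    client.predict(handle_file("input.png"), api_name="/check_input_image")

    # Stage 2: background handling; returns a local path to the processed image
    processed = client.predict(
        handle_file("input.png"),  # input_image
        "Remove Background",       # background_choice
        1.0,                       # foreground_ratio
        "#FFFFFF",                 # back_groud_color
        api_name="/preprocess_image",
    )

    # Stage 3: generation; returns (RGB image, XYZ image, GLB path)
    rgb, xyz, glb_path = client.predict(
        handle_file(processed),    # processed_image
        42,                        # seed
        7.5,                       # scale (guidance)
        50,                        # step (inference steps)
        api_name="/gen_image",
    )
    print(glb_path)

Because each stage is a separately named endpoint, a failed check_input_image stops the .success() chain in the UI, and an API caller can re-run /gen_image with a different seed without repeating preprocessing.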