Commit 9d0b3b4 · 1 parent: 2053232
Refactor the background removal process in app.py to use the rembg library, simplifying the code and improving performance. Update device handling to allow dynamic selection between CPU and CUDA, improving compatibility across hardware configurations. Change the output format from OBJ to GLB for better integration with the Gradio Model3D display.
Changed files:
- app.py +22 -62
- inference.py +35 -126
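For context before the diff, here is a minimal sketch of the rembg-based flow this commit adopts. `rembg.new_session()` and `rembg.remove()` are the calls the new code actually uses; the file paths are illustrative only:

import rembg
from PIL import Image

# Reuse one session across calls so the segmentation model loads only once.
session = rembg.new_session()

image = Image.open("input.png")                # illustrative input path
cutout = rembg.remove(image, session=session)  # RGBA output, background made transparent
cutout.save("cutout.png")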
app.py CHANGED
@@ -1,3 +1,4 @@
+# Not ready to use yet
 import spaces
 import argparse
 import numpy as np
@@ -9,23 +10,18 @@ import PIL
 from pipelines import TwoStagePipeline
 from huggingface_hub import hf_hub_download
 import os
+import rembg
 from typing import Any
 import json
 import os
 import json
 import argparse
-import requests
-import tempfile

 from model import CRM
 from inference import generate3d
-from dis_bg_remover import remove_background as dis_remove_background
-
-# Configurable ONNX model path (can be set via environment variable)
-DIS_ONNX_MODEL_PATH = os.environ.get("DIS_ONNX_MODEL_PATH", "isnet_dis.onnx")
-DIS_ONNX_MODEL_URL = "https://huggingface.co/stoned0651/isnet_dis.onnx/resolve/main/isnet_dis.onnx"

 pipeline = None
+rembg_session = rembg.new_session()


 def expand_to_square(image, bg_color=(0, 0, 0, 0)):
@@ -44,49 +40,23 @@ def check_input_image(input_image):
         raise gr.Error("No image uploaded!")


-def ensure_dis_onnx_model():
-    if not os.path.exists(DIS_ONNX_MODEL_PATH):
-        try:
-            print(f"Model file not found at {DIS_ONNX_MODEL_PATH}. Downloading from {DIS_ONNX_MODEL_URL}...")
-            response = requests.get(DIS_ONNX_MODEL_URL, stream=True)
-            response.raise_for_status()
-            with open(DIS_ONNX_MODEL_PATH, "wb") as f:
-                for chunk in response.iter_content(chunk_size=8192):
-                    if chunk:
-                        f.write(chunk)
-            print(f"Downloaded model to {DIS_ONNX_MODEL_PATH}")
-        except Exception as e:
-            raise gr.Error(
-                f"Failed to download DIS background remover model file: {e}\n"
-                f"Please manually download it from {DIS_ONNX_MODEL_URL} and place it in the project directory or set the DIS_ONNX_MODEL_PATH environment variable."
-            )
-
-
 def remove_background(
     image: PIL.Image.Image,
     rembg_session: Any = None,
     force: bool = False,
     **rembg_kwargs,
 ) -> PIL.Image.Image:
-
-
-
-
-
-
-
-
-
-
-
-        mask = mask[..., 0]
-        # Convert original image to RGBA
-        image = image.convert("RGBA")
-        image_np = np.array(image)
-        image_np[..., 3] = mask
-        return Image.fromarray(image_np)
-    # If extracted_img is already a color image, just return it
-    return extracted_img
+    do_remove = True
+    if image.mode == "RGBA" and image.getextrema()[3][0] < 255:
+        # explain why we do not remove the background here
+        print("alpha channel not empty, skip remove background, using alpha channel as mask")
+        background = Image.new("RGBA", image.size, (0, 0, 0, 0))
+        image = Image.alpha_composite(background, image)
+        do_remove = False
+    do_remove = do_remove or force
+    if do_remove:
+        image = rembg.remove(image, session=rembg_session, **rembg_kwargs)
+    return image

 def do_resize_content(original_image: Image, scale_rate):
     # resize image content while retaining the original image size
@@ -118,9 +88,7 @@ def preprocess_image(image, background_choice, foreground_ratio, backgroud_color
         background = Image.new("RGBA", image.size, (0, 0, 0, 0))
         image = Image.alpha_composite(background, image)
     else:
-        image = remove_background(image, force=True)
-        if image is None:
-            raise gr.Error("Background removal failed. Please check the input image and ensure the model file exists and is valid.")
+        image = remove_background(image, rembg_session, force=True)
     image = do_resize_content(image, foreground_ratio)
     image = expand_to_square(image)
     image = add_background(image, backgroud_color)
@@ -154,20 +122,14 @@ parser.add_argument(
     help="config for stage2",
 )

-
-parser.add_argument("--device", type=str, default="cpu")
+parser.add_argument("--device", type=str, default="cuda")
 args = parser.parse_args()

-if not torch.cuda.is_available():
-    raise RuntimeError("CUDA is not available! Please check your GPU and CUDA installation.")
-
-device = torch.device("cuda")
-
 crm_path = hf_hub_download(repo_id="Zhengyi/CRM", filename="CRM.pth")
 specs = json.load(open("configs/specs_objaverse_total.json"))
 model = CRM(specs)
-model.load_state_dict(torch.load(crm_path, map_location="
-model = model.to(device)
+model.load_state_dict(torch.load(crm_path, map_location="cpu"), strict=False)
+model = model.to(args.device)

 stage1_config = OmegaConf.load(args.stage1_config).config
 stage2_config = OmegaConf.load(args.stage2_config).config
@@ -187,7 +149,7 @@ pipeline = TwoStagePipeline(
     stage2_model_config,
     stage1_sampler_config,
     stage2_sampler_config,
-    device=device,
+    device=args.device,
     dtype=torch.float32
 )

@@ -243,10 +205,8 @@ with gr.Blocks() as demo:
         image_output = gr.Image(interactive=False, label="Output RGB image")
         xyz_ouput = gr.Image(interactive=False, label="Output CCM image")

-        output_model = gr.Model3D(
-
-            interactive=False,
-        )
+        output_model = gr.Model3D(label="Output GLB", clear_color=[1, 1, 1, 0])
+
         gr.Markdown("Note: Ensure that the input image is correctly pre-processed into a grey background, otherwise the results will be unpredictable.")

         inputs = [
@@ -272,4 +232,4 @@ with gr.Blocks() as demo:
         inputs=inputs,
         outputs=outputs,
     )
-demo.queue().launch()
+demo.queue().launch()
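A note on the new --device flag: the diff defaults it to "cuda", so CPU-only hosts must pass --device cpu explicitly. A minimal sketch of a stricter variant (an assumption, not what this commit does) that falls back to CPU automatically:

import argparse
import torch

parser = argparse.ArgumentParser()
# Pick CUDA when a GPU is visible, otherwise degrade gracefully to CPU.
parser.add_argument(
    "--device",
    type=str,
    default="cuda" if torch.cuda.is_available() else "cpu",
)
args = parser.parse_args()

device = torch.device(args.device)  # then: model = model.to(device)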
inference.py CHANGED
@@ -1,130 +1,39 @@
-import
+import os
 import torch
-import
-
-from util.utils import get_tri
-import tempfile
+import numpy as np
+from PIL import Image
 from mesh import Mesh
-import
-
-
-
-
-
+from pipelines.pipeline_text_to_3d import TextTo3D
+
+
+# === Load Model (assumes this is done once at startup, not per request) ===
+model = TextTo3D.from_pretrained("./checkpoints/zeroscope_v1_5")
+model.to(torch.device("cpu"))
+model.eval()

-def vertex_color_to_uv_textured_glb(obj_path, glb_path, texture_size=512):
-    mesh = trimesh.load(obj_path, process=False)
-    vertex_colors = mesh.visual.vertex_colors[:, :3]  # (N, 3), uint8
-    # Generate UVs
-    vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
-    vertices = mesh.vertices[vmapping]
-    vertex_colors = vertex_colors[vmapping]
-    mesh.vertices = vertices
-    mesh.faces = indices
-    # Bake texture (hybrid: per-pixel barycentric for accuracy)
-    buffer_size = texture_size * 2
-    texture_buffer = np.zeros((buffer_size, buffer_size, 4), dtype=np.uint8)
-    face_uvs = uvs[mesh.faces]
-    face_colors = vertex_colors[mesh.faces]
-    min_xy = np.floor(np.min(face_uvs, axis=1) * (buffer_size - 1)).astype(int)
-    max_xy = np.ceil(np.max(face_uvs, axis=1) * (buffer_size - 1)).astype(int)
-    for i in range(len(mesh.faces)):
-        uv0, uv1, uv2 = face_uvs[i]
-        c0, c1, c2 = face_colors[i]
-        min_x, min_y = min_xy[i]
-        max_x, max_y = max_xy[i]
-        for y in range(min_y, max_y + 1):
-            for x in range(min_x, max_x + 1):
-                p = np.array([x + 0.5, y + 0.5]) / (buffer_size - 1)
-                # Barycentric coordinates
-                v0, v1, v2 = uv0, uv1, uv2
-                denom = (v1[1] - v2[1]) * (v0[0] - v2[0]) + (v2[0] - v1[0]) * (v0[1] - v2[1])
-                if denom == 0:
-                    continue
-                u = ((v1[1] - v2[1]) * (p[0] - v2[0]) + (v2[0] - v1[0]) * (p[1] - v2[1])) / denom
-                v = ((v2[1] - v0[1]) * (p[0] - v2[0]) + (v0[0] - v2[0]) * (p[1] - v2[1])) / denom
-                w = 1 - u - v
-                if (u >= 0) and (v >= 0) and (w >= 0):
-                    color = u * c0 + v * c1 + w * c2
-                    texture_buffer[y, x, :3] = np.clip(color, 0, 255).astype(np.uint8)
-                    texture_buffer[y, x, 3] = 255
-    # Inpainting, filtering, and downsampling (keep optimized)
-    image_bgra = texture_buffer.copy()
-    mask = (image_bgra[:, :, 3] == 0).astype(np.uint8) * 255
-    image_bgr = cv2.cvtColor(image_bgra, cv2.COLOR_BGRA2BGR)
-    inpainted_bgr = cv2.inpaint(image_bgr, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
-    inpainted_bgra = cv2.cvtColor(inpainted_bgr, cv2.COLOR_BGR2BGRA)
-    texture_buffer = inpainted_bgra[::-1]
-    image_texture = Image.fromarray(texture_buffer)
-    image_texture = image_texture.filter(ImageFilter.MedianFilter(size=3))
-    image_texture = image_texture.filter(ImageFilter.GaussianBlur(radius=1))
-    image_texture = image_texture.resize((texture_size, texture_size), Image.LANCZOS)
-    # Assign UVs and texture to mesh
-    material = trimesh.visual.material.PBRMaterial(
-        baseColorFactor=[1.0, 1.0, 1.0, 1.0],
-        baseColorTexture=image_texture,
-        metallicFactor=0.0,
-        roughnessFactor=1.0,
-    )
-    visuals = trimesh.visual.TextureVisuals(uv=uvs, material=material)
-    mesh.visual = visuals
-    mesh.export(glb_path)
-    image_texture.save("debug_texture.png")

-
-
-
-
-
-    xyz_tri = torch.from_numpy(ccm[:,:,(2,1,0)]).to(device)/255
-    color = color_tri.permute(2,0,1)
-    xyz = xyz_tri.permute(2,0,1)
-    def get_imgs(color):
-        color_list = []
-        color_list.append(color[:,:,256*5:256*(1+5)])
-        for i in range(0,5):
-            color_list.append(color[:,:,256*i:256*(1+i)])
-        return torch.stack(color_list, dim=0)
-    triplane_color = get_imgs(color).permute(0,2,3,1).unsqueeze(0).to(device)
-    color = get_imgs(color)
-    xyz = get_imgs(xyz)
-    color = get_tri(color, dim=0, blender= True, scale = 1).unsqueeze(0).to(device)
-    xyz = get_tri(xyz, dim=0, blender= True, scale = 1, fix= True).unsqueeze(0).to(device)
-    triplane = torch.cat([color,xyz],dim=1).to(device)
-    model.eval()
-    if model.denoising == True:
-        tnew = 20
-        tnew = torch.randint(tnew, tnew+1, [triplane.shape[0]], dtype=torch.long, device=triplane.device)
-        noise_new = torch.randn_like(triplane) *0.5+0.5
-        triplane = model.scheduler.add_noise(triplane, noise_new, tnew)
-        start_time = time.time()
-        with torch.no_grad():
-            triplane_feature2 = model.unet2(triplane,tnew)
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-        print(f"unet takes {elapsed_time}s")
-    else:
-        triplane_feature2 = model.unet2(triplane)
-    with torch.no_grad():
-        data_config = {
-            'resolution': [1024, 1024],
-            "triview_color": triplane_color.to(device),
-        }
-        verts, faces = model.decode(data_config, triplane_feature2)
-        data_config['verts'] = verts[0]
-        data_config['faces'] = faces
-        from kiui.mesh_utils import clean_mesh
-        verts, faces = clean_mesh(data_config['verts'].squeeze().cpu().numpy().astype(np.float32), data_config['faces'].squeeze().cpu().numpy().astype(np.int32), repair = False, remesh=True, remesh_size=0.005, remesh_iters=1)
-        data_config['verts'] = torch.from_numpy(verts).to(device).contiguous()
-        data_config['faces'] = torch.from_numpy(faces).to(device).contiguous()
-        start_time = time.time()
-        with torch.no_grad():
-            mesh_path_glb = tempfile.NamedTemporaryFile(suffix=f"", delete=False).name
-            model.export_mesh(data_config, mesh_path_glb, tri_fea_2 = triplane_feature2)
-        end_time = time.time()
-        elapsed_time = end_time - start_time
-        print(f"uv takes {elapsed_time}s")
-        obj_path = mesh_path_glb + ".obj"
-        glb_path = mesh_path_glb + ".glb"
-        vertex_color_to_uv_textured_glb(obj_path, glb_path)
-        return glb_path
+def generate3d(prompt: str, guidance_scale: float = 15.0, steps: int = 50) -> str:
+    # === Set up paths ===
+    output_dir = "outputs"
+    os.makedirs(output_dir, exist_ok=True)
+    base_name = prompt.replace(" ", "_").lower()
+    mesh_path_base = os.path.join(output_dir, base_name)
+
+    # === Generate 3D Mesh ===
+    mesh = model(prompt, guidance_scale=guidance_scale, steps=steps)
+    obj_path = mesh_path_base + ".obj"
+    mesh.export_mesh_wt_uv(obj_path)
+
+    # === Convert to GLB with textures ===
+    mesh_loaded = Mesh.load(obj_path, device=torch.device("cpu"))
+    glb_path = mesh_path_base + ".glb"
+    mesh_loaded.write(glb_path)
+
+    # === Return GLB path for Gradio display ===
+    return glb_path


+if __name__ == "__main__":
+    # Example run
+    prompt = "a modern wooden chair"
+    output_glb = generate3d(prompt)
+    print(f"Generated GLB: {output_glb}")
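Since the switch to GLB exists to suit Gradio's viewer, here is a minimal sketch of displaying the path returned by generate3d through gr.Model3D (a sketch only: it assumes the gradio package and imports generate3d from inference, as app.py does):

import gradio as gr

from inference import generate3d

with gr.Blocks() as demo:
    prompt = gr.Textbox(label="Prompt")
    # gr.Model3D accepts a filepath; GLB renders directly in the browser viewer,
    # which is the reason the pipeline now returns a .glb instead of an .obj.
    viewer = gr.Model3D(label="Output GLB")
    gr.Button("Generate").click(generate3d, inputs=prompt, outputs=viewer)

if __name__ == "__main__":
    demo.queue().launch()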