i3d

Paused

App Files Files Community

rgndgn committed on Feb 17

Commit

a22b6d9

verified ·

1 Parent(s): efccc85

test

Browse files

Files changed (1) hide show

gradio_app.py +43 -84

gradio_app.py CHANGED Viewed

@@ -8,11 +8,11 @@ from PIL import Image
 import gradio as gr
 import trimesh
 from transparent_background import Remover
 import subprocess
 def install_cuda_toolkit():
-    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
@@ -25,24 +25,22 @@ def install_cuda_toolkit():
         os.environ["CUDA_HOME"],
         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
     )
-    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
 install_cuda_toolkit()
-# Import and setup SPAR3D
 os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
 import spar3d.utils as spar3d_utils
 from spar3d.system import SPAR3D
-# Constants
 COND_WIDTH = 512
 COND_HEIGHT = 512
 COND_DISTANCE = 2.2
 COND_FOVY = 0.591627
 BACKGROUND_COLOR = [0.5, 0.5, 0.5]
-# Initialize models
 device = spar3d_utils.get_device()
 bg_remover = Remover()
 spar3d_model = SPAR3D.from_pretrained(
@@ -51,17 +49,14 @@ spar3d_model = SPAR3D.from_pretrained(
     weight_name="model.safetensors"
 ).eval().to(device)
-# Initialize camera parameters
 c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
 intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
     COND_FOVY, COND_HEIGHT, COND_WIDTH
 )
 def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.Image:
-    """Create an RGBA image from RGB image and optional mask."""
     rgba_image = rgb_image.convert('RGBA')
     if mask is not None:
-        # Ensure mask is 2D before converting to alpha
         if len(mask.shape) > 2:
             mask = mask.squeeze()
         alpha = Image.fromarray((mask * 255).astype(np.uint8))
@@ -69,55 +64,37 @@ def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.
     return rgba_image
 def create_batch(input_image: Image.Image) -> dict[str, Any]:
-    """Prepare image batch for model input."""
-    # Resize and convert input image to numpy array
     resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
     img_array = np.array(resized_image).astype(np.float32) / 255.0
-    # Extract RGB and alpha channels
-    if img_array.shape[-1] == 4:  # RGBA
         rgb = img_array[..., :3]
         mask = img_array[..., 3:4]
-    else:  # RGB
         rgb = img_array
         mask = np.ones((*img_array.shape[:2], 1), dtype=np.float32)
-    # Convert to tensors while keeping channel-last format
-    rgb = torch.from_numpy(rgb).float()  # [H, W, 3]
-    mask = torch.from_numpy(mask).float()  # [H, W, 1]
-    # Create background blend (match channel-last format)
-    bg_tensor = torch.tensor(BACKGROUND_COLOR).view(1, 1, 3)  # [1, 1, 3]
-    # Blend RGB with background using mask (all in channel-last format)
-    rgb_cond = torch.lerp(bg_tensor, rgb, mask)  # [H, W, 3]
-    # Move channels to correct dimension and add batch dimension
-    # Important: For SPAR3D image tokenizer, we need [B, H, W, C] format
-    rgb_cond = rgb_cond.unsqueeze(0)  # [1, H, W, 3]
-    mask = mask.unsqueeze(0)  # [1, H, W, 1]
-    # Create the batch dictionary
     batch = {
-        "rgb_cond": rgb_cond,  # [1, H, W, 3]
-        "mask_cond": mask,  # [1, H, W, 1]
-        "c2w_cond": c2w_cond.unsqueeze(0),  # [1, 4, 4]
-        "intrinsic_cond": intrinsic.unsqueeze(0),  # [1, 3, 3]
-        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),  # [1, 3, 3]
     }
-    for k, v in batch.items():
-        print(f"[debug] {k} final shape:", v.shape)
     return batch
 def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
-    """Process batch through model and generate point cloud."""
     batch_size = batch["rgb_cond"].shape[0]
     assert batch_size == 1, f"Expected batch size 1, got {batch_size}"
-    # Generate point cloud tokens
     try:
         cond_tokens = system.forward_pdiff_cond(batch)
     except Exception as e:
@@ -129,7 +106,6 @@ def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
         print("rgb_cond requires_grad:", batch["rgb_cond"].requires_grad)
         raise
-    # Sample points
     sample_iter = system.sampler.sample_batch_progressive(
         batch_size,
         cond_tokens,
@@ -137,38 +113,23 @@ def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
         device=device
     )
-    # Get final samples
     for x in sample_iter:
         samples = x["xstart"]
     pc_cond = samples.permute(0, 2, 1).float()
-    # Normalize point cloud
     pc_cond = spar3d_utils.normalize_pc_bbox(pc_cond)
-    # Subsample to 512 points
     pc_cond = pc_cond[:, torch.randperm(pc_cond.shape[1])[:512]]
     return pc_cond
 @spaces.GPU
 @torch.inference_mode()
-def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image | None]:
-    """Generate image from prompt and convert to 3D model."""
-    # Generate random seed
     seed = np.random.randint(0, np.iinfo(np.int32).max)
     try:
         rgb_image = image.convert('RGB')
-        # bg_remover returns a PIL Image already, no need to convert
         no_bg_image = bg_remover.process(rgb_image)
-        print(f"[debug] no_bg_image type: {type(no_bg_image)}, mode: {no_bg_image.mode}")
-        # Convert to RGBA if not already
         rgba_image = no_bg_image.convert('RGBA')
-        print(f"[debug] rgba_image mode: {rgba_image.mode}")
         processed_image = spar3d_utils.foreground_crop(
             rgba_image,
@@ -177,15 +138,8 @@ def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image
             no_crop=False
         )
-        # Show the processed image alpha channel for debugging
-        alpha = np.array(processed_image)[:, :, 3]
-        print(f"[debug] Alpha channel stats - min: {alpha.min()}, max: {alpha.max()}, unique: {np.unique(alpha)}")
-        # Prepare batch for processing
         batch = create_batch(processed_image)
         batch = {k: v.to(device) for k, v in batch.items()}
-        # Generate point cloud
         pc_cond = forward_model(
             batch,
             spar3d_model,
@@ -195,25 +149,24 @@ def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image
         )
         batch["pc_cond"] = pc_cond
-        # Generate mesh
         with torch.no_grad():
             with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
                 trimesh_mesh, _ = spar3d_model.generate_mesh(
                     batch,
-                    1024,  # texture_resolution
                     remesh="none",
                     vertex_count=-1,
                     estimate_illumination=True
                 )
                 trimesh_mesh = trimesh_mesh[0]
-        # Export to GLB
-        temp_dir = tempfile.mkdtemp()
-        output_path = os.path.join(temp_dir, 'mesh.glb')
         trimesh_mesh.export(output_path, file_type="glb", include_normals=True)
-        return output_path
     except Exception as e:
         print(f"Error during generation: {str(e)}")
@@ -221,28 +174,34 @@ def generate_and_process_3d(image: Image.Image) -> tuple[str | None, Image.Image
         traceback.print_exc()
         return None
-# Create Gradio app using Blocks
 with gr.Blocks() as demo:
-    gr.Markdown("This space is based on [Stable Point-Aware 3D](https://huggingface.co/spaces/stabilityai/stable-point-aware-3d) by Stability AI, [Text to 3D](https://huggingface.co/spaces/jbilcke-hf/text-to-3d) by jbilcke-hf.")
     with gr.Row():
         input_img = gr.Image(
-            type="pil", label="Input Image", sources="upload", image_mode="RGBA"
-        )
-    with gr.Row():
-        model_output = gr.Model3D(
-            label="Generated .GLB model",
-            clear_color=[0.0, 0.0, 0.0, 0.0],
         )
-    # Event handler
     input_img.upload(
         fn=generate_and_process_3d,
         inputs=[input_img],
-        outputs=[model_output],
         api_name="generate"
     )
 if __name__ == "__main__":
-    demo.queue().launch()

 import gradio as gr
 import trimesh
 from transparent_background import Remover
+from pathlib import Path
 import subprocess
+import uuid
 def install_cuda_toolkit():
     CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
     CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
     subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
         os.environ["CUDA_HOME"],
         "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
     )
     os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
 install_cuda_toolkit()
 os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
 import spar3d.utils as spar3d_utils
 from spar3d.system import SPAR3D
 COND_WIDTH = 512
 COND_HEIGHT = 512
 COND_DISTANCE = 2.2
 COND_FOVY = 0.591627
 BACKGROUND_COLOR = [0.5, 0.5, 0.5]
+OUTPUT_DIR = "output"
+os.makedirs(OUTPUT_DIR, exist_ok=True)
 device = spar3d_utils.get_device()
 bg_remover = Remover()
 spar3d_model = SPAR3D.from_pretrained(
     weight_name="model.safetensors"
 ).eval().to(device)
 c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
 intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
     COND_FOVY, COND_HEIGHT, COND_WIDTH
 )
 def create_rgba_image(rgb_image: Image.Image, mask: np.ndarray = None) -> Image.Image:
     rgba_image = rgb_image.convert('RGBA')
     if mask is not None:
         if len(mask.shape) > 2:
             mask = mask.squeeze()
         alpha = Image.fromarray((mask * 255).astype(np.uint8))
     return rgba_image
 def create_batch(input_image: Image.Image) -> dict[str, Any]:
     resized_image = input_image.resize((COND_WIDTH, COND_HEIGHT))
     img_array = np.array(resized_image).astype(np.float32) / 255.0
+    if img_array.shape[-1] == 4:
         rgb = img_array[..., :3]
         mask = img_array[..., 3:4]
+    else:
         rgb = img_array
         mask = np.ones((*img_array.shape[:2], 1), dtype=np.float32)
+    rgb = torch.from_numpy(rgb).float()
+    mask = torch.from_numpy(mask).float()
+    bg_tensor = torch.tensor(BACKGROUND_COLOR).view(1, 1, 3)
+    rgb_cond = torch.lerp(bg_tensor, rgb, mask)
+    rgb_cond = rgb_cond.unsqueeze(0)
+    mask = mask.unsqueeze(0)
     batch = {
+        "rgb_cond": rgb_cond,
+        "mask_cond": mask,
+        "c2w_cond": c2w_cond.unsqueeze(0),
+        "intrinsic_cond": intrinsic.unsqueeze(0),
+        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
     }
     return batch
 def forward_model(batch, system, guidance_scale=3.0, seed=0, device="cuda"):
     batch_size = batch["rgb_cond"].shape[0]
     assert batch_size == 1, f"Expected batch size 1, got {batch_size}"
     try:
         cond_tokens = system.forward_pdiff_cond(batch)
     except Exception as e:
         print("rgb_cond requires_grad:", batch["rgb_cond"].requires_grad)
         raise
     sample_iter = system.sampler.sample_batch_progressive(
         batch_size,
         cond_tokens,
         device=device
     )
     for x in sample_iter:
         samples = x["xstart"]
     pc_cond = samples.permute(0, 2, 1).float()
     pc_cond = spar3d_utils.normalize_pc_bbox(pc_cond)
     pc_cond = pc_cond[:, torch.randperm(pc_cond.shape[1])[:512]]
     return pc_cond
 @spaces.GPU
 @torch.inference_mode()
+def generate_and_process_3d(image: Image.Image) -> str:
     seed = np.random.randint(0, np.iinfo(np.int32).max)
     try:
         rgb_image = image.convert('RGB')
         no_bg_image = bg_remover.process(rgb_image)
         rgba_image = no_bg_image.convert('RGBA')
         processed_image = spar3d_utils.foreground_crop(
             rgba_image,
             no_crop=False
         )
         batch = create_batch(processed_image)
         batch = {k: v.to(device) for k, v in batch.items()}
         pc_cond = forward_model(
             batch,
             spar3d_model,
         )
         batch["pc_cond"] = pc_cond
         with torch.no_grad():
             with torch.autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu', dtype=torch.bfloat16):
                 trimesh_mesh, _ = spar3d_model.generate_mesh(
                     batch,
+                    1024,
                     remesh="none",
                     vertex_count=-1,
                     estimate_illumination=True
                 )
                 trimesh_mesh = trimesh_mesh[0]
+        unique_id = str(uuid.uuid4())
+        filename = f'model_{unique_id}.glb'
+        output_path = os.path.join(OUTPUT_DIR, filename)
         trimesh_mesh.export(output_path, file_type="glb", include_normals=True)
+        public_url = f"https://john6666-image-to-3d-test.hf.space/file={output_path}"
+        return public_url
     except Exception as e:
         print(f"Error during generation: {str(e)}")
         traceback.print_exc()
         return None
+# Create Gradio interface
 with gr.Blocks() as demo:
     with gr.Row():
         input_img = gr.Image(
+            type="pil",
+            label=None,  # Remove the label
+            show_label=False, # Further remove label
+            sources="upload",
+            image_mode="RGBA",
+            elem_id="hidden-upload" # Add an ID for CSS targeting
         )
+    # Make all output components invisible
+    with gr.Row(visible=False):
+        model_url = gr.Textbox(label="Model URL")
     input_img.upload(
         fn=generate_and_process_3d,
         inputs=[input_img],
+        outputs=[model_url],
         api_name="generate"
     )
 if __name__ == "__main__":
+    demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=True,
+        ssr_mode=False,
+        allowed_paths=[Path(OUTPUT_DIR).resolve()]
+    )