Commit 52b5462 · Parent: d04cb87
Add mask and blending modules

app.py CHANGED
@@ -225,9 +225,7 @@ def draw_box(box: torch.Tensor, draw: ImageDraw.Draw, label: Optional[str]) -> None:
     draw.text((box[0], box[1]), str(label), fill="white")
 
 
-def run_grounded_sam(
-    input_image
-) -> List[Image.Image]:
+def run_grounded_sam(input_image):
     """Main function to run GroundingDINO and SAM-HQ"""
     try:
         # Create output directory
@@ -319,7 +317,7 @@ def run_grounded_sam(
         for box, label in zip(boxes_filt, pred_phrases):
            draw_box(box, image_draw, label)
 
-        return
+        return mask_image
 
    except Exception as e:
        logger.error(f"Error in run_grounded_sam: {e}")
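These first two hunks narrow `run_grounded_sam` to a single `input_image` argument and make it return the segmentation result (`mask_image`) instead of nothing, which is the contract the new `generate_image` function added later in this commit relies on. A minimal sketch of that call contract, with a hypothetical stub standing in for the real GroundingDINO + SAM-HQ pipeline:

from PIL import Image

def run_grounded_sam_stub(input_image: Image.Image) -> Image.Image:
    # Hypothetical stand-in: the real function detects the subject with
    # GroundingDINO, segments it with SAM-HQ, and returns the mask image.
    return Image.new("RGBA", input_image.size, (255, 255, 255, 255))

mask = run_grounded_sam_stub(Image.new("RGB", (640, 480), "gray"))
print(mask.size, mask.mode)  # (640, 480) RGBA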
@@ -331,14 +329,85 @@ def run_grounded_sam(
        else:
            return [Image.new('RGB', (400, 300), color='gray'), Image.new('RGBA', (400, 300), color=(0, 0, 0, 0))]
 
-def
-
-
-
-
-
-
-
+def image_gaussian_blur(image: torch.Tensor, radius: float) -> torch.Tensor:
+    if image.ndim == 4:  # Remove batch dimension if present
+        image = image.squeeze(0)
+    pil_image = tensor2pil(image)
+    blurred_pil_image = pil_image.filter(ImageFilter.GaussianBlur(radius))
+    return pil2tensor(blurred_pil_image).squeeze(0)
+
+def load_image(image_path: str) -> torch.Tensor:
+    image = Image.open(image_path).convert("RGBA")
+    image_tensor = torch.from_numpy(np.array(image)).permute(2, 0, 1).float() / 255.0
+    return image_tensor
+
+def split_image_with_alpha(image: torch.Tensor):
+    out_images = image[:3, :, :]
+    out_alphas = image[3, :, :] if image.shape[0] > 3 else torch.ones_like(image[0, :, :])
+    result = (out_images.unsqueeze(0), 1.0 - out_alphas.unsqueeze(0))
+    return result
+
+def pil2numpy(image: Image.Image):
+    return np.array(image).astype(np.float32) / 255.0
+
+def numpy2pil(image: np.ndarray, mode=None):
+    return Image.fromarray(np.clip(255.0 * image, 0, 255).astype(np.uint8), mode)
+
+def pil2tensor(image: Image.Image):
+    return torch.from_numpy(pil2numpy(image)).unsqueeze(0)
+
+def invert(image):
+    s = 1.0 - image
+    return s
+
+def tensor2pil(image: torch.Tensor, mode=None):
+    if image.ndim == 2:  # Grayscale image
+        image = image.unsqueeze(0)  # Add channel dimension
+
+    if image.ndim != 3 or image.shape[1:] == (0, 0):
+        raise ValueError(f"Invalid tensor dimensions: {image.shape}")
+
+    if image.shape[0] == 1:  # Single channel, replicate to 3 channels
+        image = image.repeat(3, 1, 1)
+    elif image.shape[0] != 3:
+        raise ValueError("Unexpected number of channels in the image tensor")
+
+    return numpy2pil(image.cpu().numpy().transpose(1, 2, 0), mode=mode)
+
+def extract_high_frequency(image: torch.Tensor, blur_radius: float = 5.0) -> torch.Tensor:
+    """Extract high-frequency details by subtracting the blurred image from the original."""
+    if image.ndim == 4:
+        image = image.squeeze(0)
+
+    blurred = image_gaussian_blur(image, blur_radius)
+
+    if blurred.ndim == 4:
+        blurred = blurred.squeeze(0)
+    elif blurred.ndim == 3 and blurred.shape[0] != 3:
+        blurred = blurred.permute(2, 0, 1)
+
+    high_freq = image - blurred
+    return high_freq
+
+def image_blend_mask(image_a, image_b, mask, blend_percentage):
+
+    # Convert images to PIL
+    img_a = tensor2pil(image_a)
+    img_b = tensor2pil(image_b)
+    mask = ImageOps.invert(tensor2pil(mask).convert('L'))
+
+    # Mask image
+    masked_img = Image.composite(img_a, img_b, mask.resize(img_a.size))
+
+    # Blend image
+    blend_mask = Image.new(mode="L", size=img_a.size,
+                           color=(round(blend_percentage * 255)))
+    blend_mask = ImageOps.invert(blend_mask)
+    img_result = Image.composite(img_a, masked_img, blend_mask)
+
+    del img_a, img_b, blend_mask, mask
+
+    return (pil2tensor(img_result), )
 
 def encode_image(image):
     buffer = BytesIO()
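The helpers added in this hunk center on a blur-subtract decomposition: `extract_high_frequency` takes the original image minus its Gaussian blur, leaving only edges and fine texture. A self-contained sketch of the same idea using only PIL and torch (the `demo_high_freq` name and the random test image are illustrative, not part of the commit):

import numpy as np
import torch
from PIL import Image, ImageFilter

def demo_high_freq(pil_img: Image.Image, radius: float = 3.0) -> torch.Tensor:
    # Original minus its Gaussian blur isolates high-frequency detail;
    # values land roughly in [-1, 1] and get added back onto a base later.
    to_tensor = lambda im: torch.from_numpy(
        np.asarray(im, dtype=np.float32) / 255.0).permute(2, 0, 1)
    blurred = pil_img.filter(ImageFilter.GaussianBlur(radius))
    return to_tensor(pil_img) - to_tensor(blurred)

img = Image.fromarray((np.random.rand(64, 64, 3) * 255).astype(np.uint8))
hf = demo_high_freq(img)
print(hf.shape, float(hf.min()), float(hf.max()))  # torch.Size([3, 64, 64]) ...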
@@ -368,10 +437,85 @@ def generate_ai_bg(input_img, prompt):
 
     return ic_light_img
 
-def
-
+def blend_details(input_image, relit_image, masked_image):
+    with torch.inference_mode():
+        # Load and resize images
+        # input_image = load_image(input_image_path)
+        # relit_image = load_image(relit_image_path)
+        # masked_image = load_image(masked_image_path)
+
+        # Resize input image
+        input_image = torch.nn.functional.interpolate(
+            input_image.unsqueeze(0),
+            size=(1024, 1024),
+            mode="bicubic",
+            align_corners=False
+        ).squeeze(0)
+
+        # Resize relit image
+        relit_image = torch.nn.functional.interpolate(
+            relit_image.unsqueeze(0),
+            size=(1024, 1024),
+            mode="bicubic",
+            align_corners=False
+        ).squeeze(0)
+
+        # Resize masked image
+        masked_image = torch.nn.functional.interpolate(
+            masked_image.unsqueeze(0),
+            size=(1024, 1024),
+            mode="bicubic",
+            align_corners=False
+        ).squeeze(0)
+
+        # Split images and get RGB channels
+        input_image_rgb = split_image_with_alpha(input_image)[0].squeeze(0)
+        relit_image_rgb = split_image_with_alpha(relit_image)[0].squeeze(0)
+
+        # Use masked image RGB channels as segmentation mask (average of RGB channels)
+        segmentation_mask = masked_image[:3].mean(dim=0)  # Average RGB channels to get grayscale mask
+
+        print(f"segmentation_mask shape: {segmentation_mask.shape}")
+
+        # Extract high-frequency details from input image
+        high_freq_details = extract_high_frequency(input_image_rgb, blur_radius=3.0)
+
+        # Print shapes for debugging
+        print(f"high_freq_details shape: {high_freq_details.shape}")
+        print(f"segmentation_mask shape: {segmentation_mask.shape}")
+        print(f"relit_image_rgb shape: {relit_image_rgb.shape}")
+
+        # Apply high-frequency details only in masked areas
+        detail_strength = 0.5
+        segmentation_mask = segmentation_mask.unsqueeze(0).repeat(3, 1, 1)  # Expand mask to match RGB channels
+        masked_details = high_freq_details * segmentation_mask
+        # final_image = relit_image_rgb + (masked_details * detail_strength)
+        # final_image = image_blend_mask(relit_image_rgb, masked_details, mask, blend_percentage)
+        final_image = relit_image_rgb + masked_details
+        print('final_image shape:', final_image.shape)
+
+        # Normalize to [0, 1] range
+        final_image = torch.clamp(final_image, 0, 1)
+
+        # Save intermediate results for debugging
+        tensor2pil(segmentation_mask).save("output/segmentation_mask.png")
+        tensor2pil(high_freq_details).save("output/high_freq_details.png")
+        tensor2pil(masked_details).save("output/masked_details.png")
+
+        # Save final result
+        final_image_pil = tensor2pil(final_image)
+        # final_image_pil.save("output/output_image.png")
+        return [final_image_pil]
+
+def generate_image(input_img, ai_gen_image, prompt):
+
+    # ai_gen_image = generate_ai_bg(input_img, prompt)
+
+    mask_input_image = run_grounded_sam(input_img)
+
+    final_image = blend_details(input_img, ai_gen_image, mask_input_image)
 
-    return [
+    return [final_image]
 
 def create_ui():
     """Create Gradio UI for CarViz demo"""
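At its core, `blend_details` is a masked detail transfer: resize everything to 1024×1024, turn the SAM mask into a 3-channel weight, and add the original's high-frequency details onto the relit render only where the mask is active, clamping back to [0, 1]. A minimal sketch of just that compositing step, assuming the three tensors are already 3×H×W floats (tensor names are illustrative, not from the commit):

import torch

def transfer_details(relit: torch.Tensor, high_freq: torch.Tensor,
                     mask: torch.Tensor) -> torch.Tensor:
    # relit: 3xHxW in [0, 1]; high_freq: 3xHxW around 0; mask: HxW in [0, 1]
    mask3 = mask.unsqueeze(0).expand(3, -1, -1)  # broadcast mask over RGB
    out = relit + high_freq * mask3              # add detail only inside the mask
    return out.clamp(0.0, 1.0)                   # keep a displayable range

relit = torch.rand(3, 1024, 1024)
high_freq = torch.rand(3, 1024, 1024) - 0.5
mask = torch.zeros(1024, 1024)
mask[256:768, 256:768] = 1.0                     # details only in the center square
print(transfer_details(relit, high_freq, mask).shape)  # torch.Size([3, 1024, 1024])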
@@ -383,6 +527,7 @@ def create_ui():
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(type="pil", label="image")
+            ai_image = gr.Image(type="pil", label="image")
             prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
             run_button = gr.Button(value='Run')
 
@@ -396,6 +541,7 @@ def create_ui():
             fn=generate_image,
             inputs=[
                 input_image,
+                ai_image,
                 prompt
             ],
             outputs=gallery
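The two UI hunks add a second `gr.Image` input (`ai_image`, the pre-generated relit background) and thread it through to `generate_image`. The surrounding `create_ui` code is not shown in the diff; a minimal self-contained Gradio wiring consistent with these hunks (the `gr.Blocks` layout and the `gallery` definition are assumptions):

import gradio as gr
from PIL import Image

def generate_image(input_img, ai_gen_image, prompt):
    # Placeholder: the real app segments input_img and blends its details
    # onto ai_gen_image; here we just echo the input to exercise the wiring.
    return [input_img if input_img is not None else Image.new("RGB", (400, 300), "gray")]

with gr.Blocks() as block:
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="image")
            ai_image = gr.Image(type="pil", label="image")
            prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...")
            run_button = gr.Button(value='Run')
        with gr.Column():
            gallery = gr.Gallery(label="results")
    run_button.click(fn=generate_image, inputs=[input_image, ai_image, prompt], outputs=gallery)

if __name__ == "__main__":
    block.launch()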