Spaces:

gaur3009
/

Text_Blending

Build error

App Files Files Community

gaur3009 committed on Jan 25

Commit

6a85c9c

verified ·

1 Parent(s): 751c76a

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -13

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import torch
 from torchvision import transforms
 from torchvision.models.segmentation import deeplabv3_resnet101
 model = deeplabv3_resnet101(pretrained=True)
 model.eval()
@@ -24,33 +25,47 @@ def segment_clothing(image):
         output = model(input_tensor)['out'][0]
     output_predictions = output.argmax(0).byte().cpu().numpy()
-    mask = cv2.resize(output_predictions, (image.shape[1], image.shape[0]))
     return mask
 def generate_displacement_map(image, mask):
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    blurred = cv2.GaussianBlur(gray, (15, 15), 0)
     displacement_map = cv2.normalize(blurred, None, 0, 255, cv2.NORM_MINMAX)
-    displacement_map[mask != 15] = 0
     return displacement_map
 def warp_text(image, text_overlay, displacement_map):
     text_overlay_array = np.array(text_overlay)
-    displacement_map = cv2.GaussianBlur(displacement_map, (15, 15), 0)
     h, w = displacement_map.shape
     x, y = np.meshgrid(np.arange(w), np.arange(h))
-    x_displacement = x + displacement_map / 50.0
-    y_displacement = y + displacement_map / 50.0
-    warped = cv2.remap(text_overlay_array, x_displacement.astype(np.float32), y_displacement.astype(np.float32), interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT)
     return Image.fromarray(warped)
 def overlay_text(image, text, font_size, color, mask):
     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
-    draw = ImageDraw.Draw(pil_image)
-    y_indices, x_indices = np.where(mask == 15)
     if len(x_indices) == 0 or len(y_indices) == 0:
         return None, "No clothing region detected."
@@ -60,6 +75,11 @@ def overlay_text(image, text, font_size, color, mask):
     clothing_width = x_max - x_min
     clothing_height = y_max - y_min
     font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
     if not os.path.exists(font_path):
         return None, "Font file not found. Please provide a valid font path."
@@ -73,14 +93,15 @@ def overlay_text(image, text, font_size, color, mask):
         font = ImageFont.truetype(font_path, font_size)
         text_width, text_height = font.getbbox(text)[2:]
     text_x = x_min + (clothing_width - text_width) // 2
     text_y = y_min + (clothing_height - text_height) // 2
     text_overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
     text_draw = ImageDraw.Draw(text_overlay)
     try:
-        rgba_color = tuple(color) + (255,)
         text_draw.text((text_x, text_y), text, font=font, fill=rgba_color)
     except Exception as e:
         return None, f"Error applying color: {str(e)}"
@@ -89,26 +110,32 @@ def overlay_text(image, text, font_size, color, mask):
 def process_image(image, text, font_size, color):
     try:
         mask = segment_clothing(image)
         if mask.sum() == 0:
             return "No clothing detected. Try another image."
         displacement_map = generate_displacement_map(image, mask)
         text_overlay, error = overlay_text(image, text, font_size, color, mask)
         if error:
             return error
         warped_text = warp_text(image, text_overlay, displacement_map)
         pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
-        final_image = Image.alpha_composite(pil_image, warped_text)
         return final_image
     except Exception as e:
         print(f"Error processing image: {str(e)}")
         return f"Error: {str(e)}"
 gr.Interface(
     fn=process_image,
     inputs=[

 from torchvision import transforms
 from torchvision.models.segmentation import deeplabv3_resnet101
+# Load Pretrained DeepLabV3 Model
 model = deeplabv3_resnet101(pretrained=True)
 model.eval()
         output = model(input_tensor)['out'][0]
     output_predictions = output.argmax(0).byte().cpu().numpy()
+    # Scale back to original size
+    mask = cv2.resize(output_predictions, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)
+    print(f"Mask shape: {mask.shape}, unique values: {np.unique(mask)}")  # Debugging
     return mask
 def generate_displacement_map(image, mask):
+    """Generate a displacement map from the clothing region."""
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    blurred = cv2.GaussianBlur(gray, (9, 9), 0)  # Reduced kernel size for clarity
     displacement_map = cv2.normalize(blurred, None, 0, 255, cv2.NORM_MINMAX)
+    displacement_map[mask != 15] = 0  # Apply mask (class 15 corresponds to 'person')
+    print(f"Displacement map stats - Min: {np.min(displacement_map)}, Max: {np.max(displacement_map)}")  # Debugging
     return displacement_map
 def warp_text(image, text_overlay, displacement_map):
+    """Warp the text overlay based on the displacement map."""
     text_overlay_array = np.array(text_overlay)
+    displacement_map = cv2.GaussianBlur(displacement_map, (9, 9), 0)  # Reduced blur for better details
+    # Create an x, y distortion map
     h, w = displacement_map.shape
     x, y = np.meshgrid(np.arange(w), np.arange(h))
+    x_displacement = x + displacement_map / 100.0  # Adjusted scaling factor for subtle warping
+    y_displacement = y + displacement_map / 100.0
+    # Warp text overlay using remap
+    warped = cv2.remap(
+        text_overlay_array,
+        x_displacement.astype(np.float32),
+        y_displacement.astype(np.float32),
+        interpolation=cv2.INTER_LINEAR,
+        borderMode=cv2.BORDER_CONSTANT
+    )
     return Image.fromarray(warped)
 def overlay_text(image, text, font_size, color, mask):
+    """Overlay text onto the detected clothing region."""
     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+    # Find the bounding box of the mask (clothing area)
+    y_indices, x_indices = np.where(mask == 15)  # Class 15 corresponds to 'person' in DeepLabV3
     if len(x_indices) == 0 or len(y_indices) == 0:
         return None, "No clothing region detected."
     clothing_width = x_max - x_min
     clothing_height = y_max - y_min
+    # Ensure the color is correctly formatted
+    color = color.lstrip('#')
+    color_tuple = tuple(int(color[i:i+2], 16) for i in (0, 2, 4))
+    # Load font and adjust size dynamically
     font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
     if not os.path.exists(font_path):
         return None, "Font file not found. Please provide a valid font path."
         font = ImageFont.truetype(font_path, font_size)
         text_width, text_height = font.getbbox(text)[2:]
+    # Calculate position to center the text
     text_x = x_min + (clothing_width - text_width) // 2
     text_y = y_min + (clothing_height - text_height) // 2
+    # Draw the text on a transparent overlay
     text_overlay = Image.new("RGBA", pil_image.size, (255, 255, 255, 0))
     text_draw = ImageDraw.Draw(text_overlay)
     try:
+        rgba_color = color_tuple + (255,)  # Add alpha channel
         text_draw.text((text_x, text_y), text, font=font, fill=rgba_color)
     except Exception as e:
         return None, f"Error applying color: {str(e)}"
 def process_image(image, text, font_size, color):
     try:
+        # Segment the clothing using DeepLabV3
         mask = segment_clothing(image)
         if mask.sum() == 0:
             return "No clothing detected. Try another image."
+        # Generate displacement map
         displacement_map = generate_displacement_map(image, mask)
+        # Overlay the text
         text_overlay, error = overlay_text(image, text, font_size, color, mask)
         if error:
             return error
+        # Warp text using displacement map
         warped_text = warp_text(image, text_overlay, displacement_map)
+        # Blend the warped text back onto the original image
         pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)).convert("RGBA")
+        final_image = Image.alpha_composite(pil_image, warped_text).convert("RGB")
         return final_image
     except Exception as e:
         print(f"Error processing image: {str(e)}")
         return f"Error: {str(e)}"
+# Gradio Interface
 gr.Interface(
     fn=process_image,
     inputs=[