Spaces:
Build error
Build error
import functools
import os
import tempfile

import cv2
import gradio as gr
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image, ImageDraw, ImageFont
from scipy.ndimage import gaussian_filter
from torchvision.models import resnet50
class TextClothBlender:
    """Render a line of text and blend it onto a photo of cloth.

    ``process`` runs the whole pipeline: load the cloth image, render the
    text with the configured TrueType font, warp the text onto the cloth
    with a perspective transform, blend the two, and blur the result.

    Colour-order convention: images loaded with OpenCV are BGR, while the
    rendered text is RGBA (Pillow order). ``blend_text_with_cloth`` is the
    single place where the text layer is converted to BGR.
    """

    # Share of the output pixel taken from the text layer where the text is
    # fully opaque; the cloth keeps the remainder.
    TEXT_OPACITY = 0.3
    # Share of the text layer taken from the smoothed edge map, so the cloth
    # texture shows through the glyphs.
    TEXTURE_WEIGHT = 0.5

    def __init__(self, font_path: str):
        """Store the font path and load an ImageNet ResNet-50 in eval mode.

        Args:
            font_path: Path to the TrueType font used by ``render_text``.
        """
        self.font_path = font_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        try:
            # Current torchvision API; selects the same weights that the
            # deprecated ``pretrained=True`` flag selects.
            model = resnet50(weights="IMAGENET1K_V1")
        except TypeError:
            # Older torchvision releases do not know the ``weights`` keyword.
            model = resnet50(pretrained=True)
        self.model = model.to(self.device)
        self.model.eval()

    def preprocess_cloth_image(self, cloth_image_path: str) -> tuple:
        """Load the cloth image and derive its edge map and ResNet output.

        Args:
            cloth_image_path: Path of the image file to read.

        Returns:
            ``(cloth_image, edges, features)``: the BGR image as read by
            OpenCV, its Canny edge map, and the ResNet-50 output tensor.

        Raises:
            OSError: If the file is missing or cannot be decoded.
        """
        cloth_image = cv2.imread(cloth_image_path)
        if cloth_image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Cannot read cloth image: {cloth_image_path!r}")

        # Detect texture using edge detection
        gray_image = cv2.cvtColor(cloth_image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray_image, 50, 150)

        # Extract features using ResNet. The normalisation constants below
        # are per-channel in RGB order, so convert from OpenCV's BGR first.
        rgb_image = cv2.cvtColor(cloth_image, cv2.COLOR_BGR2RGB)
        preprocess = T.Compose([
            T.ToPILImage(),
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        input_tensor = preprocess(rgb_image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            features = self.model(input_tensor)
        return cloth_image, edges, features

    def render_text(self, text: str, font_size: int, text_color: tuple) -> np.ndarray:
        """Draw *text* on a transparent canvas that tightly fits it.

        Args:
            text: The string to draw.
            font_size: Font size passed to ``ImageFont.truetype``.
            text_color: Fill colour tuple in RGB(A) order.

        Returns:
            An RGBA array of shape ``(height, width, 4)``; at least 1x1 even
            for empty text.

        Raises:
            OSError: If the font file cannot be loaded.
        """
        try:
            font = ImageFont.truetype(self.font_path, font_size)
        except OSError as err:
            raise OSError(f"Cannot load font file: {self.font_path!r}") from err

        if hasattr(font, "getbbox"):
            left, top, right, bottom = font.getbbox(text)
        else:
            # Very old Pillow: only the since-removed getsize() is available.
            right, bottom = font.getsize(text)
            left = top = 0
        left, top = int(np.floor(left)), int(np.floor(top))
        right, bottom = int(np.ceil(right)), int(np.ceil(bottom))

        # A zero-sized canvas (empty text) cannot be warped later on.
        canvas_size = (max(1, right - left), max(1, bottom - top))
        text_image = Image.new('RGBA', canvas_size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(text_image)
        # Shift by the bounding-box origin so the glyphs are not clipped.
        draw.text((-left, -top), text, font=font, fill=text_color)
        return np.array(text_image)

    @staticmethod
    def _destination_quad(w: int, h: int) -> np.ndarray:
        """Return the four target corners for the text inside a w x h image."""
        if w > 100 and h > 150:
            return np.float32([[50, 50], [w - 50, 30], [50, h - 100], [w - 50, h - 120]])
        # The fixed pixel margins above would cross over on small images and
        # mirror or collapse the text, so fall back to proportional margins.
        return np.float32([
            [0.10 * w, 0.10 * h],
            [0.90 * w, 0.06 * h],
            [0.10 * w, 0.80 * h],
            [0.90 * w, 0.76 * h],
        ])

    def apply_perspective_transform(self, text_image: np.ndarray, cloth_image_shape: tuple) -> np.ndarray:
        """Warp the text image onto a canvas the size of the cloth image.

        Args:
            text_image: Text layer as an array whose first two axes are
                height and width.
            cloth_image_shape: Shape of the cloth image; only the first two
                entries (height, width) are used.

        Returns:
            The warped text layer with height and width of the cloth image.

        Raises:
            ValueError: If either the text image or the target is empty.
        """
        h, w = cloth_image_shape[:2]
        text_h, text_w = text_image.shape[:2]
        if text_h == 0 or text_w == 0:
            raise ValueError("text_image must not be empty")
        if h <= 0 or w <= 0:
            raise ValueError("cloth_image_shape must describe a non-empty image")

        # Define points for perspective transformation
        src_points = np.float32([[0, 0], [text_w, 0], [0, text_h], [text_w, text_h]])
        dst_points = self._destination_quad(w, h)
        matrix = cv2.getPerspectiveTransform(src_points, dst_points)
        warped_text = cv2.warpPerspective(
            text_image,
            matrix,
            (w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0, 0),
        )
        return warped_text

    def blend_text_with_cloth(self, cloth_image: np.ndarray, text_image: np.ndarray, edges: np.ndarray) -> np.ndarray:
        """Composite the RGBA text layer over the BGR cloth image.

        The text is mixed with a Gaussian-smoothed copy of the edge map and
        then blended into the cloth, weighted by the text's alpha channel.
        Pixels where the text is fully transparent keep the cloth value.

        Args:
            cloth_image: BGR cloth image of shape ``(H, W, 3)``.
            text_image: RGBA text layer of shape ``(h, w, 4)``; it is resized
                to the cloth size when the sizes differ.
            edges: Single-channel edge map of shape ``(H, W)``.

        Returns:
            The blended BGR image as ``uint8`` with the cloth's shape.

        Raises:
            ValueError: If ``text_image`` is not 4-channel or ``edges`` does
                not match the cloth size.
        """
        h, w = cloth_image.shape[:2]
        if text_image.ndim != 3 or text_image.shape[2] != 4:
            raise ValueError("text_image must be an RGBA array of shape (H, W, 4)")
        if edges.shape[:2] != (h, w):
            raise ValueError("edges must have the same height and width as cloth_image")

        # Convert cloth and text images to the same size
        if text_image.shape[:2] != (h, w):
            text_image = cv2.resize(text_image, (w, h), interpolation=cv2.INTER_AREA)

        # Use the alpha channel as the mask: a luminance threshold would drop
        # dark text entirely.
        alpha = text_image[..., 3:4].astype(np.float32) / 255.0
        # Channels 2, 1, 0 of RGBA are B, G, R - the cloth's channel order.
        text_bgr = text_image[..., 2::-1].astype(np.float32)

        # Smoothed edges act as the cloth-texture term; the trailing axis
        # broadcasts the single-channel map over the colour channels.
        displace_map = gaussian_filter(edges.astype(np.float32), sigma=5)
        textured_text = (
            (1.0 - self.TEXTURE_WEIGHT) * text_bgr
            + self.TEXTURE_WEIGHT * displace_map[..., None]
        )

        opacity = self.TEXT_OPACITY * alpha
        blended = cloth_image.astype(np.float32) * (1.0 - opacity) + textured_text * opacity
        return np.clip(np.rint(blended), 0, 255).astype(np.uint8)

    def refine_output(self, blended_image: np.ndarray) -> np.ndarray:
        """Return *blended_image* smoothed with a 5x5 Gaussian blur."""
        refined = cv2.GaussianBlur(blended_image, (5, 5), 0)
        return refined

    def process(self, cloth_image_path: str, text: str, font_size: int, text_color: tuple) -> np.ndarray:
        """Run the full pipeline and return the final BGR image.

        Args:
            cloth_image_path: Path of the cloth image file.
            text: Text to place on the cloth.
            font_size: Font size used for rendering.
            text_color: Fill colour tuple in RGB(A) order.

        Returns:
            The blended and blurred image in BGR channel order.
        """
        # Step 1: Preprocess the cloth image
        cloth_image, edges, _ = self.preprocess_cloth_image(cloth_image_path)
        # Step 2: Render the text
        text_image = self.render_text(text, font_size, text_color)
        # Step 3: Apply perspective transformation
        warped_text = self.apply_perspective_transform(text_image, cloth_image.shape)
        # Step 4: Blend text with the cloth
        blended_image = self.blend_text_with_cloth(cloth_image, warped_text, edges)
        # Step 5: Refine the output
        final_image = self.refine_output(blended_image)
        return final_image
# Define the Gradio interface | |
@functools.lru_cache(maxsize=1)
def _get_blender(font_path):
    """Return a shared ``TextClothBlender`` for *font_path*.

    Constructing a blender loads a ResNet-50, so the instance is built once
    and reused instead of being rebuilt on every request.
    """
    return TextClothBlender(font_path)


def _parse_text_color(text_color):
    """Parse a ``"R,G,B"`` or ``"R,G,B,A"`` string into an RGBA int tuple.

    Surrounding parentheses and whitespace are ignored, a missing alpha
    defaults to 255, and every channel is clamped to 0..255. Empty input
    yields opaque black.

    Raises:
        ValueError: If a component is not numeric or the number of
            components is not 3 or 4.
    """
    cleaned = (text_color or "").strip().strip("()").strip()
    if not cleaned:
        return (0, 0, 0, 255)
    try:
        channels = [int(float(part)) for part in cleaned.split(",")]
    except (ValueError, OverflowError) as err:
        raise ValueError(
            f"Text color must be comma-separated numbers, got {text_color!r}"
        ) from err
    if len(channels) == 3:
        channels.append(255)
    if len(channels) != 4:
        raise ValueError(
            f"Text color needs 3 or 4 components (R,G,B[,A]), got {len(channels)}"
        )
    return tuple(min(255, max(0, channel)) for channel in channels)


def blend_text_on_cloth(cloth_image, text, font_size, text_color):
    """Gradio callback: blend *text* onto the uploaded cloth image.

    Args:
        cloth_image: The uploaded picture as a PIL image (``None`` when
            nothing was uploaded).
        text: Text to place on the cloth.
        font_size: Font size; converted with ``int``.
        text_color: Colour string such as ``"255,0,0,255"``.

    Returns:
        The blended image as a contiguous RGB array.

    Raises:
        ValueError: If no image was uploaded or the colour is malformed.
        OSError: If the temporary image file cannot be written.
    """
    # Validate the cheap inputs first, before any model or image work.
    rgba = _parse_text_color(text_color)
    if cloth_image is None:
        raise ValueError("Please upload a cloth image.")

    font_path = "path/to/font.ttf"  # Ensure the font file exists in the deployed environment
    blender = _get_blender(font_path)

    # Normalise palette/greyscale/alpha uploads to plain RGB before the
    # conversion to OpenCV's BGR order.
    cloth_bgr = cv2.cvtColor(np.array(cloth_image.convert("RGB")), cv2.COLOR_RGB2BGR)

    # A private temporary directory keeps concurrent requests from
    # overwriting each other's file and removes it afterwards; PNG avoids
    # lossy re-encoding of the upload.
    with tempfile.TemporaryDirectory() as work_dir:
        cloth_image_path = os.path.join(work_dir, "cloth_image.png")
        if not cv2.imwrite(cloth_image_path, cloth_bgr):
            raise OSError(f"Could not write temporary image: {cloth_image_path!r}")
        result = blender.process(cloth_image_path, text or "", int(font_size), rgba)

    # Convert BGR to RGB for display; copy so the caller gets a contiguous
    # array rather than a negative-stride view.
    return np.ascontiguousarray(result[:, :, ::-1])
# Input widgets, listed in the positional order of the callback's parameters:
# cloth image, text, font size, text colour.
_INPUT_COMPONENTS = [
    gr.Image(type="pil", label="Upload Cloth Image"),
    gr.Textbox(label="Text to Blend"),
    gr.Slider(10, 100, step=1, label="Font Size"),
    gr.Textbox(label="Text Color (R,G,B,A)", placeholder="e.g., 255,0,0,255"),
]

# The callback returns a NumPy image, so the output widget is typed to match.
_OUTPUT_COMPONENT = gr.Image(type="numpy", label="Blended Output")

# Web UI that wires the widgets above to the blending callback.
iface = gr.Interface(
    fn=blend_text_on_cloth,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENT,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()