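"""Blend user-supplied text onto a cloth photo so it reads as printed fabric.

Pipeline: Canny edge detection for fabric texture -> text rasterization with
PIL -> perspective warp onto the garment -> edge-modulated alpha blending ->
Gaussian refinement, all wrapped in a small Gradio UI. A ResNet-50 forward
pass runs during preprocessing, but its output is not yet consumed by the
blending step.
"""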
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import torch
import torchvision.transforms as T
from torchvision.models import resnet50, ResNet50_Weights
from scipy.ndimage import gaussian_filter
import gradio as gr

class TextClothBlender:
    def __init__(self, font_path: str):
        self.font_path = font_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # pretrained=True is deprecated in torchvision >= 0.13; use the weights enum
        self.model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1).to(self.device)
        self.model.eval()

    def preprocess_cloth_image(self, cloth_image_path: str):
        # Load the cloth image (BGR, as OpenCV reads it)
        cloth_image = cv2.imread(cloth_image_path)
        if cloth_image is None:
            raise FileNotFoundError(f"Could not read image: {cloth_image_path}")
        gray_image = cv2.cvtColor(cloth_image, cv2.COLOR_BGR2GRAY)

        # Detect texture using edge detection
        edges = cv2.Canny(gray_image, 50, 150)

        # Run the cloth through ResNet-50. The ImageNet-normalized model
        # expects RGB input, so convert from BGR first. Note the full model
        # returns 1000-way classifier logits, not intermediate features.
        preprocess = T.Compose([
            T.ToPILImage(),
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        rgb_image = cv2.cvtColor(cloth_image, cv2.COLOR_BGR2RGB)
        input_tensor = preprocess(rgb_image).unsqueeze(0).to(self.device)
        with torch.no_grad():
            features = self.model(input_tensor)

        return cloth_image, edges, features
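
    # Sketch (assumption, not part of the original pipeline): for usable
    # texture descriptors, drop the classifier head and take the pooled
    # 2048-d embedding instead of the logits:
    #
    #   backbone = torch.nn.Sequential(*list(self.model.children())[:-1])
    #   embedding = backbone(input_tensor).flatten(1)  # shape (1, 2048)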

    def render_text(self, text: str, font_size: int, text_color: tuple):
        # Rasterize the text onto a transparent RGBA canvas
        # (font.getsize() was removed in Pillow 10; getbbox() replaces it)
        font = ImageFont.truetype(self.font_path, font_size)
        left, top, right, bottom = font.getbbox(text)
        text_image = Image.new('RGBA', (right - left, bottom - top), (255, 255, 255, 0))
        draw = ImageDraw.Draw(text_image)
        # Offset by the bbox origin so ascenders and descenders are not clipped
        draw.text((-left, -top), text, font=font, fill=text_color)

        return np.array(text_image)

    def apply_perspective_transform(self, text_image: np.ndarray, cloth_image_shape: tuple):
        # Warp the text quad onto a hard-coded quadrilateral inside the cloth;
        # the destination corners are fixed guesses rather than fitted points
        h, w, _ = cloth_image_shape
        src_points = np.float32([
            [0, 0],
            [text_image.shape[1], 0],
            [0, text_image.shape[0]],
            [text_image.shape[1], text_image.shape[0]],
        ])
        dst_points = np.float32([[50, 50], [w - 50, 30], [50, h - 100], [w - 50, h - 120]])

        matrix = cv2.getPerspectiveTransform(src_points, dst_points)
        warped_text = cv2.warpPerspective(
            text_image, matrix, (w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(0, 0, 0, 0),
        )

        return warped_text
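
    # Sketch (assumption, not in the original pipeline): dst_points could be
    # derived from the garment itself instead of fixed margins, e.g. the
    # bounding box of the largest edge contour (the edge map would need to
    # be passed in):
    #
    #   cnts, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    #   x, y, bw, bh = cv2.boundingRect(max(cnts, key=cv2.contourArea))
    #   dst_points = np.float32([[x, y], [x + bw, y], [x, y + bh], [x + bw, y + bh]])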

    def blend_text_with_cloth(self, cloth_image: np.ndarray, text_image: np.ndarray, edges: np.ndarray):
        # Resize the warped text to the cloth and match OpenCV's BGR channel order
        h, w, _ = cloth_image.shape
        text_resized = cv2.resize(text_image, (w, h), interpolation=cv2.INTER_AREA)
        text_bgra = cv2.cvtColor(text_resized, cv2.COLOR_RGBA2BGRA)

        # Soft mask from the text's alpha channel
        alpha = text_bgra[..., 3].astype(np.float32) / 255.0

        # Modulate text brightness with the blurred edge map so the lettering
        # picks up the fabric texture (a cheap stand-in for displacement mapping)
        texture = gaussian_filter(edges.astype(np.float32), sigma=5) / 255.0
        text_px = text_bgra[..., :3].astype(np.float32) * (0.7 + 0.3 * texture[..., None])

        # Alpha-composite the textured text over the cloth
        blended = (cloth_image.astype(np.float32) * (1.0 - alpha[..., None])
                   + text_px * alpha[..., None])

        return np.clip(blended, 0, 255).astype(np.uint8)
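
    # Sketch (assumption): true displacement mapping would shift the text
    # pixels along the fabric relief with cv2.remap instead of modulating
    # brightness:
    #
    #   dx = gaussian_filter(edges.astype(np.float32), 5) * 0.05
    #   ys, xs = np.mgrid[0:h, 0:w].astype(np.float32)
    #   displaced = cv2.remap(text_bgra, xs + dx, ys + dx, cv2.INTER_LINEAR)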

    def refine_output(self, blended_image: np.ndarray):
        # Light Gaussian blur to soften blend seams (note: it also slightly
        # softens the cloth texture)
        refined = cv2.GaussianBlur(blended_image, (5, 5), 0)
        return refined
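
    # Sketch (assumption, not part of the original pipeline): a bilateral
    # filter would smooth blend seams while preserving fabric edges better
    # than a plain Gaussian:
    #
    #   refined = cv2.bilateralFilter(blended_image, 9, 75, 75)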

    def process(self, cloth_image_path: str, text: str, font_size: int, text_color: tuple):
        # Step 1: Preprocess the cloth image (the ResNet features are unused here)
        cloth_image, edges, _ = self.preprocess_cloth_image(cloth_image_path)

        # Step 2: Render the text
        text_image = self.render_text(text, font_size, text_color)

        # Step 3: Apply perspective transformation
        warped_text = self.apply_perspective_transform(text_image, cloth_image.shape)

        # Step 4: Blend text with the cloth
        blended_image = self.blend_text_with_cloth(cloth_image, warped_text, edges)

        # Step 5: Refine the output
        final_image = self.refine_output(blended_image)

        return final_image
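
# Minimal headless usage sketch (hypothetical paths; assumes a valid .ttf):
#
#   blender = TextClothBlender("fonts/DejaVuSans.ttf")
#   out = blender.process("shirt.jpg", "HELLO", font_size=64,
#                         text_color=(255, 0, 0, 255))
#   cv2.imwrite("blended.jpg", out)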

# Define the Gradio interface
def blend_text_on_cloth(cloth_image, text, font_size, text_color):
    font_path = "path/to/font.ttf"  # Ensure the font file exists in the deployed environment
    blender = TextClothBlender(font_path)
    # Parse "R,G,B,A" (parentheses optional) into an int tuple
    text_color = tuple(int(c) for c in text_color.strip('()').split(','))
    cloth_image_path = "temp_cloth_image.jpg"

    # Save the uploaded cloth image temporarily
    cv2.imwrite(cloth_image_path, cv2.cvtColor(np.array(cloth_image), cv2.COLOR_RGB2BGR))
    
    # Process the image
    result = blender.process(cloth_image_path, text, int(font_size), text_color)
    return result[:, :, ::-1]  # Convert BGR to RGB for display

iface = gr.Interface(
    fn=blend_text_on_cloth,
    inputs=[
        gr.Image(type="pil", label="Upload Cloth Image"),
        gr.Textbox(label="Text to Blend"),
        gr.Slider(10, 100, step=1, label="Font Size"),
        gr.Textbox(label="Text Color (R,G,B,A)", placeholder="e.g., 255,0,0,255")
    ],
    outputs=gr.Image(type="numpy", label="Blended Output")
)

if __name__ == "__main__":
    iface.launch()