File size: 3,525 Bytes
3fba243
dc4542a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fba243
 
dc4542a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fba243
 
 
 
 
 
 
 
ab120f4
 
 
dc4542a
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os, subprocess
import spaces
import gradio as gr
from functools import partial
from huggingface_hub import hf_hub_download

@spaces.GPU
def dummy_gpu():
    """Do nothing; a no-op function wrapped with the ``spaces.GPU`` decorator.

    NOTE(review): presumably defined only so that a GPU-decorated function
    exists at import time -- confirm against the `spaces` documentation.
    """
    return None

# https://github.com/R3gm/stablepy/blob/main/stablepy/diffusers_vanilla/adetailer.py
# =====================================
# Yolo
# =====================================
from pathlib import Path
import numpy as np
import torch
from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw
from torchvision.transforms.functional import to_pil_image
from ultralytics import YOLO

# Print the installed packages to stdout (presumably for debugging the
# runtime environment). pip is invoked through the running interpreter with
# an argument list, so no shell is spawned and the packages listed are those
# of the interpreter that executes this file.
import sys

subprocess.run([sys.executable, "-m", "pip", "list"], check=False)

def create_mask_from_bbox(
    bboxes: np.ndarray, shape: tuple[int, int]
) -> list[Image.Image]:
    """
    Build one rectangular mask image per bounding box.

    Parameters
    ----------
        bboxes: np.ndarray (or any sequence of boxes)
            list of [x1, y1, x2, y2]
            bounding boxes
        shape: tuple[int, int]
            shape of the image (width, height)

    Returns
    -------
        masks: list[Image.Image]
        A list of mode "L" masks, one per box, each black with the box
        area filled in white. Empty when `bboxes` is empty.
    """
    masks = []
    for bbox in bboxes:
        # Hand Pillow plain Python floats rather than a NumPy row.
        # NOTE(review): Pillow may read array-like coordinates through their
        # raw buffer, which would make the result depend on the array dtype;
        # converting first keeps the drawing independent of it.
        coords = np.asarray(bbox, dtype=float).ravel().tolist()
        mask = Image.new("L", shape, "black")
        ImageDraw.Draw(mask).rectangle(coords, fill="white")
        masks.append(mask)
    return masks


def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Convert a stack of mask tensors into PIL images resized to `shape`.

    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        The device can be CUDA, but `to_pil_image` takes care of that.

    shape: tuple[int, int]
        (width, height) of the original image

    Returns
    -------
    images: list[Image.Image]
        One mode "L" image per entry along the first dimension of `masks`.
    """
    images = []
    for index in range(masks.shape[0]):
        pil_mask = to_pil_image(masks[index], mode="L")
        images.append(pil_mask.resize(shape))
    return images


def yolo_detector(
    image: Image.Image, model_path: str | Path | None = None, confidence: float = 0.3
) -> list[Image.Image] | None:
    """
    Run a YOLO model on `image` and return one mask per detection.

    Parameters
    ----------
    image: Image.Image
        The image to run detection on.

    model_path: str | Path | None
        Weights file handed to `YOLO`. When falsy, "face_yolov8n.pt" is
        fetched from the "Bingsu/adetailer" hub repository.

    confidence: float
        Forwarded to the model call as `conf`.

    Returns
    -------
    masks: list[Image.Image] | None
        None when the first prediction has no boxes. Otherwise the
        prediction's own masks converted with `mask_to_pil`, or, when it has
        no masks, rectangles built with `create_mask_from_bbox`.
    """
    weights = model_path or hf_hub_download("Bingsu/adetailer", "face_yolov8n.pt")
    detector = YOLO(weights)
    result = detector(image, conf=confidence)[0]

    boxes_xyxy = result.boxes.xyxy.cpu().numpy()
    if boxes_xyxy.size == 0:
        return None

    if result.masks is not None:
        return mask_to_pil(result.masks.data, image.size)
    return create_mask_from_bbox(boxes_xyxy, image.size)

@spaces.GPU
def infer(text: str):
    """
    Run the person, face and hand detectors on "./image.webp".

    Parameters
    ----------
    text: str
        Not used by the function body.

    Returns
    -------
    str
        `str()` of the value returned by the last detector run (the hand
        model); results of the earlier detectors are overwritten.
    """
    model_files = ("person_yolov8s-seg.pt", "face_yolov8n.pt", "hand_yolov8n.pt")
    # Download every model first, in the same order the detectors are run.
    detectors = [
        partial(yolo_detector, model_path=hf_hub_download("Bingsu/adetailer", name))
        for name in model_files
    ]

    # The context manager closes the underlying file; `convert` returns a
    # separate image object that stays usable afterwards.
    with Image.open("./image.webp") as source:
        init_image = source.convert("RGB")

    masks = None
    for detector in detectors:
        # Each iteration replaces `masks`, so only the last result survives.
        masks = detector(init_image)
    return str(masks)

# Minimal Gradio UI: an input textbox, a "Run" button and an output textbox.
with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input", value="", show_copy_button=True)
    run_button = gr.Button("Run", variant="primary")
    output_text = gr.Textbox(label="Output", value="", show_copy_button=True)

    # Clicking the button calls `infer` with the input textbox value and
    # writes the returned string into the output textbox.
    run_button.click(fn=infer, inputs=[input_text], outputs=[output_text])

# Enable the request queue, then start the app.
demo.queue().launch()