aryswisnu sessex committed on
Commit
f489c3f
·
0 Parent(s):

Duplicate from sessex/CLIPSeg2

Browse files

Co-authored-by: Sydney Essex <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
0.001861_submarine _ submarine_0.9862991.jpg ADDED
0.003473_cliff _ cliff_0.51112.jpg ADDED
0.004658_spatula _ spatula_0.35416836.jpg ADDED
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CLIPSeg
3
+ emoji: 🦀
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.16.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: sessex/CLIPSeg2
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
2
+ import gradio as gr
3
+ from PIL import Image
4
+ import torch
5
+ import matplotlib.pyplot as plt
6
+ import torch
7
+ import numpy as np
8
+
9
# Hub id of the CLIPSeg checkpoint. The processor and the model must come from
# the same checkpoint, so the id is spelled once and shared by both loaders.
CLIPSEG_CHECKPOINT = "CIDAS/clipseg-rd64-refined"

# Loaded once at import time and reused by every request.
processor = CLIPSegProcessor.from_pretrained(CLIPSEG_CHECKPOINT)
model = CLIPSegForImageSegmentation.from_pretrained(CLIPSEG_CHECKPOINT)
11
+
12
+
13
def process_image(image, prompt):
    """Return a min-max normalised CLIPSeg response map for one text prompt.

    Args:
        image: PIL image to segment. Its ``size`` determines the resolution
            of the returned map.
        prompt: Text describing the region to look for.

    Returns:
        A 2-D float NumPy array at the resolution of ``image`` in which the
        weakest response is 0.0 and the strongest is 1.0. When the response
        is constant over the whole image there is nothing to rescale, and an
        all-zero array is returned instead of dividing by zero.
    """
    inputs = processor(
        text=prompt, images=image, padding="max_length", return_tensors="pt"
    )

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits

    probabilities = torch.sigmoid(logits).cpu().numpy()
    # NOTE(review): the documented logits shape is (batch_size, height, width),
    # but a single prompt may arrive already squeezed to (height, width)
    # depending on the transformers version. Keep only the two spatial axes so
    # both layouts work; more than one prompt fails here with a clear error.
    probabilities = probabilities.reshape(probabilities.shape[-2:])

    # Go through an 8-bit greyscale image so PIL can resize the map back to
    # the resolution of the input image.
    low_res = Image.fromarray(np.uint8(probabilities * 255), "L")
    mask = np.array(low_res.resize(image.size), dtype=np.float64)

    # Min-max normalise; a flat map has no range to stretch.
    lowest = mask.min()
    span = mask.max() - lowest
    if span == 0:
        return np.zeros_like(mask)
    return (mask - lowest) / span
35
+
36
+
37
def get_masks(prompts, img, threhsold):
    """Build one boolean mask per comma-separated prompt.

    Args:
        prompts: Comma-separated prompt texts. Whitespace around each entry
            is ignored and blank entries are skipped. ``None`` is treated
            like an empty string.
        img: PIL image handed to ``process_image`` for every prompt.
        threhsold: Cut-off applied to each normalised response map; pixels
            strictly above it are ``True``. (The misspelt parameter name is
            kept so keyword callers keep working.)

    Returns:
        A non-empty list of boolean arrays. When no usable prompt is given
        the list holds a single all-``False`` mask shaped like the image, so
        callers that stack the result keep working without running the model
        on empty text.
    """
    labels = [label.strip() for label in (prompts or "").split(",")]
    labels = [label for label in labels if label]

    if not labels:
        # PIL reports (width, height); array shape is (rows, columns).
        width, height = img.size
        return [np.zeros((height, width), dtype=bool)]

    return [process_image(img, label) > threhsold for label in labels]
45
+
46
+
47
def extract_image(img, pos_prompts, neg_prompts, threshold):
    """Cut out the regions matching the positive prompts but not the negative ones.

    Args:
        img: PIL image to segment.
        pos_prompts: Comma-separated descriptions of what to keep.
        neg_prompts: Comma-separated descriptions of what to exclude.
        threshold: Cut-off forwarded to ``get_masks``.

    Returns:
        A tuple ``(output_image, final_mask, inverse_mask)``:
        ``output_image`` is an RGBA PIL image that is transparent outside the
        selection, ``final_mask`` is an 8-bit greyscale PIL image (255 where
        selected), and ``inverse_mask`` is a uint8 NumPy array holding the
        complement of ``final_mask``.
    """

    def union_of_masks(prompts):
        # A blank prompt box means "no region"; do not query the model with
        # empty text, whose response would be arbitrary.
        if not prompts or not prompts.strip():
            # PIL size is (width, height); array shape is (rows, columns).
            return np.zeros(img.size[::-1], dtype=bool)
        # Combine the per-prompt masks into one mask with a logical OR.
        return np.any(np.stack(get_masks(prompts, img, threshold)), axis=0)

    selected = union_of_masks(pos_prompts) & ~union_of_masks(neg_prompts)

    # Extract the final image: paste the input through the mask onto a fully
    # transparent canvas.
    final_mask = Image.fromarray(selected.astype(np.uint8) * 255, "L")
    inverse_mask = (~selected).astype(np.uint8) * 255
    output_image = Image.new("RGBA", img.size, (0, 0, 0, 0))
    output_image.paste(img, mask=final_mask)

    return output_image, final_mask, inverse_mask
63
+
64
+
65
# Static texts for the demo page. `description` and `article` are rendered as
# Markdown at the top of the UI.
title = "Interactive demo: zero-shot image segmentation with CLIPSeg"
description = (
    "Demo for using CLIPSeg, a CLIP-based model for zero- and one-shot image "
    "segmentation. To use it, simply upload an image and add a text to mask "
    "(identify in the image), or use one of the examples below and click "
    "'submit'. Results will show up in a few seconds."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://arxiv.org/abs/2112.10003'>"
    "CLIPSeg: Image Segmentation Using Text and Image Prompts</a>"
    " | "
    "<a href='https://huggingface.co/docs/transformers/main/en/model_doc/clipseg'>"
    "HuggingFace docs</a></p>"
)
68
+
69
+
70
# Gradio UI: inputs (image, prompts, threshold) on the left, the three results
# of `extract_image` on the right.
with gr.Blocks() as demo:
    gr.Markdown("# CLIPSeg: Image Segmentation Using Text and Image Prompts")
    gr.Markdown(article)
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            # type="pil" so the callback receives a PIL image.
            input_image = gr.Image(type="pil")
            positive_prompts = gr.Textbox(
                label="Please describe what you want to identify (comma separated)"
            )
            negative_prompts = gr.Textbox(
                label="Please describe what you want to ignore (comma separated)"
            )

            input_slider_T = gr.Slider(
                minimum=0, maximum=1, value=0.4, label="Threshold"
            )
            # The caption of a Button is its value (first positional
            # argument); `label=` does not set the text shown on the button.
            btn_process = gr.Button("Process")

        with gr.Column():
            output_image = gr.Image(label="Result")
            output_mask = gr.Image(label="Mask")
            inverse_mask = gr.Image(label="Inverse")

    # Event listeners must be registered inside the Blocks context.
    btn_process.click(
        extract_image,
        inputs=[
            input_image,
            positive_prompts,
            negative_prompts,
            input_slider_T,
        ],
        outputs=[output_image, output_mask, inverse_mask],
        api_name="mask",
    )


demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python3-opencv
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ git+https://github.com/huggingface/transformers.git
2
+ torch
3
+ opencv-python