Spaces:

SmilingWolf
/

danbooru2022_image_similarity

Running

App Files Files Community

SmilingWolf committed on Jan 22, 2023

Commit

ff17aaa

1 Parent(s): b40c0ad

Danbooru2022 Explorer v1.0

Browse files

Files changed (6) hide show

.gitattributes +1 -0
Utils/dbimutils.py +54 -0
app.py +206 -0
index/cosine_ids.npy +3 -0
index/cosine_infos.json +1 -0
index/cosine_knn.index +3 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.index filter=lfs diff=lfs merge=lfs -text

Utils/dbimutils.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# DanBooru IMage Utility functions
+import cv2
+import numpy as np
+from PIL import Image
+def smart_imread(img, flag=cv2.IMREAD_UNCHANGED):
+    if img.endswith(".gif"):
+        img = Image.open(img)
+        img = img.convert("RGB")
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    else:
+        img = cv2.imread(img, flag)
+    return img
+def smart_24bit(img):
+    if img.dtype is np.dtype(np.uint16):
+        img = (img / 257).astype(np.uint8)
+    if len(img.shape) == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    elif img.shape[2] == 4:
+        trans_mask = img[:, :, 3] == 0
+        img[trans_mask] = [255, 255, 255, 255]
+        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
+    return img
+def make_square(img, target_size):
+    old_size = img.shape[:2]
+    desired_size = max(old_size)
+    desired_size = max(desired_size, target_size)
+    delta_w = desired_size - old_size[1]
+    delta_h = desired_size - old_size[0]
+    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
+    left, right = delta_w // 2, delta_w - (delta_w // 2)
+    color = [255, 255, 255]
+    new_im = cv2.copyMakeBorder(
+        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
+    )
+    return new_im
+def smart_resize(img, size):
+    # Assumes the image has already gone through make_square
+    if img.shape[0] > size:
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
+    elif img.shape[0] < size:
+        img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+    return img

app.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import argparse
+import functools
+import json
+from pathlib import Path
+import faiss
+import gradio as gr
+import numpy as np
+import PIL.Image
+import requests
+import tensorflow as tf
+from huggingface_hub import hf_hub_download
+from Utils import dbimutils
+TITLE = "## Danbooru Explorer"  # Markdown heading shown first in the UI
+DESCRIPTION = """
+Image similarity-based retrieval tool using:
+- [SmilingWolf/wd-v1-4-convnext-tagger-v2](https://huggingface.co/SmilingWolf/wd-v1-4-convnext-tagger-v2) as feature extractor
+- [Faiss](https://github.com/facebookresearch/faiss) and [autofaiss](https://github.com/criteo/autofaiss) for indexing
+"""  # Markdown blurb shown right after the title
+CONV_MODEL_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"  # repo id passed to hf_hub_download
+CONV_MODEL_REVISION = "v2.0"  # repo revision passed to hf_hub_download
+CONV_FEXT_LAYER = "predictions_norm"  # layer whose output becomes the feature vector
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--share", action="store_true")
+    return parser.parse_args()
+def download_model(model_repo, model_revision):
+    model_files = [
+        {"filename": "saved_model.pb", "subfolder": ""},
+        {"filename": "keras_metadata.pb", "subfolder": ""},
+        {"filename": "variables.index", "subfolder": "variables"},
+        {"filename": "variables.data-00000-of-00001", "subfolder": "variables"},
+    ]
+    model_file_paths = []
+    for elem in model_files:
+        model_file_paths.append(
+            Path(hf_hub_download(model_repo, revision=model_revision, **elem))
+        )
+    model_path = model_file_paths[0].parents[0]
+    return model_path
+def load_model(model_repo, model_revision, feature_extraction_layer):
+    model_path = download_model(model_repo, model_revision)
+    full_model = tf.keras.models.load_model(model_path)
+    model = tf.keras.models.Model(
+        full_model.inputs, full_model.get_layer(feature_extraction_layer).output
+    )
+    return model
+def danbooru_id_to_url(image_id, selected_ratings, api_username="", api_key=""):
+    headers = {"User-Agent": "image_similarity_tool"}
+    ratings_to_letters = {
+        "General": "g",
+        "Sensitive": "s",
+        "Questionable": "q",
+        "Explicit": "e",
+    }
+    acceptable_ratings = [ratings_to_letters[x] for x in selected_ratings]
+    image_url = f"https://danbooru.donmai.us/posts/{image_id}.json"
+    if api_username != "" and api_key != "":
+        image_url = f"{image_url}?api_key={api_key}&login={api_username}"
+    r = requests.get(image_url, headers=headers)
+    if r.status_code != 200:
+        return None
+    content = json.loads(r.text)
+    image_url = content["large_file_url"] if "large_file_url" in content else None
+    image_url = image_url if content["rating"] in acceptable_ratings else None
+    return image_url
+class SimilaritySearcher:
+    def __init__(self, model, images_ids):
+        self.knn_index = None
+        self.knn_metric = None
+        self.model = model
+        self.images_ids = images_ids
+    def change_index(self, knn_metric):
+        if knn_metric == self.knn_metric:
+            return
+        if knn_metric == "ip":
+            self.knn_index = faiss.read_index("index/ip_knn.index")
+            config = json.loads(open("index/ip_infos.json").read())["index_param"]
+        elif knn_metric == "cosine":
+            self.knn_index = faiss.read_index("index/cosine_knn.index")
+            config = json.loads(open("index/cosine_infos.json").read())["index_param"]
+        faiss.ParameterSpace().set_index_parameters(self.knn_index, config)
+        self.knn_metric = knn_metric
+    def predict(
+        self, image, selected_ratings, knn_metric, api_username, api_key, n_neighbours
+    ):
+        _, height, width, _ = self.model.inputs[0].shape
+        self.change_index(knn_metric)
+        # Alpha to white
+        image = image.convert("RGBA")
+        new_image = PIL.Image.new("RGBA", image.size, "WHITE")
+        new_image.paste(image, mask=image)
+        image = new_image.convert("RGB")
+        image = np.asarray(image)
+        # PIL RGB to OpenCV BGR
+        image = image[:, :, ::-1]
+        image = dbimutils.make_square(image, height)
+        image = dbimutils.smart_resize(image, height)
+        image = image.astype(np.float32)
+        image = np.expand_dims(image, 0)
+        target = self.model(image).numpy()
+        if self.knn_metric == "cosine":
+            faiss.normalize_L2(target)
+        dists, indexes = self.knn_index.search(target, k=n_neighbours)
+        neighbours_ids = self.images_ids[indexes][0]
+        neighbours_ids = [int(x) for x in neighbours_ids]
+        captions = []
+        for image_id, dist in zip(neighbours_ids, dists[0]):
+            captions.append(f"{image_id}/{dist:.2f}")
+        image_urls = []
+        for image_id in neighbours_ids:
+            current_url = danbooru_id_to_url(
+                image_id, selected_ratings, api_username, api_key
+            )
+            if current_url is not None:
+                image_urls.append(current_url)
+        return list(zip(image_urls, captions))
+def main():
+    args = parse_args()
+    model = load_model(CONV_MODEL_REPO, CONV_MODEL_REVISION, CONV_FEXT_LAYER)
+    images_ids = np.load("index/cosine_ids.npy")
+    searcher = SimilaritySearcher(model=model, images_ids=images_ids)
+    with gr.Blocks() as demo:
+        gr.Markdown(TITLE)
+        gr.Markdown(DESCRIPTION)
+        with gr.Row():
+            input = gr.Image(type="pil", label="Input")
+            with gr.Column():
+                with gr.Row():
+                    api_username = gr.Textbox(label="Danbooru API Username")
+                    api_key = gr.Textbox(label="Danbooru API Key")
+                with gr.Row():
+                    selected_ratings = gr.CheckboxGroup(
+                        choices=["General", "Sensitive", "Questionable", "Explicit"],
+                        value=["General", "Sensitive"],
+                        label="Ratings",
+                    )
+                    selected_metric = gr.Radio(
+                        choices=["cosine"],
+                        value="cosine",
+                        label="Metric selection",
+                        visible=False,
+                    )
+                    n_neighbours = gr.Slider(
+                        minimum=1, maximum=20, value=5, step=1, label="# of images"
+                    )
+                find_btn = gr.Button("Find similar images")
+        similar_images = gr.Gallery(label="Similar images")
+        similar_images.style(grid=5)
+        find_btn.click(
+            fn=searcher.predict,
+            inputs=[
+                input,
+                selected_ratings,
+                selected_metric,
+                api_username,
+                api_key,
+                n_neighbours,
+            ],
+            outputs=[similar_images],
+        )
+    demo.queue()
+    demo.launch(share=args.share)
+if __name__ == "__main__":  # start the app only when run as a script
+    main()

index/cosine_ids.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df724519c8c1981e49d80e2430261deb4fb6edf6d9c04e134427879710747394
+size 21830676

index/cosine_infos.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"index_key": "OPQ256_1280,IVF16384_HNSW32,PQ256x8", "index_param": "nprobe=16,efSearch=32,ht=2048", "index_path": "/home/SmilingWolf/eval/index/ConvNextBV1_01_14_2023_08h37m46s_cosine_knn.index", "size in bytes": 1535843672, "avg_search_speed_ms": 10.164478485783887, "99p_search_speed_ms": 12.419190758373587, "reconstruction error %": 22.007358074188232, "nb vectors": 5457637, "vectors dimension": 1024, "compression ratio": 14.555180035276402}

index/cosine_knn.index ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a718ab8370df8b9d84002c55f945ef241e4cc3450d306c2ecd97661f51022ad
+size 1535843672