Spaces:

qihoo360
/

FG-CLIP-Densefeature-demo

Running

App Files Files Community

qingshan777 committed 6 days ago

Commit

978d150

verified ·

1 Parent(s): 14502a1

Create app.py

Browse files

Files changed (1) hide show

app.py +104 -0

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import gradio as gr
+import torch
+import io
+from PIL import Image
+from transformers import (
+    AutoImageProcessor,
+    AutoTokenizer,
+    AutoModelForCausalLM,
+)
+import numpy as np
+model_root = "qihoo360/fg-clip-base"
+model = AutoModelForCausalLM.from_pretrained(model_root,trust_remote_code=True)
+device = model.device
+tokenizer = AutoTokenizer.from_pretrained(model_root)
+image_processor = AutoImageProcessor.from_pretrained(model_root)
+import math
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+def Get_Densefeature(image, candidate_labels):
+    """
+    Takes an image and a comma-separated string of candidate labels,
+    and returns the classification scores.
+    """
+    candidate_labels = [label.lstrip(" ") for label in candidate_labels.split(",") if label !=""]
+    # print(candidate_labels)
+    image_size=224
+    image = image.convert("RGB")
+    image = image.resize((image_size,image_size))
+    image_input = image_processor.preprocess(image, return_tensors='pt')['pixel_values'].to(device)
+    with torch.no_grad():
+        dense_image_feature = model.get_image_dense_features(image_input)
+        captions = [candidate_labels[0]]
+        caption_input = torch.tensor(tokenizer(captions, max_length=77, padding="max_length", truncation=True).input_ids, dtype=torch.long, device=device)
+        text_feature = model.get_text_features(caption_input,walk_short_pos=True)
+        text_feature = text_feature / text_feature.norm(p=2, dim=-1, keepdim=True)
+        dense_image_feature = dense_image_feature / dense_image_feature.norm(p=2, dim=-1, keepdim=True)
+    similarity = dense_image_feature.squeeze() @ text_feature.squeeze().T
+    similarity = similarity.cpu().numpy()
+    patch_size = int(math.sqrt(similarity.shape[0]))
+    original_shape = (patch_size, patch_size)
+    show_image = similarity.reshape(original_shape)
+    fig = plt.figure(figsize=(6, 6))
+    plt.imshow(show_image)
+    plt.title('similarity Visualization')
+    plt.axis('off')
+    buf = io.BytesIO()
+    plt.savefig(buf, format='png')
+    buf.seek(0)
+    plt.close(fig)
+    pil_img = Image.open(buf)
+    # buf.close()
+    return  pil_img
+with gr.Blocks() as demo:
+    gr.Markdown("# FG-CLIP Densefeature")
+    gr.Markdown(
+        "This app uses the FG-CLIP model (qihoo360/fg-clip-base) for Densefeature show on CPU :"
+    )
+    gr.Markdown(
+    "<span style='color: red; font-weight: bold;'>⚠️ (Run DenseFeature) only support one class</span>"
+    )
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil")
+            text_input = gr.Textbox(label="Input a label")
+            dfs_button = gr.Button("Run Densefeature", visible=True)
+        with gr.Column():
+            dfs_output = gr.Image(label="Similarity Visualization", type="pil")
+    examples = [
+        ["./cat_dfclor.jpg", "white cat,"],
+    ]
+    gr.Examples(
+        examples=examples,
+        inputs=[image_input, text_input],
+    )
+    dfs_button.click(fn=Get_Densefeature, inputs=[image_input, text_input], outputs=dfs_output)
+demo.launch()