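# app.py -- Gradio demo for stamp detection and embedding.
# Pipeline (as implemented below): detect stamps with YOLOv8 or YOLO-Stamp,
# crop each detection, embed the crops with the vits8 or VAE encoder model,
# and compare the embeddings with pairwise cosine similarity.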
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from huggingface_hub import hf_hub_download
from sklearn.metrics.pairwise import cosine_similarity
from torchvision.transforms.functional import to_tensor
from ultralytics import YOLO

from utils import *
from models import YOLOStamp, Encoder
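
# --- Model loading ---
# All weights are pulled from the stamps-labs repos on the Hugging Face Hub.
# The helper functions used below (visualize_bbox, output_tensor_to_boxes,
# nonmax_suppression, xywh2xyxy, heatmap, annotate_heatmap) come from the
# local utils module via the star import above.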
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Detection model 1: fine-tuned YOLOv8 (TorchScript export).
yolov8 = YOLO(hf_hub_download('stamps-labs/yolov8-finetuned', filename='best.torchscript'), task='detect')

# Detection model 2: custom YOLO-Stamp detector.
yolo_stamp = YOLOStamp()
yolo_stamp.load_state_dict(torch.load(hf_hub_download('stamps-labs/yolo-stamp', filename='state_dict.pth'), map_location='cpu'))
yolo_stamp = yolo_stamp.to(device)
yolo_stamp.eval()

# Preprocessing for the YOLO-Stamp detector: normalize, then convert to tensor.
transform = A.Compose([
    A.Normalize(),
    ToTensorV2(p=1.0),
])

# Embedding model 1: vits8 (TorchScript export).
vits8 = torch.jit.load(hf_hub_download('stamps-labs/vits8-stamp', filename='vits8stamp-torchscript.pth'), map_location='cpu')
vits8 = vits8.to(device)
vits8.eval()

# Embedding model 2: VAE encoder.
encoder = Encoder()
encoder.load_state_dict(torch.load(hf_hub_download('stamps-labs/vae-encoder', filename='encoder.pth'), map_location='cpu'))
encoder = encoder.to(device)
encoder.eval()
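
# --- Inference ---
# predict() runs one detector and one embedder, both selected in the UI, and
# returns the annotated image, a DataFrame of boxes in original-image
# coordinates, the raw embeddings, and a similarity heatmap figure.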
def predict(image, det_choice, emb_choice):
    # (width, height) of the original image, used to rescale boxes back later.
    shape = torch.tensor(image.size)
    image = image.convert('RGB')

    if det_choice == 'yolov8':
        coef = torch.hstack((shape, shape)) / 640
        image = image.resize((640, 640))
        boxes = yolov8(image)[0].boxes.xyxy.cpu()
        image_with_boxes = visualize_bbox(image, boxes)
    elif det_choice == 'yolo-stamp':
        coef = torch.hstack((shape, shape)) / 448
        image = image.resize((448, 448))
        image_tensor = transform(image=np.array(image))['image']
        output = yolo_stamp(image_tensor.unsqueeze(0).to(device))
        boxes = output_tensor_to_boxes(output[0].detach().cpu())
        boxes = nonmax_suppression(boxes)
        boxes = xywh2xyxy(torch.tensor(boxes)[:, :4])
        image_with_boxes = visualize_bbox(image, boxes)
    else:
        return
    # Crop each detected stamp and compute its embedding.
    embeddings = []
    if emb_choice == 'vits8':
        for box in boxes:
            cropped_stamp = to_tensor(image.crop(box.tolist()))
            embeddings.append(vits8(cropped_stamp.unsqueeze(0).to(device))[0].detach().cpu())
    elif emb_choice == 'vae-encoder':
        for box in boxes:
            cropped_stamp = to_tensor(image.crop(box.tolist()).resize((118, 118)))
            embeddings.append(np.array(encoder(cropped_stamp.unsqueeze(0).to(device))[0][0].detach().cpu()))
    embeddings = np.stack(embeddings)

    # Pairwise cosine similarity between all stamp embeddings.
    similarities = cosine_similarity(embeddings)

    # Rescale boxes from the resized image back to the original resolution.
    boxes = boxes * coef
    df_boxes = pd.DataFrame(boxes, columns=['x1', 'y1', 'x2', 'y2'])

    # Render the similarity matrix as an annotated heatmap.
    fig, ax = plt.subplots()
    im, cbar = heatmap(similarities, range(1, len(embeddings) + 1), range(1, len(embeddings) + 1), ax=ax,
                       cmap="YlGn", cbarlabel="Embeddings similarities")
    texts = annotate_heatmap(im, valfmt="{x:.3f}")

    return image_with_boxes, df_boxes, embeddings, fig
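
# --- Gradio UI ---
# Three inputs (an image plus two model dropdowns) mapped to the four
# outputs returned by predict().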
examples = [
    ['./examples/1.jpg', 'yolov8', 'vits8'],
    ['./examples/2.jpg', 'yolov8', 'vae-encoder'],
    ['./examples/3.jpg', 'yolo-stamp', 'vits8'],
]
inputs = [
    gr.Image(type="pil"),
    gr.Dropdown(choices=['yolov8', 'yolo-stamp'], value='yolov8', label='Detection model'),
    gr.Dropdown(choices=['vits8', 'vae-encoder'], value='vits8', label='Embedding model'),
]
outputs = [
    gr.Image(type="pil"),
    gr.DataFrame(type='pandas', label="Bounding boxes"),
    gr.DataFrame(type='numpy', label="Embeddings"),
    gr.Plot(label="Cosine Similarities"),
]

app = gr.Interface(predict, inputs, outputs, examples=examples)
app.launch(debug=True)