Upload 5 files

Browse files

PoloPan Object Detection

Files changed (6) hide show

.gitattributes +2 -0
README.md +58 -0
config.json +73 -0
model.safetensors +3 -0
preprocessor_config.json +26 -0
sample_image.png +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ model.safetensors filter=lfs diff=lfs merge=lfs -text
2	+ sample_image.png filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+library_name: transformers
+license: mit
+language:
+- en
+pipeline_tag: object-detection
+base_model:
+- microsoft/conditional-detr-resnet-50
+tags:
+- object-detection
+- fashion
+- search
+---
+This model is a fine-tuned version of microsoft/conditional-detr-resnet-50.
+You can find details of the model in this GitHub repo -> [fashion-visual-search](https://github.com/yainage90/fashion-visual-search)
+You can also find the companion fashion image feature extractor model here -> [yainage90/fashion-image-feature-extractor](https://huggingface.co/yainage90/fashion-image-feature-extractor)
+This model was trained using a combination of two datasets: [modanet](https://github.com/eBay/modanet) and [fashionpedia](https://fashionpedia.github.io/home/)
+The labels are ['bag', 'bottom', 'dress', 'hat', 'outer', 'shoes', 'top']
+The best score, mAP 0.7542, was achieved at epoch 96 of 100 total epochs. Because the best epoch came so close to the end of training, it is believed that there is still a little room for performance improvement.
+``` python
+from PIL import Image
+import torch
+from transformers import  AutoImageProcessor, AutoModelForObjectDetection
+device = 'cpu'
+if torch.cuda.is_available():
+    device = torch.device('cuda')
+elif torch.backends.mps.is_available():
+    device = torch.device('mps')
+ckpt = 'yainage90/fashion-object-detection'
+image_processor = AutoImageProcessor.from_pretrained(ckpt)
+model = AutoModelForObjectDetection.from_pretrained(ckpt).to(device)
+image = Image.open('<path/to/image>').convert('RGB')
+with torch.no_grad():
+    inputs = image_processor(images=[image], return_tensors="pt")
+    outputs = model(**inputs.to(device))
+    target_sizes = torch.tensor([[image.size[1], image.size[0]]])
+    results = image_processor.post_process_object_detection(outputs, threshold=0.4, target_sizes=target_sizes)[0]
+    items = []
+    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+        score = score.item()
+        label = label.item()
+        box = [i.item() for i in box]
+        print(f"{model.config.id2label[label]}: {round(score, 3)} at {box}")
+        items.append((score, label, box))
+```
+![sample_image](sample_image.png)

config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "./object_detection/model_ckpt",
+  "activation_dropout": 0.0,
+  "activation_function": "relu",
+  "architectures": [
+    "ConditionalDetrForObjectDetection"
+  ],
+  "attention_dropout": 0.0,
+  "auxiliary_loss": false,
+  "backbone": "resnet50",
+  "backbone_config": null,
+  "backbone_kwargs": {
+    "in_chans": 3,
+    "out_indices": [
+      1,
+      2,
+      3,
+      4
+    ]
+  },
+  "bbox_cost": 5,
+  "bbox_loss_coefficient": 5,
+  "class_cost": 2,
+  "cls_loss_coefficient": 2,
+  "d_model": 256,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "dice_loss_coefficient": 1,
+  "dilation": false,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "focal_alpha": 0.25,
+  "giou_cost": 2,
+  "giou_loss_coefficient": 2,
+  "id2label": {
+    "0": "bag",
+    "1": "bottom",
+    "2": "dress",
+    "3": "hat",
+    "4": "outer",
+    "5": "shoes",
+    "6": "top"
+  },
+  "init_std": 0.02,
+  "init_xavier_std": 1.0,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "bag": 0,
+    "bottom": 1,
+    "dress": 2,
+    "hat": 3,
+    "outer": 4,
+    "shoes": 5,
+    "top": 6
+  },
+  "mask_loss_coefficient": 1,
+  "max_position_embeddings": 1024,
+  "model_type": "conditional_detr",
+  "num_channels": 3,
+  "num_hidden_layers": 6,
+  "num_queries": 300,
+  "position_embedding_type": "sine",
+  "scale_embedding": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.0",
+  "use_pretrained_backbone": true,
+  "use_timm_backbone": true
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01f78edc0c0e3abfcf8b09555d04ab5a43216a501bf24559ce5619673c4ce824
+size 174081852

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "do_convert_annotations": true,
+  "do_normalize": true,
+  "do_pad": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "format": "coco_detection",
+  "image_mean": [
+    0.485,
+    0.456,
+    0.406
+  ],
+  "image_processor_type": "ConditionalDetrImageProcessor",
+  "image_std": [
+    0.229,
+    0.224,
+    0.225
+  ],
+  "pad_size": null,
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "longest_edge": 1333,
+    "shortest_edge": 800
+  }
+}

sample_image.png ADDED Viewed

Git LFS Details

SHA256: e9df36ba15d8d2fdd45f91c39e510214976f9dc60b4664e22dd5d8008e4e61d1
Pointer size: 131 Bytes
Size of remote file: 795 kB