Working on understanding and getting something to work
Browse files- understand.py +41 -5
understand.py
CHANGED
|
@@ -6,7 +6,8 @@ import numpy as np
|
|
| 6 |
from PIL import Image
|
| 7 |
|
| 8 |
from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
|
| 9 |
-
from transformers.models.detr.feature_extraction_detr import rgb_to_id
|
|
|
|
| 10 |
|
| 11 |
TEST_IMAGE = Image.open(r"images/Test_Street_VisDrone.JPG")
|
| 12 |
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
|
|
@@ -21,15 +22,50 @@ model_name = MODEL_NAME_MASKFORMER
|
|
| 21 |
|
| 22 |
# Starting with MaskFormer
|
| 23 |
|
| 24 |
-
processor = MaskFormerImageProcessor.from_pretrained(model_name)
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
|
|
|
|
|
|
| 27 |
model.to(DEVICE)
|
| 28 |
|
| 29 |
# img = np.array(TEST_IMAGE)
|
| 30 |
|
| 31 |
-
inputs = processor(images=image, return_tensors="pt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
inputs.to(DEVICE)
|
| 33 |
|
| 34 |
|
| 35 |
-
outputs = model(**inputs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from PIL import Image
|
| 7 |
|
| 8 |
from transformers import DetrFeatureExtractor, DetrForSegmentation, MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
|
| 9 |
+
# from transformers.models.detr.feature_extraction_detr import rgb_to_id
|
| 10 |
+
from transformers.image_transforms import rgb_to_id
|
| 11 |
|
| 12 |
TEST_IMAGE = Image.open(r"images/Test_Street_VisDrone.JPG")
|
| 13 |
MODEL_NAME_DETR = "facebook/detr-resnet-50-panoptic"
|
|
|
|
| 22 |
|
| 23 |
# Starting with MaskFormer
|
| 24 |
|
| 25 |
+
processor = MaskFormerImageProcessor.from_pretrained(model_name) # <class 'transformers.models.maskformer.image_processing_maskformer.MaskFormerImageProcessor'>
|
| 26 |
+
# dir(processor) --> ['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__',
|
| 27 |
+
# '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__',
|
| 28 |
+
# '__weakref__', '_auto_class', '_create_repo', '_get_files_timestamps', '_max_size', '_pad_image', '_preprocess', '_preprocess_image', '_preprocess_mask', '_processor_class',
|
| 29 |
+
# '_set_processor_class', '_upload_modified_files', 'center_crop', 'convert_segmentation_map_to_binary_masks', 'do_normalize', 'do_reduce_labels', 'do_rescale', 'do_resize',
|
| 30 |
+
# 'encode_inputs', 'fetch_images', 'from_dict', 'from_json_file', 'from_pretrained', 'get_image_processor_dict', 'ignore_index', 'image_mean', 'image_std', 'model_input_names',
|
| 31 |
+
# 'normalize', 'pad', 'post_process_instance_segmentation', 'post_process_panoptic_segmentation', 'post_process_segmentation', 'post_process_semantic_segmentation', 'preprocess',
|
| 32 |
+
# 'push_to_hub', 'register_for_auto_class', 'resample', 'rescale', 'rescale_factor', 'resize', 'save_pretrained', 'size', 'size_divisor', 'to_dict', 'to_json_file', 'to_json_string']
|
| 33 |
|
| 34 |
+
model = MaskFormerForInstanceSegmentation.from_pretrained(model_name) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentation'>
|
| 35 |
+
# dir(model) output was too long to list here
|
| 36 |
model.to(DEVICE)
|
| 37 |
|
| 38 |
# img = np.array(TEST_IMAGE)
|
| 39 |
|
| 40 |
+
inputs = processor(images=image, return_tensors="pt") # <class 'transformers.image_processing_utils.BatchFeature'>
|
| 41 |
+
# dir(inputs) --> ['_MutableMapping__marker', '__abstractmethods__', '__class__', '__contains__', '__copy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__',
|
| 42 |
+
# '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__',
|
| 43 |
+
# '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__slots__', '__str__',
|
| 44 |
+
# '__subclasshook__', '__weakref__', '_abc_impl', '_get_is_as_tensor_fns', 'clear', 'convert_to_tensors', 'copy', 'data', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem',
|
| 45 |
+
# 'setdefault', 'to', 'update', 'values']
|
| 46 |
inputs.to(DEVICE)
|
| 47 |
|
| 48 |
|
| 49 |
+
outputs = model(**inputs) # <class 'transformers.models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput'>
|
| 50 |
+
# Each element of this output object is a <class 'torch.Tensor'>
|
| 51 |
+
# dir(outputs) --> ['__annotations__', '__class__', '__contains__', '__dataclass_fields__', '__dataclass_params__', '__delattr__', '__delitem__', '__dict__', '__dir__',
|
| 52 |
+
# '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__',
|
| 53 |
+
# '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__post_init__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__setattr__',
|
| 54 |
+
# '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'attentions', 'auxiliary_logits', 'class_queries_logits', 'clear', 'copy', 'encoder_hidden_states',
|
| 55 |
+
# 'encoder_last_hidden_state', 'fromkeys', 'get', 'hidden_states', 'items', 'keys', 'loss', 'masks_queries_logits', 'move_to_end', 'pixel_decoder_hidden_states',
|
| 56 |
+
# 'pixel_decoder_last_hidden_state', 'pop', 'popitem', 'setdefault', 'to_tuple', 'transformer_decoder_hidden_states', 'transformer_decoder_last_hidden_state',
|
| 57 |
+
# 'update', 'values']
|
| 58 |
+
|
| 59 |
+
results = processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
|
| 60 |
+
# <class 'dict'>
|
| 61 |
+
# Example of
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# From Tutorial (Box 79)
|
| 65 |
+
# def get_mask(segment_idx):
|
| 66 |
+
# segment = results['segments_info'][segment_idx]
|
| 67 |
+
# print("Visualizing mask for:", id2label[segment['label_id']])
|
| 68 |
+
# mask = (predicted_panoptic_seg == segment['id'])
|
| 69 |
+
# visual_mask = (mask * 255).astype(np.uint8)
|
| 70 |
+
# return Image.fromarray(visual_mask)
|
| 71 |
+
|