Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import cv2 | |
| import numpy as np | |
| from typing import List | |
| import torch | |
| import random | |
| from ultralytics import YOLOWorld | |
| import json | |
class YoloWorld:
    """Wrapper around an ultralytics ``YOLOWorld`` model.

    Each inference call first restricts the model to the caller-supplied
    class prompts (``set_classes``) and then runs ``predict`` on one image.
    """

    def __init__(self, model_name="yolov8x-worldv2.pt", device="cpu"):
        """Load the model and move it to ``device``.

        Args:
            model_name: Weights name/path handed to ``YOLOWorld``.
            device: Device passed to ``model.to``. Defaults to ``'cpu'``,
                the value that used to be hard-coded here.
        """
        self.model = YOLOWorld(model_name)
        self.model.to(device=device)

    @staticmethod
    def _xyxy_as_ints(box):
        """Return ``box.xyxy`` as four plain Python ints ``[x1, y1, x2, y2]``.

        Plain ints (rather than ``np.int32`` scalars) keep the values
        JSON-serializable and make their printed form independent of the
        installed NumPy version.
        """
        # The unpack doubles as a check that exactly four values came back.
        x1, y1, x2, y2 = np.array(box.xyxy.cpu(), dtype=np.int32).squeeze().tolist()
        return [x1, y1, x2, y2]

    def run_inference(self, image_path: str, object_prompts: List):
        """Detect the prompted classes and return one dict per detection.

        Args:
            image_path: Image source handed to ``model.predict``.
            object_prompts: Class names handed to ``model.set_classes``.

        Returns:
            A list of single-key dicts ``{label: {'bounding_box': [x1, y1,
            x2, y2], 'confidence': score}}`` with the score rounded to two
            decimals. Each detection is also printed to stdout.
        """
        self.model.set_classes(object_prompts)
        results = self.model.predict(image_path)

        object_details = []
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = self._xyxy_as_ints(box)
                confidence = round(float(box.conf.cpu()), 2)
                # Look the name up on the result that owns the box.
                label = f'{result.names[int(box.cls)]}'
                print("Object Name :{} Bounding Box:{},{} Confidence score {}\n ".format(label, (x1, y1), (x2, y2), confidence))
                object_details.append({
                    label: {
                        'bounding_box': [x1, y1, x2, y2],
                        'confidence': confidence,
                    },
                })
        return object_details

    @staticmethod
    def draw_bboxes(rgb_frame, boxes, labels, line_thickness=3):
        """Draw labelled rectangles on an image read from disk.

        Declared as a static method because it uses no instance state; this
        makes both ``self.draw_bboxes(rgb_frame=...)`` and
        ``YoloWorld.draw_bboxes(path, ...)`` valid calls.

        Args:
            rgb_frame: Path of the image file to read with ``cv2.imread``.
            boxes: Iterable of ``(x1, y1, x2, y2)`` boxes. Tensors, NumPy
                arrays and plain sequences are all accepted.
            labels: One label per box, in the same order as ``boxes``.
            line_thickness: Rectangle line width; a falsy value derives the
                width from the image size.

        Returns:
            The annotated image as an RGB array.
        """
        # cvtColor returns a new array, so drawing on it leaves nothing shared.
        image = cv2.cvtColor(cv2.imread(rgb_frame), cv2.COLOR_BGR2RGB)
        tl = line_thickness or round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1  # line thickness
        tf = max(tl - 1, 1)  # font thickness

        # One random colour per distinct label so equal labels share a colour.
        color_dict = {
            item: [random.randint(28, 255) for _ in range(3)]
            for item in np.unique(np.asarray(labels))
        }

        for box, label in zip(boxes, labels):
            # OpenCV wants plain ints; int() works on tensor/NumPy/Python scalars.
            x1, y1, x2, y2 = (int(v) for v in box)
            c1, c2 = (x1, y1), (x2, y2)
            cv2.rectangle(image, c1, c2, color_dict[label], thickness=tl, lineType=cv2.LINE_AA)
            cv2.putText(image, str(label), (c1[0], c1[1] - 2), 0, tl / 3, color_dict[label], thickness=tf, lineType=cv2.LINE_AA)
        return image

    def format_detections(self, boxes, labels):
        """Return one ``"<label>\\tBounding Box :<box>\\n"`` line per detection."""
        return "".join(
            "{}\tBounding Box :{}\n".format(label, box)
            for box, label in zip(boxes, labels)
        )

    def run_yolo_infer(self, image_path: str, object_prompts: List,
                       conf: float = 0.40, output_path: str = 'final_mask.png'):
        """Detect the prompted classes, save an annotated image, and report.

        Args:
            image_path: Path of the image to run detection on; the same path
                is re-read by ``draw_bboxes`` for annotation.
            object_prompts: Class names handed to ``model.set_classes``.
            conf: Confidence threshold passed to ``model.predict``.
            output_path: File the annotated image is written to.

        Returns:
            A text summary naming the written image followed by one line per
            detection (see ``format_detections``).
        """
        self.model.set_classes(object_prompts)
        results = self.model.predict(image_path, conf=conf)

        bounding_boxes = []
        labels = []
        for result in results:
            for box in result.boxes:
                bounding_boxes.append(self._xyxy_as_ints(box))
                labels.append(result.names[int(box.cls)])

        detected_image = self.draw_bboxes(rgb_frame=image_path,
                                          boxes=bounding_boxes,
                                          labels=labels)
        predicted_data = self.format_detections(bounding_boxes, labels)

        # draw_bboxes returns RGB; OpenCV writes BGR, so swap channels back.
        cv2.imwrite(output_path, cv2.cvtColor(detected_image, cv2.COLOR_RGB2BGR))
        return "Predicted image : {} .\nDetails :\n{}".format(output_path, predicted_data)
if __name__ == "__main__":
    # Demo run: detect three prompted classes in a sample image and print
    # the text summary returned by the detector.
    detector = YoloWorld()
    summary = detector.run_yolo_infer(
        image_path='../image_store/demo2.jpg',
        object_prompts=['person', 'hat', 'building'],
    )
    print(summary)