Spaces:
Runtime error
Runtime error
File size: 6,278 Bytes
9bc9c3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import json
import random
import io
from PIL import Image, ImageDraw, ImageFont
import os
from PIL import ImageColor
# Divisor of the normalized coordinate scale: each box_2d value is divided by
# this constant and then multiplied by the image width or height to obtain
# pixel coordinates.
NORMALIZATION_DIVISOR = 1000
# @title Parsing JSON output
def parse_json(json_output: str) -> str:
    """Extract the JSON payload from text wrapped in a Markdown json fence.

    The text is scanned line by line for the first opening fence marker
    (three backticks followed by ``json``). When one is found, everything up
    to and including that line is dropped, and so is everything from the next
    triple-backtick onwards. When no opening fence is present the input is
    returned unchanged.

    Args:
        json_output: Raw text that may contain a fenced JSON block.

    Returns:
        The text between the fences (including its trailing newline, if any),
        or the original text when no json fence was found.
    """
    lines = json_output.splitlines()
    for i, line in enumerate(lines):
        # Compare the stripped line so that an indented fence or one followed
        # by trailing whitespace is still recognised.
        if line.strip() == "```json":
            # Keep only what follows the opening fence...
            json_output = "\n".join(lines[i + 1:])
            # ...and cut at the closing fence.
            json_output = json_output.split("```")[0]
            break  # Only the first fenced block is used
    return json_output
# @title Plotting Util
# Every colour name known to PIL's ImageColor.colormap, in the mapping's own
# iteration order; appended to the hand-picked palette used for plotting.
additional_colors = list(ImageColor.colormap)
def plot_bounding_boxes(im, bounding_boxes):
    """Draw labelled bounding boxes on an image and display the result.

    The boxes are drawn directly on ``im`` (no copy is made). Each box gets a
    colour chosen by cycling through a fixed palette extended with
    ``additional_colors``. The image size is printed before drawing.

    Args:
        im: The image to draw on; its ``size`` gives (width, height).
        bounding_boxes: JSON text, optionally wrapped in a Markdown json
            fence, holding a list of objects. Each object has a "box_2d"
            entry in [y1, x1, y2, x2] order, scaled by NORMALIZATION_DIVISOR,
            and may have a "label" entry that is written inside the box.
    """
    canvas = im
    width, height = canvas.size
    print(canvas.size)

    pen = ImageDraw.Draw(canvas)

    # Hand-picked colours first, then every other named colour
    palette = [
        'red', 'green', 'blue', 'yellow', 'orange', 'pink',
        'purple', 'brown', 'gray', 'beige', 'turquoise', 'cyan',
        'magenta', 'lime', 'navy', 'maroon', 'teal', 'olive',
        'coral', 'lavender', 'violet', 'gold', 'silver',
    ] + additional_colors
    palette_size = len(palette)

    # Strip the Markdown fence, then decode the list of boxes
    entries = json.loads(parse_json(bounding_boxes))

    for index, entry in enumerate(entries):
        # Wrap around the palette when there are more boxes than colours
        color = palette[index % palette_size]

        # Scale the normalized [y1, x1, y2, x2] values to pixel coordinates
        y1 = int(entry["box_2d"][0] / NORMALIZATION_DIVISOR * height)
        x1 = int(entry["box_2d"][1] / NORMALIZATION_DIVISOR * width)
        y2 = int(entry["box_2d"][2] / NORMALIZATION_DIVISOR * height)
        x2 = int(entry["box_2d"][3] / NORMALIZATION_DIVISOR * width)

        # Order the corners so that (left, top) precedes (right, bottom)
        left, right = (x2, x1) if x1 > x2 else (x1, x2)
        top, bottom = (y2, y1) if y1 > y2 else (y1, y2)

        pen.rectangle(((left, top), (right, bottom)), outline=color, width=4)

        # The label, when present, goes just inside the top-left corner
        if "label" in entry:
            pen.text((left + 8, top + 6), entry["label"], fill=color)

    canvas.show()
def save_cropped_images(
    im: Image.Image, bounding_boxes_json_str: str, output_folder: str = "output_llm"
) -> list[str]:
    """Crop each bounding box out of an image and save the crops as PNG files.

    Args:
        im: The PIL image to crop from.
        bounding_boxes_json_str: JSON text, optionally wrapped in a Markdown
            json fence, describing the boxes. Each box should have a
            "box_2d" entry with four coordinates in [y1, x1, y2, x2] order,
            scaled by NORMALIZATION_DIVISOR, and may have a "label" entry
            used to build the file name. A single JSON object is treated as
            a list containing one box.
        output_folder: Folder the crops are written to; created if missing.
            Defaults to "output_llm".

    Returns:
        The paths of the crops that were saved successfully. The list is
        empty when the JSON cannot be decoded or does not describe boxes.
        Malformed or zero-area boxes are reported on stdout and skipped.
    """
    saved_file_paths = []
    os.makedirs(output_folder, exist_ok=True)
    width, height = im.size

    # Strip the Markdown fence (if any) and decode the JSON payload
    parsed_json_str = parse_json(bounding_boxes_json_str)
    try:
        bounding_boxes_list = json.loads(parsed_json_str)
    except json.JSONDecodeError as e:
        print(f"Errore nel decodificare JSON: {e}")
        return saved_file_paths  # Nothing can be cropped without valid JSON

    # A lone object is accepted as a one-box list; any other non-list value
    # cannot describe bounding boxes.
    if isinstance(bounding_boxes_list, dict):
        bounding_boxes_list = [bounding_boxes_list]
    elif not isinstance(bounding_boxes_list, list):
        print("Errore: il JSON non contiene una lista di bounding box.")
        return saved_file_paths

    filename_counts = {}  # Next numeric suffix to try for each sanitized label
    used_filenames = set()  # File names already handed out during this call

    for i, bounding_box in enumerate(bounding_boxes_list):
        if not isinstance(bounding_box, dict) or "box_2d" not in bounding_box:
            print(f"Bounding box {i} saltata: chiave 'box_2d' mancante.")
            continue

        box_2d = bounding_box["box_2d"]
        if not isinstance(box_2d, (list, tuple)) or len(box_2d) != 4:
            print(f"Bounding box {i} saltata: 'box_2d' non ha 4 coordinate.")
            continue

        # Scale the normalized [y1, x1, y2, x2] values to pixel coordinates.
        # Non-numeric, NaN or infinite values make the conversion fail; such
        # boxes are skipped instead of aborting the whole run.
        try:
            abs_y1, abs_x1, abs_y2, abs_x2 = (
                round(value / NORMALIZATION_DIVISOR * extent)
                for value, extent in zip(box_2d, (height, width, height, width))
            )
        except (TypeError, ValueError, OverflowError):
            print(f"Bounding box {i} saltata: coordinate non numeriche.")
            continue

        # PIL's crop expects (left, upper, right, lower), so order the corners
        crop_left = min(abs_x1, abs_x2)
        crop_upper = min(abs_y1, abs_y2)
        crop_right = max(abs_x1, abs_x2)
        crop_lower = max(abs_y1, abs_y2)

        if crop_left >= crop_right or crop_upper >= crop_lower:
            label_for_log = bounding_box.get('label', f'indice {i}')
            print(f"Bounding box per '{label_for_log}' saltata: area nulla ({crop_left},{crop_upper},{crop_right},{crop_lower})")
            continue

        cropped_image = im.crop((crop_left, crop_upper, crop_right, crop_lower))

        # A missing or null label falls back to an index-based name; any
        # other value is converted to text before being sanitized.
        label = bounding_box.get("label")
        if label is None:
            label = f"unlabeled_crop_{i}"
        # Keep only characters that are safe in a file name
        safe_label = "".join(
            c for c in str(label) if c.isalnum() or c in (' ', '_', '-')
        ).strip().replace(' ', '_')
        if not safe_label:
            safe_label = f"unlabeled_crop_{i}"

        # The first crop for a label is "<label>.png", later ones get
        # "_1", "_2", ... Names already used in this call are stepped over so
        # that labels such as "cat" and "cat_1" cannot overwrite each other.
        count = filename_counts.get(safe_label, 0)
        while True:
            output_filename = f"{safe_label}_{count}.png" if count > 0 else f"{safe_label}.png"
            count += 1
            if output_filename not in used_filenames:
                break
        filename_counts[safe_label] = count
        used_filenames.add(output_filename)

        output_path = os.path.join(output_folder, output_filename)
        try:
            cropped_image.save(output_path)
            saved_file_paths.append(output_path)
        except Exception as e:
            # Best effort: report the failure and carry on with other boxes
            print(f"Errore nel salvare l'immagine {output_path}: {e}")

    return saved_file_paths