# NOTE(review): removed pasted UI artifacts ("Spaces:", "Running") that were
# not Python source; this file is a script that masks sensitive image regions.
# Standard library
import os

# Third-party
import cv2
import numpy as np
from dotenv import load_dotenv
from PIL import Image

# Project-local
from LLM_package import ObjectDetector

# Load environment variables (GEMINI_API_KEY) from the parent directory's .env.
load_dotenv(dotenv_path='../.env')
def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
    """Build a binary privacy mask for *image_path* via LLM object detection.

    Regions detected as sensitive (faces, posters, name tags, ...) are painted
    white (255) in the mask; the rectangle spanned by *point1*/*point2*
    (normalized x, y coordinates in [0, 1]) is forced back to black (0),
    i.e. excluded from masking.  The mask is saved under ./saved_images.

    Args:
        risk_level: Currently only logged; reserved for future threshold tuning.
        image_path: Path to an image readable by ``cv2.imread``.
        point1, point2: Opposite corners (normalized) of the exclusion box.
        thresholds: Unused for now; kept for interface compatibility.

    Returns:
        Path of the saved mask image.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    print(risk_level, image_path, point1, point2, thresholds)
    print('point1,point2', point1, point2)
    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
    object_detector = ObjectDetector(API_KEY=GEMINI_API_KEY)
    debug_image_name = 'test_llm.jpg'
    object_detector.prompt_objects = {
        'face', 'poster', 'Name tag', 'License plate', 'Digital screens',
        'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard',
    }

    # cv2.imread signals failure by returning None (no exception) -- check it,
    # otherwise the later .shape access crashes with a cryptic AttributeError.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    height, width = image.shape[:2]

    mask_llm = np.zeros((height, width), dtype=np.uint8)
    llm_results = object_detector.detect_objects(image_path)
    for result in llm_results:
        # box_2d appears to be normalized [ymin, xmin, ymax, xmax] -- TODO confirm
        # against the LLM_package API.
        bbox = result['box_2d']
        x1, y1 = int(bbox[1] * width), int(bbox[0] * height)
        x2, y2 = int(bbox[3] * width), int(bbox[2] * height)
        mask_llm[y1:y2, x1:x2] = 255  # mask detected region

    # Exclusion rectangle from the two normalized points, clamped to the image.
    p1_x, p1_y = int(point1[0] * width), int(point1[1] * height)
    p2_x, p2_y = int(point2[0] * width), int(point2[1] * height)
    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
    x_max, y_max = min(width, max(p1_x, p2_x)), min(height, max(p1_y, p2_y))
    mask_llm[y_min:y_max, x_min:x_max] = 0  # force excluded region to black

    save_dir = './saved_images'
    os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, debug_image_name)
    Image.fromarray(mask_llm).save(out_path)
    return out_path
llm_to_process_image(50, "../../16508.jpg", (0, 0), (0, 0), thresholds=None) | |