import os

import cv2
import numpy as np
from dotenv import load_dotenv
from PIL import Image

from LLM_package import ObjectDetector

load_dotenv(dotenv_path='../.env')


def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
    """Build and save a binary privacy mask for *image_path* using LLM object detection.

    Runs Gemini-based object detection over the image, paints each detected
    sensitive region (faces, plates, signs, ...) white (255) on a black mask,
    then blacks out the rectangle spanned by ``point1``/``point2`` (a region
    the caller wants excluded from masking). The mask is written to
    ``./saved_images/test_llm.jpg`` for debugging.

    Args:
        risk_level: Caller-supplied risk score; currently only logged.
        image_path: Path to the input image readable by OpenCV.
        point1: (x, y) of one corner of the exclusion box, normalized to [0, 1].
        point2: (x, y) of the opposite corner, normalized to [0, 1].
        thresholds: Unused; kept for interface compatibility. Currently only logged.

    Returns:
        np.ndarray: The uint8 mask (H, W), 255 = detected object, 0 = background
        or excluded region.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    print(risk_level, image_path, point1, point2, thresholds)
    print('point1,point2', point1, point2)
    GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

    # Configure the detector with the categories of sensitive objects to find.
    Objectdetector = ObjectDetector(API_KEY=GEMINI_API_KEY)
    Objectdetector.prompt_objects = {'face', 'poster', 'Name tag', 'License plate',
                                     'Digital screens', 'signboard', 'sign', 'logo',
                                     'manhole', 'electricity pole', 'cardboard'}

    # Load the image; cv2.imread returns None on failure instead of raising,
    # which would otherwise surface as a confusing AttributeError below.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    h, w = image.shape[:2]
    mask_llm = np.zeros((h, w), dtype=np.uint8)

    # Paint each detected bounding box white on the mask.
    # NOTE(review): box_2d is assumed to be (ymin, xmin, ymax, xmax) normalized
    # to [0, 1] — confirm against LLM_package's output convention.
    llm_results = Objectdetector.detect_objects(image_path)
    for result in llm_results:
        bbox = result['box_2d']
        x1, y1 = int(bbox[1] * w), int(bbox[0] * h)
        x2, y2 = int(bbox[3] * w), int(bbox[2] * h)
        mask_llm[y1:y2, x1:x2] = 255

    # Black out the caller-specified exclusion rectangle (e.g. a text region),
    # clamped to the image bounds and normalized so corner order doesn't matter.
    p1_x, p1_y = int(point1[0] * w), int(point1[1] * h)
    p2_x, p2_y = int(point2[0] * w), int(point2[1] * h)
    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
    x_max, y_max = min(w, max(p1_x, p2_x)), min(h, max(p1_y, p2_y))
    mask_llm[y_min:y_max, x_min:x_max] = 0

    # Persist the mask for visual debugging.
    save_dir = "./saved_images"
    os.makedirs(save_dir, exist_ok=True)
    debug_image_pil = Image.fromarray(mask_llm)
    debug_image_pil.save(os.path.join(save_dir, 'test_llm.jpg'))

    return mask_llm


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers a detection run.
    llm_to_process_image(50, "../../16508.jpg", (0, 0), (0, 0), thresholds=None)