File size: 1,934 Bytes
709c305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from LLM_package import ObjectDetector
import os
from dotenv import load_dotenv
import numpy as np
import cv2
from PIL import Image
# Load environment variables (e.g. GEMINI_API_KEY) from the parent directory's .env file.
load_dotenv(dotenv_path='../.env')
def llm_to_process_image(risk_level, image_path, point1, point2, thresholds=None):
    """Detect privacy-sensitive objects in an image with an LLM-based
    detector and save a binary mask of their bounding boxes.

    Detected regions are set to 255 in the mask; the rectangle spanned by
    ``point1``/``point2`` (normalized (x, y) in [0, 1]) is then cleared back
    to 0. The mask is written to ``./saved_images/test_llm.jpg``.

    Args:
        risk_level: Caller-supplied risk level (currently only logged).
        image_path: Path to the input image readable by OpenCV.
        point1: One corner of the exclusion rectangle, normalized (x, y).
        point2: Opposite corner of the exclusion rectangle, normalized (x, y).
        thresholds: Optional thresholds (currently only logged).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    print(risk_level, image_path, point1, point2, thresholds)
    print('point1,point2', point1, point2)
    gemini_api_key = os.getenv('GEMINI_API_KEY')
    detector = ObjectDetector(API_KEY=gemini_api_key)
    detector.prompt_objects = {'face', 'poster', 'Name tag', 'License plate', 'Digital screens',
        'signboard', 'sign', 'logo', 'manhole', 'electricity pole', 'cardboard'}

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising on a missing/unreadable file.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    height, width = image.shape[:2]
    mask_llm = np.zeros((height, width), dtype=np.uint8)

    llm_results = detector.detect_objects(image_path)
    for result in llm_results:
        # box_2d is presumably normalized [y_min, x_min, y_max, x_max] in [0, 1]
        # — TODO confirm against LLM_package.ObjectDetector output format.
        bbox = result['box_2d']
        x1, y1 = int(bbox[1] * width), int(bbox[0] * height)
        x2, y2 = int(bbox[3] * width), int(bbox[2] * height)
        mask_llm[y1:y2, x1:x2] = 255  # mark detected region

    # Clear the caller-specified rectangle from the mask, clamped to image bounds.
    p1_x, p1_y = int(point1[0] * width), int(point1[1] * height)
    p2_x, p2_y = int(point2[0] * width), int(point2[1] * height)
    x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
    x_max, y_max = min(width, max(p1_x, p2_x)), min(height, max(p1_y, p2_y))
    mask_llm[y_min:y_max, x_min:x_max] = 0

    save_dir = "./saved_images"
    os.makedirs(save_dir, exist_ok=True)
    # Same destination as before ("./saved_images/test_llm.jpg"), but built
    # with os.path.join instead of fragile string concatenation.
    Image.fromarray(mask_llm).save(os.path.join(save_dir, "test_llm.jpg"))

if __name__ == "__main__":
    # Manual smoke test with a hard-coded local image; guarded so importing
    # this module does not trigger an LLM call and file I/O as a side effect.
    llm_to_process_image(50, "../../16508.jpg", (0, 0), (0, 0), thresholds=None)