Spaces:

rein0421
/

AIdentify

Running

App Files Files

rein0421 committed on Nov 14, 2024

Commit

128a3a6

verified ·

1 Parent(s): de21c48

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -9

app.py CHANGED Viewed

@@ -222,6 +222,8 @@ def create_mask(image, x1, y1, x2, y2):
     cv2.rectangle(mask, (int(x1), int(y1)), (int(x2), int(y2)), 255, -1)
     return mask
 # 特殊な処理を行う関数
 def special_process_image_yolo(risk_level, image_path, point1, point2, thresholds=None):
     # デバイスの確認
@@ -239,6 +241,9 @@ def special_process_image_yolo(risk_level, image_path, point1, point2, threshold
     model = YOLO(model_path).to(device)
     print("モデルが正常にロードされ、デバイスに移動しました。")
     # タイムスタンプを作成
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
@@ -250,7 +255,6 @@ def special_process_image_yolo(risk_level, image_path, point1, point2, threshold
     def logistic_decay_for_label(risk_level, label_index, k=0.1, r0=50):
         base_decay = 1 / (1 + np.exp(-k * (risk_level - r0)))
-        # ラベルの順序に応じた減衰の段階を追加
         return max(base_decay + 0.05 * label_index, 0.01)
     adjusted_thresholds = {}
@@ -262,13 +266,19 @@ def special_process_image_yolo(risk_level, image_path, point1, point2, threshold
     image = cv2.imread(image_path)
     image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    # 推論実行
-    results = model(image_rgb)
-    # 初期化したマスク画像
-    mask = np.zeros(image.shape[:2], dtype=np.uint8)
-    # 全ての検出オブジェクトを対象としてマスク作成
     for box in results[0].boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0])
         confidence = box.conf[0]
@@ -278,14 +288,17 @@ def special_process_image_yolo(risk_level, image_path, point1, point2, threshold
         # オブジェクトの閾値を確認し、マスクを適用
         threshold = adjusted_thresholds.get(object_type, 0.5)
         if confidence >= threshold:
-            mask = create_mask(image, x1, y1, x2, y2)
     # 絶対座標に変換した点の範囲を黒に設定
     p1_x, p1_y = int(point1[0] * image.shape[1]), int(point1[1] * image.shape[0])
     p2_x, p2_y = int(point2[0] * image.shape[1]), int(point2[1] * image.shape[0])
     x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
     x_max, y_max = min(image.shape[1], max(p1_x, p2_x)), min(image.shape[0], max(p1_y, p2_y))
-    mask[y_min:y_max, x_min:x_max] = 0  # 範囲を黒に設定
     # デバッグ用に白い長方形を描画
     debug_image = image_rgb.copy()
@@ -298,7 +311,7 @@ def special_process_image_yolo(risk_level, image_path, point1, point2, threshold
     debug_image_path = os.path.join(save_dir, f"debug_image_with_rectangle_{timestamp}.jpg")
     debug_image_pil.save(debug_image_path)
-    mask_image_pil = Image.fromarray(mask)
     mask_image_path = os.path.join(save_dir, f"final_mask_{timestamp}.jpg")
     mask_image_pil.save(mask_image_path)

     cv2.rectangle(mask, (int(x1), int(y1)), (int(x2), int(y2)), 255, -1)
     return mask
+import easyocr
 # 特殊な処理を行う関数
 def special_process_image_yolo(risk_level, image_path, point1, point2, thresholds=None):
     # デバイスの確認
     model = YOLO(model_path).to(device)
     print("モデルが正常にロードされ、デバイスに移動しました。")
+    # OCRモデルの初期化
+    reader = easyocr.Reader(['en', 'ja'])  # 言語は必要に応じて調整
     # タイムスタンプを作成
     timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
     def logistic_decay_for_label(risk_level, label_index, k=0.1, r0=50):
         base_decay = 1 / (1 + np.exp(-k * (risk_level - r0)))
         return max(base_decay + 0.05 * label_index, 0.01)
     adjusted_thresholds = {}
     image = cv2.imread(image_path)
     image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # OCRによる文字領域マスク作成
+    mask_ocr = np.zeros(image.shape[:2], dtype=np.uint8)
+    ocr_results = reader.readtext(image_rgb)
+    for (bbox, text, ocr_conf) in ocr_results:
+        x1, y1 = int(bbox[0][0]), int(bbox[0][1])
+        x2, y2 = int(bbox[2][0]), int(bbox[2][1])
+        mask_ocr[y1:y2, x1:x2] = 255  # テキスト領域をマスク
+    # YOLO推論実行
+    results = model(image_rgb)
+    # YOLOによる物体検出マスク作成
+    mask_yolo = np.zeros(image.shape[:2], dtype=np.uint8)
     for box in results[0].boxes:
         x1, y1, x2, y2 = map(int, box.xyxy[0])
         confidence = box.conf[0]
         # オブジェクトの閾値を確認し、マスクを適用
         threshold = adjusted_thresholds.get(object_type, 0.5)
         if confidence >= threshold:
+            mask_yolo = create_mask(image, x1, y1, x2, y2)
+    # OCRマスクとYOLOマスクの結合
+    final_mask = cv2.bitwise_or(mask_ocr, mask_yolo)
     # 絶対座標に変換した点の範囲を黒に設定
     p1_x, p1_y = int(point1[0] * image.shape[1]), int(point1[1] * image.shape[0])
     p2_x, p2_y = int(point2[0] * image.shape[1]), int(point2[1] * image.shape[0])
     x_min, y_min = max(0, min(p1_x, p2_x)), max(0, min(p1_y, p2_y))
     x_max, y_max = min(image.shape[1], max(p1_x, p2_x)), min(image.shape[0], max(p1_y, p2_y))
+    final_mask[y_min:y_max, x_min:x_max] = 0  # 範囲を黒に設定
     # デバッグ用に白い長方形を描画
     debug_image = image_rgb.copy()
     debug_image_path = os.path.join(save_dir, f"debug_image_with_rectangle_{timestamp}.jpg")
     debug_image_pil.save(debug_image_path)
+    mask_image_pil = Image.fromarray(final_mask)
     mask_image_path = os.path.join(save_dir, f"final_mask_{timestamp}.jpg")
     mask_image_pil.save(mask_image_path)