Luke
committed on
Commit
·
68e1313
1
Parent(s):
03b6d75
no message
Browse files- .gitignore +2 -0
- Preprocess/preprocessImg.py +59 -0
- app.py +54 -12
- requirements.txt +2 -1
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea/*
|
| 2 |
+
*.pyc
|
Preprocess/preprocessImg.py
CHANGED
|
@@ -27,3 +27,62 @@ def preprocess_image002(image):
|
|
| 27 |
gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
|
| 28 |
edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
|
| 29 |
return Image.fromarray(edged)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
gray = cv2.bilateralFilter(gray, 11, 17, 17) # 雙邊濾波去噪
|
| 28 |
edged = cv2.Canny(gray, 30, 200) # 邊緣檢測
|
| 29 |
return Image.fromarray(edged)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Plan 3: adaptive thresholding + morphological opening
def preprocess_image003(image):
    """Binarize *image* with Gaussian adaptive thresholding, then clean it
    with a 3x3 morphological opening.

    Args:
        image: PIL.Image input (any mode).

    Returns:
        PIL.Image: single-channel binary (0/255) image.
    """
    # Force 3-channel RGB first so cvtColor always gets a predictable layout
    # (an RGBA or grayscale PIL image would otherwise make cvtColor raise).
    image_np = np.array(image.convert("RGB"))
    # BUGFIX: np.array(PIL.Image) is RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights hit the correct channels.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Adaptive threshold: per-pixel Gaussian-weighted local mean over an
    # 11x11 window, with constant offset 2.
    adaptive_thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                            cv2.THRESH_BINARY, 11, 2)
    # Morphological opening (erode then dilate) removes small isolated specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    morph = cv2.morphologyEx(adaptive_thresh, cv2.MORPH_OPEN, kernel)
    return Image.fromarray(morph)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# Plan 4: CLAHE (Contrast-Limited Adaptive Histogram Equalization)
def preprocess_image004(image):
    """Boost local contrast with CLAHE, then binarize with Otsu's threshold.

    Args:
        image: PIL.Image input (any mode).

    Returns:
        PIL.Image: single-channel binary (0/255) image.
    """
    # Force 3-channel RGB first so cvtColor always gets a predictable layout
    # (an RGBA or grayscale PIL image would otherwise make cvtColor raise).
    image_np = np.array(image.convert("RGB"))
    # BUGFIX: np.array(PIL.Image) is RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights hit the correct channels.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # CLAHE equalizes contrast per 8x8 tile, clipping the histogram at 2.0
    # to avoid amplifying noise.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_image = clahe.apply(gray)
    # Otsu picks the global threshold automatically (the 0 is ignored).
    _, binary = cv2.threshold(clahe_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# Plan 5: histogram equalization + Gaussian blur
def preprocess_image005(image):
    """Equalize the global histogram, smooth with a Gaussian blur, then
    binarize with Otsu's threshold.

    Args:
        image: PIL.Image input (any mode).

    Returns:
        PIL.Image: single-channel binary (0/255) image.
    """
    # Force 3-channel RGB first so cvtColor always gets a predictable layout
    # (an RGBA or grayscale PIL image would otherwise make cvtColor raise).
    image_np = np.array(image.convert("RGB"))
    # BUGFIX: np.array(PIL.Image) is RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights hit the correct channels.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Global histogram equalization spreads intensities over the full range.
    equalized = cv2.equalizeHist(gray)
    # 5x5 Gaussian blur (sigma auto-derived) suppresses equalization noise.
    blurred = cv2.GaussianBlur(equalized, (5, 5), 0)
    # Otsu picks the global threshold automatically (the 0 is ignored).
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# Plan 6: non-local-means denoising + sharpening
def preprocess_image006(image):
    """Denoise with fast non-local means, sharpen with a Laplacian-style
    kernel, then binarize with Otsu's threshold.

    Args:
        image: PIL.Image input (any mode).

    Returns:
        PIL.Image: single-channel binary (0/255) image.
    """
    # Force 3-channel RGB first so cvtColor always gets a predictable layout
    # (an RGBA or grayscale PIL image would otherwise make cvtColor raise).
    image_np = np.array(image.convert("RGB"))
    # BUGFIX: np.array(PIL.Image) is RGB, not BGR — use COLOR_RGB2GRAY so the
    # luminance weights hit the correct channels.
    gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
    # Non-local means: filter strength h=30, 7x7 template, 21x21 search window.
    denoised = cv2.fastNlMeansDenoising(gray, None, 30, 7, 21)
    # Standard sharpening kernel: center 5, 4-neighbors -1 (identity + Laplacian).
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(denoised, -1, kernel)
    # Otsu picks the global threshold automatically (the 0 is ignored).
    _, binary = cv2.threshold(sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return Image.fromarray(binary)
|
app.py
CHANGED
|
@@ -2,7 +2,10 @@ import os
|
|
| 2 |
import gradio as gr
|
| 3 |
from Plan.AiLLM import llm_recognition
|
| 4 |
from Plan.pytesseractOCR import ocr_recognition
|
| 5 |
-
from Preprocess.preprocessImg import
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# 取得所有語言清單
|
| 8 |
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
|
@@ -15,8 +18,18 @@ def preprocess_and_ocr(image, valid_type, language):
|
|
| 15 |
# 方案二
|
| 16 |
pre_img_002 = preprocess_image002(image)
|
| 17 |
ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
return pre_img_001, pre_img_002,
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def preprocess_and_llm(image, valid_type, language):
|
|
@@ -26,34 +39,63 @@ def preprocess_and_llm(image, valid_type, language):
|
|
| 26 |
# 方案二
|
| 27 |
pre_img_002 = preprocess_image002(image)
|
| 28 |
llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
return pre_img_001, pre_img_002,
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
with gr.Blocks() as demo:
|
| 34 |
with gr.Row():
|
| 35 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
| 36 |
-
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
| 37 |
-
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
|
| 38 |
-
|
| 39 |
-
with gr.Row():
|
| 40 |
validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
|
| 41 |
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
| 42 |
-
# preprocessed_type = gr.Radio(["001", "002"], label="解析方案")
|
| 43 |
|
| 44 |
with gr.Row():
|
| 45 |
ocr_button = gr.Button("使用 OCR")
|
| 46 |
llm_button = gr.Button("使用 AI LLM")
|
| 47 |
|
| 48 |
with gr.Row():
|
|
|
|
| 49 |
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
|
|
|
|
|
|
|
|
|
|
| 50 |
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
|
| 55 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
|
| 57 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
demo.launch(share=False)
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from Plan.AiLLM import llm_recognition
|
| 4 |
from Plan.pytesseractOCR import ocr_recognition
|
| 5 |
+
from Preprocess.preprocessImg import (
|
| 6 |
+
preprocess_image001, preprocess_image002, preprocess_image003,
|
| 7 |
+
preprocess_image004, preprocess_image005
|
| 8 |
+
)
|
| 9 |
|
| 10 |
# 取得所有語言清單
|
| 11 |
languages = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
|
|
|
|
| 18 |
# 方案二
|
| 19 |
pre_img_002 = preprocess_image002(image)
|
| 20 |
ocr_result_002 = ocr_recognition(pre_img_002, valid_type, language)
|
| 21 |
+
# 方案三
|
| 22 |
+
pre_img_003 = preprocess_image003(image)
|
| 23 |
+
ocr_result_003 = ocr_recognition(pre_img_003, valid_type, language)
|
| 24 |
+
# 方案四
|
| 25 |
+
pre_img_004 = preprocess_image004(image)
|
| 26 |
+
ocr_result_004 = ocr_recognition(pre_img_004, valid_type, language)
|
| 27 |
+
# 方案五
|
| 28 |
+
pre_img_005 = preprocess_image005(image)
|
| 29 |
+
ocr_result_005 = ocr_recognition(pre_img_005, valid_type, language)
|
| 30 |
|
| 31 |
+
return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
|
| 32 |
+
ocr_result_001, ocr_result_002, ocr_result_003, ocr_result_004, ocr_result_005)
|
| 33 |
|
| 34 |
|
| 35 |
def preprocess_and_llm(image, valid_type, language):
|
|
|
|
| 39 |
# 方案二
|
| 40 |
pre_img_002 = preprocess_image002(image)
|
| 41 |
llm_result_002 = llm_recognition(pre_img_002, valid_type, language)
|
| 42 |
+
# 方案三
|
| 43 |
+
pre_img_003 = preprocess_image003(image)
|
| 44 |
+
llm_result_003 = llm_recognition(pre_img_003, valid_type, language)
|
| 45 |
+
# 方案四
|
| 46 |
+
pre_img_004 = preprocess_image004(image)
|
| 47 |
+
llm_result_004 = llm_recognition(pre_img_004, valid_type, language)
|
| 48 |
+
# 方案五
|
| 49 |
+
pre_img_005 = preprocess_image005(image)
|
| 50 |
+
llm_result_005 = llm_recognition(pre_img_005, valid_type, language)
|
| 51 |
|
| 52 |
+
return (pre_img_001, pre_img_002, pre_img_003, pre_img_004, pre_img_005,
|
| 53 |
+
llm_result_001, llm_result_002, llm_result_003, llm_result_004, llm_result_005)
|
| 54 |
|
| 55 |
|
| 56 |
with gr.Blocks() as demo:
|
| 57 |
with gr.Row():
|
| 58 |
image_input = gr.Image(type="pil", label="上傳圖片")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
validation_type = gr.Dropdown(choices=["身分證正面", "身分證反面"], label="驗證類別")
|
| 60 |
language_dropdown = gr.Dropdown(choices=languages, value="chi_tra", label="語言")
|
|
|
|
| 61 |
|
| 62 |
with gr.Row():
|
| 63 |
ocr_button = gr.Button("使用 OCR")
|
| 64 |
llm_button = gr.Button("使用 AI LLM")
|
| 65 |
|
| 66 |
with gr.Row():
|
| 67 |
+
preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
|
| 68 |
ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
|
| 69 |
+
llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
|
| 70 |
+
with gr.Row():
|
| 71 |
+
preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
|
| 72 |
ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
|
| 73 |
+
llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")
|
| 74 |
+
|
| 75 |
+
with gr.Row():
|
| 76 |
+
preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
|
| 77 |
+
ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
|
| 78 |
+
llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
|
| 79 |
+
with gr.Row():
|
| 80 |
+
preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
|
| 81 |
+
ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
|
| 82 |
+
llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
|
| 83 |
+
with gr.Row():
|
| 84 |
+
preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
|
| 85 |
+
ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
|
| 86 |
+
llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")
|
| 87 |
|
| 88 |
ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
|
| 89 |
+
outputs=[
|
| 90 |
+
preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
|
| 91 |
+
preprocess_output_005,
|
| 92 |
+
ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005
|
| 93 |
+
])
|
| 94 |
llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
|
| 95 |
+
outputs=[
|
| 96 |
+
preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
|
| 97 |
+
preprocess_output_005,
|
| 98 |
+
llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005
|
| 99 |
+
])
|
| 100 |
|
| 101 |
demo.launch(share=False)
|
requirements.txt
CHANGED
|
@@ -4,4 +4,5 @@ transformers
|
|
| 4 |
Pillow
|
| 5 |
torch
|
| 6 |
huggingface-hub
|
| 7 |
-
opencv-python
|
|
|
|
|
|
| 4 |
Pillow
|
| 5 |
torch
|
| 6 |
huggingface-hub
|
| 7 |
+
opencv-python
|
| 8 |
+
numpy
|