Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import easyocr
|
3 |
+
import numpy as np
|
4 |
+
import cv2
|
5 |
+
from PIL import Image
|
6 |
+
import re
|
7 |
+
import json
|
8 |
+
|
9 |
+
# Initialize EasyOCR
|
10 |
+
# Module-level EasyOCR reader configured for French and English; it is
# created once when the module is loaded and reused by ocr_easyocr().
reader = easyocr.Reader(['fr', 'en'])
|
11 |
+
|
12 |
+
def preprocess_image(pil_image):
    """Prepare a card photo for OCR.

    Pipeline: grayscale -> histogram equalization -> light Gaussian blur ->
    deskew -> adaptive threshold.

    Args:
        pil_image: A PIL image (any mode) or an RGB/RGBA array-like accepted
            by ``np.array``.

    Returns:
        The binarized single-channel image produced by
        ``cv2.adaptiveThreshold``.
    """
    # Normalize every PIL mode (L, P, RGBA, ...) to 3-channel RGB so the
    # color conversion below cannot fail on the channel count.
    if isinstance(pil_image, Image.Image):
        pil_image = pil_image.convert("RGB")
    img = np.array(pil_image)

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray = cv2.equalizeHist(gray)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)

    # Estimate the skew from the dark ("ink") pixels only. Using every
    # non-zero pixel would just fit the image frame and say nothing about
    # the text orientation.
    # NOTE(review): assumes dark text on a lighter background.
    _, ink = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )
    coords = np.column_stack(np.where(ink > 0))

    angle = 0.0
    if coords.shape[0] > 0:  # minAreaRect cannot fit an empty point set
        raw_angle = cv2.minAreaRect(coords.astype(np.int32))[-1]
        # Fold the rectangle angle into [-45, 45) before negating it.
        # This equals the classic "angle < -45" rule for OpenCV builds that
        # report angles in [-90, 0) and also stays correct for builds that
        # report (0, 90], where an upright image yields 90 and must not be
        # rotated.
        angle = -(((raw_angle + 45.0) % 90.0) - 45.0)

    (h, w) = blurred.shape
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    deskewed = cv2.warpAffine(
        blurred,
        M,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    thresh = cv2.adaptiveThreshold(
        deskewed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh
|
26 |
+
|
27 |
+
def ocr_easyocr(image_np):
    """Run the shared EasyOCR reader on an image and return its text.

    Args:
        image_np: Image passed straight to ``reader.readtext``.

    Returns:
        The second element of every detection returned by the reader,
        joined with newlines in the order the reader produced them.
    """
    detections = reader.readtext(image_np)
    fragments = []
    for detection in detections:
        # Index 1 of each detection is the recognized string.
        fragments.append(detection[1])
    return "\n".join(fragments)
|
31 |
+
|
32 |
+
def extract_fields(text):
    """Extract identity-card fields from raw OCR text with regexes.

    The text is upper-cased and stripped of diacritics before matching, so
    labels such as "Prénom" or "Nationalité" are recognized and extracted
    values are returned upper-case without accents.

    Args:
        text: Raw OCR text (both card sides concatenated).

    Returns:
        A dict mapping field name to the matched value. Fields that are not
        found are omitted.
    """
    import unicodedata  # local import keeps this block self-contained

    # "PRÉNOM" -> "PRENOM": decompose accented letters, then drop the
    # combining marks so the ASCII-only patterns below can match.
    decomposed = unicodedata.normalize("NFD", text.upper())
    text = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Each pattern has at most ONE capture group holding the value; patterns
    # without a group return the whole match.
    patterns = {
        # The lookbehind stops "NOM" from matching inside "PRENOM".
        "nom": r"(?<![A-Z])NOM[\s:]+([A-Z\-]+)",
        "prenom": r"PRENOMS?[\s:]+([A-Z\-]+)",
        "sexe": r"SEXE[\s:]+([FM])",
        "taille": r"TAILLE[\s:]+([0-9.,]+\s?M)",
        "nationalite": r"NATIONALITE[\s:]+([A-Z]+)",
        # French and English month abbreviations (accents already removed).
        "date_naissance": (
            r"(\d{2}\s(?:JAN|FEV|FEB|MAR|AVR|APR|MAI|MAY|JUN|JUI|JUL"
            r"|AOU|AUG|SEP|OCT|NOV|DEC)\s\d{4})"
        ),
        "numero_id": r"([0-9]{16})",
        "code_pays": r"\bGIN\b",
        "nin": r"\b[0-9]{15}\b",
        # Consume the full "LIEU DE NAISSANCE" label so "DE" is not
        # captured as the place of birth.
        "lieu_naissance": (
            r"(?:LIEU(?:\s+DE\s+NAISSANCE)?|NAISSANCE)[\s:]+([A-Z\-]+)"
        ),
        "prefecture": r"PREFECTURE[\s:]+([A-Z\-]+)",
        "date_emission": r"EMISSION[\s:]+(\d{2}\s\w+\s\d{4})",
        "date_expiration": r"EXPIRATION[\s:]+(\d{2}\s\w+\s\d{4})",
    }

    data = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text)
        if match:
            # lastindex is None for group-less patterns -> whole match (0).
            data[key] = match.group(match.lastindex or 0)

    return data
|
57 |
+
|
58 |
+
def analyser_carte(recto_img, verso_img):
    """Run preprocessing, OCR and field extraction on both card sides.

    Args:
        recto_img: Front-side image, or None when no image was supplied.
        verso_img: Back-side image, or None when no image was supplied.

    Returns:
        A ``(text, fields)`` tuple. ``text`` is the OCR output of the recto
        followed by the verso, separated by a newline; ``fields`` is the
        dict returned by ``extract_fields``. On any failure ``text`` is a
        message starting with "Erreur :" and ``fields`` is empty.
    """
    # Report missing uploads explicitly instead of letting the image
    # pipeline fail with an obscure low-level error.
    sides = (("recto", recto_img), ("verso", verso_img))
    missing = [label for label, image in sides if image is None]
    if missing:
        return f"Erreur : image manquante ({', '.join(missing)})", {}

    try:
        recto = preprocess_image(recto_img)
        verso = preprocess_image(verso_img)
        text_r = ocr_easyocr(recto)
        text_v = ocr_easyocr(verso)
        texte_total = text_r + "\n" + text_v
        champs = extract_fields(texte_total)
        return texte_total, champs
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        return f"Erreur : {str(e)}", {}
|
69 |
+
|
70 |
+
# Gradio UI: two image uploads in, raw OCR text plus parsed fields out.
interface = gr.Interface(
    title="OCRIA - Lecture intelligente de carte d'identité guinéenne",
    description="Téléversez le recto et le verso d'une carte d'identité pour une extraction automatique des informations par IA.",
    fn=analyser_carte,
    # type="pil" requests PIL images for the callback's two arguments.
    inputs=[
        gr.Image(label="Recto Carte d'identité", type="pil"),
        gr.Image(label="Verso Carte d'identité", type="pil"),
    ],
    # Output order matches the (text, fields) tuple returned by the callback.
    outputs=[
        gr.Textbox(label="Texte brut extrait"),
        gr.JSON(label="Champs extraits"),
    ],
)

# Start the app as soon as this module is executed.
interface.launch()
|