Balde-maria2005 committed on
Commit
1819aaf
·
verified ·
1 Parent(s): aa0f0ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -0
app.py CHANGED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import easyocr
3
+ import numpy as np
4
+ import cv2
5
+ from PIL import Image
6
+ import re
7
+ import json
8
+
9
# Initialize EasyOCR: one module-level reader, built at import time with the
# French and English language codes, shared by every OCR call in this file.
reader = easyocr.Reader(['fr', 'en'])
11
+
12
def preprocess_image(pil_image):
    """Prepare an identity-card image for OCR.

    Pipeline: grayscale -> histogram equalization -> 3x3 Gaussian blur ->
    rotation correction -> adaptive Gaussian threshold.

    Args:
        pil_image: A PIL image (any mode) or an RGB array-like.

    Returns:
        The binarized single-channel image produced by
        ``cv2.adaptiveThreshold`` (max value 255).

    Raises:
        ValueError: If ``pil_image`` is None (no file was uploaded).
    """
    if pil_image is None:
        raise ValueError("Aucune image fournie")

    # Force three RGB channels so palette, grayscale and RGBA uploads all
    # reach cvtColor in the layout it expects.
    if isinstance(pil_image, Image.Image):
        pil_image = pil_image.convert("RGB")
    rgb = np.array(pil_image)

    # RGB -> gray directly; equivalent to the RGB -> BGR -> gray round trip.
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    gray = cv2.equalizeHist(gray)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)

    # NOTE(review): the mask keeps every non-black pixel, which on most
    # photos is nearly the whole frame, so the estimated skew is usually ~0.
    # Confirm whether a text-only mask (e.g. inverted Otsu) was intended.
    angle = 0.0
    coords = np.column_stack(np.where(blurred > 0))
    if len(coords):
        raw_angle = cv2.minAreaRect(coords)[-1]
        # minAreaRect reports the angle in [-90, 0) on older OpenCV builds and
        # in (0, 90] on newer ones. Folding it modulo 90 into [-45, 45) gives
        # the same small correction under both conventions and prevents an
        # upright image (reported as 90) from being turned a quarter turn.
        angle = -(((raw_angle + 45.0) % 90.0) - 45.0)

    (h, w) = blurred.shape
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    deskewed = cv2.warpAffine(
        blurred,
        M,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    thresh = cv2.adaptiveThreshold(
        deskewed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    return thresh
26
+
27
def ocr_easyocr(image_np):
    """Run the module-level EasyOCR reader on an image and return its text.

    Args:
        image_np: Image passed straight to ``reader.readtext``.

    Returns:
        The element at index 1 of every detection returned by the reader
        (presumably the recognized string), joined with newlines in
        detection order.
    """
    detections = reader.readtext(image_np)
    return "\n".join(detection[1] for detection in detections)
31
+
32
def _strip_accents(text):
    """Return *text* with combining diacritical marks removed (e.g. É -> E)."""
    import unicodedata  # local import: not part of the module's import header

    decomposed = unicodedata.normalize("NFD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


# French and English three-letter month abbreviations accepted in birth dates.
_MONTH_ABBREVIATIONS = (
    "JAN|FEB|FEV|MAR|APR|AVR|MAI|MAY|JUN|JUL|AOU|AUG|SEP|OCT|NOV|DEC"
)

# Field name -> compiled pattern. Each pattern has at most ONE capturing group,
# which holds the value; a pattern with no group yields its whole match.
# Patterns run against upper-cased, accent-stripped text.
_FIELD_PATTERNS = {
    # \b keeps NOM from matching the tail of PRENOM.
    "nom": re.compile(r"\bNOM[\s:]+([A-Z\-]+)"),
    "prenom": re.compile(r"PRENOMS?[\s:]+([A-Z\-]+)"),
    "sexe": re.compile(r"SEXE[\s:]+([FM])"),
    "taille": re.compile(r"TAILLE[\s:]+([0-9.,]+\s?M)"),
    "nationalite": re.compile(r"NATIONALITE[\s:]+([A-Z]+)"),
    "date_naissance": re.compile(
        r"(\d{2}\s(?:" + _MONTH_ABBREVIATIONS + r")\s\d{4})"
    ),
    # Exactly 16 digits: do not slice the value out of a longer digit run.
    "numero_id": re.compile(r"(?<![0-9])([0-9]{16})(?![0-9])"),
    "code_pays": re.compile(r"\bGIN\b"),
    "nin": re.compile(r"\b[0-9]{15}\b"),
    # The full label is tried first so "LIEU DE NAISSANCE: X" yields X, not "DE".
    "lieu_naissance": re.compile(
        r"(?:LIEU\s+DE\s+NAISSANCE|NAISSANCE|LIEU)[\s:]+([A-Z\-]+)"
    ),
    "prefecture": re.compile(r"PREFECTURE[\s:]+([A-Z\-]+)"),
    "date_emission": re.compile(r"EMISSION[\s:]+(\d{2}\s\w+\s\d{4})"),
    "date_expiration": re.compile(r"EXPIRATION[\s:]+(\d{2}\s\w+\s\d{4})"),
}


def extract_fields(text):
    """Extract identity-card fields from raw OCR text.

    The text is upper-cased and stripped of accents before matching, so
    labels such as "Prénom" or "Nationalité" are recognized and returned
    values are plain ASCII upper case.

    Args:
        text: Raw OCR output (may be empty).

    Returns:
        A dict mapping field names (``nom``, ``prenom``, ``sexe``, ...) to the
        first matching value. Fields that are not found are omitted.
    """
    normalized = _strip_accents(text).upper()
    data = {}
    for key, pattern in _FIELD_PATTERNS.items():
        found = pattern.search(normalized)
        if found:
            # pattern.groups is 0 for group-less patterns, so group(0) (the
            # whole match) is used instead of raising IndexError on group(1).
            data[key] = found.group(pattern.groups)
    return data
57
+
58
def analyser_carte(recto_img, verso_img):
    """Run OCR on both sides of an identity card and extract its fields.

    Either side may be omitted (None); the sides that are present are
    processed in recto, verso order and their texts joined with a newline.

    Args:
        recto_img: Front-side image, or None if not uploaded.
        verso_img: Back-side image, or None if not uploaded.

    Returns:
        A ``(text, fields)`` tuple: the raw OCR text and the dict of extracted
        fields. On failure the text is an ``"Erreur : ..."`` message and the
        dict is empty.
    """
    images = [img for img in (recto_img, verso_img) if img is not None]
    if not images:
        return "Erreur : aucune image fournie", {}

    try:
        textes = [ocr_easyocr(preprocess_image(img)) for img in images]
        texte_total = "\n".join(textes)
        champs = extract_fields(texte_total)
        return texte_total, champs
    except Exception as e:
        # UI boundary: report the failure in the text output rather than
        # letting the exception propagate to the caller.
        return f"Erreur : {e}", {}
69
+
70
# Two image uploads (front and back of the card), handed to the callback as
# PIL images.
card_inputs = [
    gr.Image(type="pil", label="Recto Carte d'identité"),
    gr.Image(type="pil", label="Verso Carte d'identité"),
]

# The callback's two return values: raw OCR text and the extracted fields.
result_outputs = [
    gr.Textbox(label="Texte brut extrait"),
    gr.JSON(label="Champs extraits"),
]

# Gradio UI wiring the uploads to analyser_carte.
interface = gr.Interface(
    fn=analyser_carte,
    inputs=card_inputs,
    outputs=result_outputs,
    title="OCRIA - Lecture intelligente de carte d'identité guinéenne",
    description="Téléversez le recto et le verso d'une carte d'identité pour une extraction automatique des informations par IA.",
)

# Start the web app as soon as the module is executed.
interface.launch()