Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
@@ -1,33 +1,32 @@
|
|
1 |
from paddleocr import PaddleOCR
|
2 |
import re
|
3 |
|
4 |
-
# Initialize OCR model
|
5 |
-
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
6 |
|
7 |
def extract_kyc_fields(file_path):
|
8 |
try:
|
9 |
-
# Run OCR
|
10 |
result = ocr.ocr(file_path, cls=True)
|
11 |
|
|
|
12 |
all_text = ""
|
13 |
for line_group in result:
|
14 |
for line in line_group:
|
15 |
all_text += line[1][0] + "\n"
|
16 |
|
17 |
-
# Aadhaar
|
18 |
aadhaar_match = re.search(r'\b\d{4}[\s\-]?\d{4}[\s\-]?\d{4}\b', all_text)
|
19 |
|
20 |
-
# DOB
|
21 |
dob_match = re.search(r'\b\d{2}[\/\-]\d{2}[\/\-]\d{4}\b', all_text)
|
22 |
|
23 |
-
# Name
|
24 |
name = "Not found"
|
25 |
for line in all_text.split("\n"):
|
26 |
if re.search(r'\b(name|naam|namf)\b', line, re.IGNORECASE):
|
27 |
name = line.split(":")[-1].strip() if ":" in line else line.strip()
|
28 |
break
|
29 |
-
|
30 |
-
if name == "Not found":
|
31 |
name = all_text.split("\n")[0].strip()
|
32 |
|
33 |
return {
|
|
|
1 |
from paddleocr import PaddleOCR
|
2 |
import re
|
3 |
|
4 |
+
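# Initialize the PaddleOCR model once at module level (English, angle classification enabled); extract_kyc_fields reuses this instance.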
|
5 |
+
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
6 |
|
7 |
def extract_kyc_fields(file_path):
|
8 |
try:
|
|
|
9 |
result = ocr.ocr(file_path, cls=True)
|
10 |
|
11 |
+
# Combine text lines
|
12 |
all_text = ""
|
13 |
for line_group in result:
|
14 |
for line in line_group:
|
15 |
all_text += line[1][0] + "\n"
|
16 |
|
17 |
+
# Aadhaar pattern
|
18 |
aadhaar_match = re.search(r'\b\d{4}[\s\-]?\d{4}[\s\-]?\d{4}\b', all_text)
|
19 |
|
20 |
+
# DOB pattern
|
21 |
dob_match = re.search(r'\b\d{2}[\/\-]\d{2}[\/\-]\d{4}\b', all_text)
|
22 |
|
23 |
+
# Name logic
|
24 |
name = "Not found"
|
25 |
for line in all_text.split("\n"):
|
26 |
if re.search(r'\b(name|naam|namf)\b', line, re.IGNORECASE):
|
27 |
name = line.split(":")[-1].strip() if ":" in line else line.strip()
|
28 |
break
|
29 |
+
if name == "Not found" and all_text.strip():
|
|
|
30 |
name = all_text.split("\n")[0].strip()
|
31 |
|
32 |
return {
|