Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from paddleocr import PaddleOCR
|
2 |
+
import gradio as gr
|
3 |
+
import re
|
4 |
+
|
5 |
+
# Shared OCR engine, constructed once when the module is imported so every
# request reuses it: English language, angle classification switched on.
ocr = PaddleOCR(lang='en', use_angle_cls=True)
|
7 |
+
|
8 |
+
# Three 4-digit groups, optionally separated by whitespace or a hyphen. The
# look-arounds reject a 12-digit window that is only part of a longer number
# (for example a 16-digit identifier written as four groups).
_AADHAAR_RE = re.compile(
    r"(?<!\d)(?<!\d[\s-])(\d{4})[\s-]?(\d{4})[\s-]?(\d{4})(?![\s-]?\d)"
)

# Date-like token: 2-4 digits, 1-2 digits, 2-4 digits, joined by "/" or "-".
_DOB_RE = re.compile(r"(\d{2,4})[/-](\d{1,2})[/-](\d{2,4})")


def _ocr_text_lines(result):
    """Return the recognized text strings from the first page of an OCR result.

    Each entry of ``result[0]`` is indexed as ``entry[1][0]`` to obtain its
    text. An empty result, or a first page that is ``None``/empty, yields an
    empty list instead of raising.
    """
    # NOTE(review): PaddleOCR appears to report a page with no detected text
    # as None (i.e. ``[None]``) -- confirm against the installed version.
    if not result or not result[0]:
        return []
    return [entry[1][0] for entry in result[0]]


def extract_kyc_fields(image_path):
    """Run OCR on an image and pull out Aadhaar number, name and DOB.

    Args:
        image_path: Path of the image to read. ``None`` or an empty string
            (no image supplied) short-circuits without calling the OCR engine.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"name"`` and ``"dob"``.
        Each value is the first match found among the OCR text lines, or
        ``None`` when nothing matched:

        * ``aadhaar_number`` -- 12 digits normalized to ``"XXXX-XXXX-XXXX"``
          regardless of how (or whether) the groups were separated.
        * ``dob`` -- the matched date text, unchanged.
        * ``name`` -- the first all-uppercase, digit-free line, title-cased.
    """
    output = {
        "aadhaar_number": None,
        "name": None,
        "dob": None,
    }

    # Nothing to read: return the empty result rather than failing inside OCR.
    if not image_path:
        return output

    text_lines = _ocr_text_lines(ocr.ocr(image_path, cls=True))

    # Aadhaar number: join the captured groups so the output format does not
    # depend on which separator (space, tab, hyphen, none) the OCR produced.
    for text in text_lines:
        match = _AADHAAR_RE.search(text)
        if match:
            output["aadhaar_number"] = "-".join(match.groups())
            break

    # DOB: first date-like token in any line.
    for text in text_lines:
        match = _DOB_RE.search(text)
        if match:
            output["dob"] = match.group()
            break

    # Name: heuristic only -- the first line that is entirely uppercase and
    # contains no digits. NOTE(review): an uppercase header line that precedes
    # the holder's name would be picked up instead; verify on real samples.
    output["name"] = next(
        (
            text.title()
            for text in text_lines
            if text.isupper() and not any(char.isdigit() for char in text)
        ),
        None,
    )

    return output  # Return dictionary directly (key-value)
|
39 |
+
|
40 |
+
# Gradio UI: an image upload (handed to the callback as a file path) wired to
# extract_kyc_fields, whose returned dict is rendered in a JSON component.
demo = gr.Interface(
    title="🧠 Smart KYC OCR",
    description="Upload Aadhaar/PAN card image and get extracted Name, Aadhaar number, and DOB as key-value output.",
    fn=extract_kyc_fields,
    inputs=gr.Image(type="filepath", label="Upload Aadhaar/PAN Card"),
    outputs=gr.JSON(label="Extracted KYC Fields"),  # Key-value format
)

# Start serving the app.
demo.launch()
|