gopichandra committed on
Commit
c8d472e
·
verified ·
1 Parent(s): 7b7ee99

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from paddleocr import PaddleOCR
2
+ import gradio as gr
3
+ import re
4
+
5
# Module-level PaddleOCR engine, built once at import time and reused by
# every call to extract_kyc_fields. Configured for English text with the
# angle-classifier option switched on.
ocr = PaddleOCR(lang='en', use_angle_cls=True)
7
+
8
# Twelve digits in three groups of four, optionally separated by whitespace
# or a hyphen. The look-arounds stop the pattern from matching a slice of a
# longer unbroken digit run.
_AADHAAR_RE = re.compile(r"(?<!\d)(\d{4})[\s-]?(\d{4})[\s-]?(\d{4})(?!\d)")

# Three numeric groups separated by "/" or "-" (e.g. 01/02/1990, 1990-02-01).
_DOB_RE = re.compile(r"(\d{2,4})[/-](\d{1,2})[/-](\d{2,4})")


def _first_match(pattern, lines):
    """Return the first match of *pattern* across *lines*, or None."""
    return next((m for m in map(pattern.search, lines) if m), None)


def extract_kyc_fields(image_path):
    """Run OCR on an ID-card image and extract Aadhaar number, name and DOB.

    Args:
        image_path: Path of the image file to read. A falsy value (``None``
            or ``""``, e.g. when the form is submitted without an upload)
            yields an all-``None`` result without invoking OCR.

    Returns:
        dict with the keys ``"aadhaar_number"``, ``"name"`` and ``"dob"``.
        Each value is taken from the first OCR line that matches, or is
        ``None`` when nothing matched:

        * ``aadhaar_number`` - always normalised to ``"XXXX-XXXX-XXXX"``,
          whatever separator (or none) the OCR text contained.
        * ``dob`` - the date substring exactly as recognised.
        * ``name`` - the first all-uppercase, digit-free line, title-cased.
    """
    output = {
        "aadhaar_number": None,
        "name": None,
        "dob": None,
    }

    # Nothing to read: return the empty record instead of letting OCR fail.
    if not image_path:
        return output

    result = ocr.ocr(image_path, cls=True)

    # The per-image entry can be None/empty when no text was detected;
    # iterating it unguarded would raise TypeError.
    detections = result[0] if result else None
    if not detections:
        return output

    # Each detection is (box, (text, confidence)); only the text is needed.
    text_lines = [line[1][0] for line in detections]

    # Aadhaar number: re-join the captured groups so the output format does
    # not depend on which separator the OCR happened to produce.
    aadhaar = _first_match(_AADHAAR_RE, text_lines)
    if aadhaar:
        output["aadhaar_number"] = "-".join(aadhaar.groups())

    # DOB
    dob = _first_match(_DOB_RE, text_lines)
    if dob:
        output["dob"] = dob.group()

    # Name: heuristic - first line that is entirely upper-case and has no
    # digits. Any other all-caps line that appears earlier (such as a
    # printed header) will be picked instead.
    output["name"] = next(
        (
            text.title()
            for text in text_lines
            if text.isupper() and not any(char.isdigit() for char in text)
        ),
        None,
    )

    return output  # Return dictionary directly (key-value)
39
+
40
# Gradio interface: one image upload in, extracted fields shown as JSON out.
card_upload = gr.Image(type="filepath", label="Upload Aadhaar/PAN Card")
fields_view = gr.JSON(label="Extracted KYC Fields")  # Key-value format

demo = gr.Interface(
    fn=extract_kyc_fields,
    inputs=card_upload,
    outputs=fields_view,
    title="🧠 Smart KYC OCR",
    description="Upload Aadhaar/PAN card image and get extracted Name, Aadhaar number, and DOB as key-value output.",
)

# Start the web app when this file is executed.
demo.launch()