gopichandra committed on
Commit
c70099c
·
verified ·
1 Parent(s): 92e13ee

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +19 -0
utils.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from PIL import Image
3
+ import re
4
+ import io
5
+
6
def extract_kyc_fields(image_bytes: bytes) -> dict:
    """Run OCR on an image and extract Aadhaar number, date of birth and name.

    Args:
        image_bytes: Encoded image data, wrapped in ``io.BytesIO`` and handed
            to ``PIL.Image.open``.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the text matched in the OCR output, or the string
        ``"Not found"`` when no usable match exists for that field.

    Raises:
        Nothing is caught here: any exception raised by ``Image.open``,
        ``convert`` or ``pytesseract.image_to_string`` propagates to the
        caller.
    """
    not_found = "Not found"

    # convert() returns an independent RGB copy, so the source image can be
    # closed as soon as the copy exists.
    with Image.open(io.BytesIO(image_bytes)) as source:
        image = source.convert("RGB")
    text = pytesseract.image_to_string(image)

    # Aadhaar number: XXXX-XXXX-XXXX, XXXX XXXX XXXX, or 12 contiguous digits.
    aadhaar = re.search(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', text)

    # Date as DD/MM/YYYY or DD-MM-YYYY. The digit lookarounds stop the
    # pattern from matching a slice of a longer run of digits.
    dob = re.search(r'(?<!\d)\d{2}[/\-]\d{2}[/\-]\d{4}(?!\d)', text)

    # "Narne" / "NAMF" are presumably common OCR misreads of "Name" -- confirm
    # against real scans. The leading \b keeps the keyword from matching
    # inside longer words (e.g. "Surname"). Take the first label that is
    # actually followed by text, so an empty capture is not returned as a name.
    name = not_found
    for label in re.finditer(r'(?i)\b(?:Name|Narne|NAMF)\s*[:\-]?\s*(.*)', text):
        candidate = label.group(1).strip()
        if candidate:
            name = candidate
            break

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else not_found,
        "dob": dob.group(0) if dob else not_found,
        "name": name,
    }