gopichandra committed on
Commit
a8683a1
·
verified ·
1 Parent(s): 76bc21b

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +16 -13
utils.py CHANGED
@@ -1,19 +1,22 @@
1
  import pytesseract
2
  from PIL import Image
3
  import re
4
- import io
5
 
6
def extract_kyc_fields(image_bytes):
    """OCR an in-memory image and pick out the KYC fields found in its text.

    Args:
        image_bytes: Raw bytes of an encoded image, wrapped in a ``BytesIO``
            and handed to ``PIL.Image.open``.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the matched text, or the string ``"Not found"`` when
        the corresponding pattern does not occur in the OCR output.
    """
    buffer = io.BytesIO(image_bytes)
    rgb_image = Image.open(buffer).convert("RGB")
    ocr_text = pytesseract.image_to_string(rgb_image)

    # Aadhaar number pattern: XXXX-XXXX-XXXX, XXXX XXXX XXXX or 12 bare digits.
    aadhaar_hit = re.search(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', ocr_text)
    if aadhaar_hit:
        aadhaar_value = aadhaar_hit.group(0)
    else:
        aadhaar_value = "Not found"

    # Date of birth written as DD/MM/YYYY or DD-MM-YYYY.
    dob_hit = re.search(r'\d{2}[\/\-]\d{2}[\/\-]\d{4}', ocr_text)
    if dob_hit:
        dob_value = dob_hit.group(0)
    else:
        dob_value = "Not found"

    # The label alternatives cover "Name" plus two spellings the pattern
    # lists explicitly ("Narne", "NAMF"); group 2 is the rest of that line.
    name_hit = re.search(r'(?i)(Name|Narne|NAMF)\s*[:\-]?\s*(.*)', ocr_text)
    if name_hit:
        name_value = name_hit.group(2)
    else:
        name_value = "Not found"

    return {
        "aadhaar_number": aadhaar_value,
        "dob": dob_value,
        "name": name_value,
    }
 
 
 
 
1
  import pytesseract
2
  from PIL import Image
3
  import re
 
4
 
5
def _parse_kyc_text(text):
    """Pull the Aadhaar number, date of birth and name out of OCR text.

    Args:
        text: Plain text to search, one document line per ``"\\n"``.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``;
        a field that cannot be located is reported as ``"Not found"``.
    """
    # 12 digits in groups of four, optionally separated by whitespace or "-".
    aadhaar = re.search(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b', text)
    # DD/MM/YYYY or DD-MM-YYYY.
    dob = re.search(r'\d{2}[\/\-]\d{2}[\/\-]\d{4}', text)

    # The first line mentioning "name" (any case) is treated as the name line.
    name_line = next(
        (line for line in text.split("\n") if re.search(r'(?i)name', line)),
        "",
    )
    if ":" in name_line:
        # "Label: value" -> keep whatever follows the last colon.
        name = name_line.rpartition(":")[2].strip()
    else:
        # No colon: drop everything up to and including the label word and an
        # optional dash, so "Name John Doe" / "Name - John Doe" give
        # "John Doe" and a bare "Name" line is reported as not found instead
        # of returning the label itself.
        name = re.sub(
            r'(?i)^.*?name\w*\s*-?\s*', '', name_line, count=1
        ).strip()

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else "Not found",
        "dob": dob.group(0) if dob else "Not found",
        "name": name if name else "Not found",
    }


def extract_kyc_fields(file_path):
    """OCR the image at ``file_path`` and extract KYC fields from its text.

    Args:
        file_path: Value passed straight to ``PIL.Image.open`` (a path or a
            file-like object).

    Returns:
        On success, a dict with the keys ``"aadhaar_number"``, ``"dob"`` and
        ``"name"``, each holding the extracted text or ``"Not found"``.
        If anything raises while opening, OCR-ing or parsing, a dict of the
        form ``{"error": <message>}`` is returned instead of propagating the
        exception.
    """
    try:
        # Close the source image (and its file handle) as soon as the RGB
        # copy exists; convert() returns a new, fully loaded image.
        with Image.open(file_path) as source_image:
            image = source_image.convert("RGB")
        text = pytesseract.image_to_string(image)
        return _parse_kyc_text(text)
    except Exception as e:
        # Deliberate catch-all: failures are reported through the
        # {"error": ...} dict rather than raised, so that contract is kept.
        return {"error": str(e)}