Spaces:
Sleeping
Sleeping
import pytesseract | |
from PIL import Image | |
import re | |
import io | |
# Value reported for any field that could not be located in the OCR text.
_NOT_FOUND = "Not found"

# Aadhaar number pattern XXXX-XXXX-XXXX or XXXX XXXX XXXX (separators optional).
_AADHAAR_RE = re.compile(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b')

# Two digits, two digits, four digits, separated by "/" or "-". The digit
# guards stop the pattern from matching a slice of a longer run of digits.
_DOB_RE = re.compile(r'(?<!\d)\d{2}[\/\-]\d{2}[\/\-]\d{4}(?!\d)')

# A "Name" label followed by an optional ":" or "-" and the rest of the line.
# "Narne" / "NAMF" are presumably common OCR misreads of "Name".
# The leading \b keeps the label from firing inside longer words such as
# "Surname" or "Unnamed".
_NAME_RE = re.compile(r'(?i)\b(?:Name|Narne|NAMF)\s*[:\-]?\s*(.*)')


def _parse_kyc_text(text):
    """Pick the Aadhaar number, date of birth and name out of OCR text."""
    aadhaar = _AADHAAR_RE.search(text)
    dob = _DOB_RE.search(text)
    name_match = _NAME_RE.search(text)

    # "." also matches "\r" and form feeds, so the raw capture can carry
    # trailing junk; an empty capture means the label had no value after it.
    name = name_match.group(1).strip() if name_match else ""

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else _NOT_FOUND,
        "dob": dob.group(0) if dob else _NOT_FOUND,
        "name": name or _NOT_FOUND,
    }


def extract_kyc_fields(image_bytes):
    """Run OCR on an encoded image and extract KYC fields from the text.

    Args:
        image_bytes: The raw bytes of an image file, as accepted by
            ``PIL.Image.open`` when wrapped in a ``BytesIO``.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the first matching text found in the OCR output, or
        the string ``"Not found"`` when nothing matched.

    Errors raised by Pillow or pytesseract (for example on undecodable
    image data) are not caught here and propagate to the caller.
    """
    # convert() returns an independent RGB copy, so the source image can be
    # released as soon as the copy exists.
    with Image.open(io.BytesIO(image_bytes)) as source:
        image = source.convert("RGB")

    text = pytesseract.image_to_string(image)
    return _parse_kyc_text(text)