import io
import re

import pytesseract
from PIL import Image

# Sentinel returned for any field that could not be located in the OCR text.
_NOT_FOUND = "Not found"

# Aadhaar number: three groups of four digits, optionally separated by
# whitespace or a hyphen (e.g. "1234-5678-9012" or "1234 5678 9012").
_AADHAAR_RE = re.compile(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b')

# Date written as DD/MM/YYYY or DD-MM-YYYY. The look-arounds stop the pattern
# from matching a slice out of the middle of a longer run of digits.
_DOB_RE = re.compile(r'(?<!\d)\d{2}[/\-]\d{2}[/\-]\d{4}(?!\d)')

# "Name" label plus two spellings OCR commonly produces for it ("Narne",
# "NAMF"), followed by an optional ':' or '-' and the rest of the line.
# The leading \b keeps words such as "Username" from being taken as the label.
_NAME_RE = re.compile(r'\b(?:Name|Narne|NAMF)\s*[:\-]?\s*(.*)', re.IGNORECASE)


def _parse_kyc_text(text):
    """Pull the Aadhaar number, date of birth and name out of OCR text."""
    aadhaar = _AADHAAR_RE.search(text)
    dob = _DOB_RE.search(text)
    name_match = _NAME_RE.search(text)

    # '.' also matches '\r' and trailing blanks, so tidy the captured value;
    # a label with nothing after it counts as "not found" rather than "".
    name = name_match.group(1).strip() if name_match else ""

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else _NOT_FOUND,
        "dob": dob.group(0) if dob else _NOT_FOUND,
        "name": name or _NOT_FOUND,
    }


def extract_kyc_fields(image_bytes: bytes) -> dict:
    """Run OCR on an encoded image and extract KYC fields from the text.

    Args:
        image_bytes: Raw bytes of an image file in a format Pillow can open.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the first matching text found in the OCR output, or
        the string ``"Not found"`` when nothing matched.

    Exceptions raised by Pillow (while decoding the image) or by pytesseract
    (while running OCR) are not caught here and propagate to the caller.
    """
    # Close the decoded source image as soon as the RGB copy exists instead of
    # leaving it to the garbage collector.
    with Image.open(io.BytesIO(image_bytes)) as source:
        image = source.convert("RGB")

    text = pytesseract.image_to_string(image)
    return _parse_kyc_text(text)