SMART_KYC_OCR / utils.py
gopichandra's picture
Create utils.py
c70099c verified
raw
history blame
688 Bytes
import pytesseract
from PIL import Image
import re
import io
# Placeholder returned for any field that is absent from the OCR text.
_NOT_FOUND = "Not found"

# Twelve digits in three groups of four, written as XXXX-XXXX-XXXX,
# XXXX XXXX XXXX or XXXXXXXXXXXX. The separator is limited to a space, tab
# or hyphen so that digit groups on different lines are never joined.
_AADHAAR_RE = re.compile(r'\b\d{4}[ \t-]?\d{4}[ \t-]?\d{4}\b')

# DD/MM/YYYY or DD-MM-YYYY. The digit look-arounds stop the pattern from
# matching a slice of a longer run of digits.
_DOB_RE = re.compile(r'(?<!\d)\d{2}[/-]\d{2}[/-]\d{4}(?!\d)')

# "Name" label (plus the "Narne" / "NAMF" spellings the original pattern
# already tolerated), an optional ':' or '-' and then the rest of the line.
# The word boundaries keep the label from matching inside longer words such
# as "Surname" or "Namely".
_NAME_RE = re.compile(
    r'\b(?:Name|Narne|NAMF)\b\s*[:\-]?\s*(.*)',
    re.IGNORECASE,
)


def _parse_kyc_fields(text):
    """Pull the Aadhaar number, date of birth and name out of OCR text."""
    aadhaar = _AADHAAR_RE.search(text)
    dob = _DOB_RE.search(text)
    name_match = _NAME_RE.search(text)

    # '.' also matches '\r' and trailing blanks, so trim the captured value;
    # a label with nothing after it counts as "not found".
    name = name_match.group(1).strip() if name_match else ""

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else _NOT_FOUND,
        "dob": dob.group(0) if dob else _NOT_FOUND,
        "name": name or _NOT_FOUND,
    }


def extract_kyc_fields(image_bytes: bytes) -> dict:
    """Run OCR on an encoded image and extract basic KYC fields.

    Args:
        image_bytes: The raw bytes of an image file, in any format that
            ``PIL.Image.open`` can decode.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the first matching text found in the OCR output, or
        the string ``"Not found"`` when nothing matched.

    Raises:
        Any exception raised by Pillow while decoding the image or by
        pytesseract while running OCR is propagated unchanged.
    """
    # Close the decoded source image as soon as the RGB copy exists.
    with Image.open(io.BytesIO(image_bytes)) as source:
        image = source.convert("RGB")

    text = pytesseract.image_to_string(image)
    return _parse_kyc_fields(text)