File size: 688 Bytes
c70099c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import pytesseract
from PIL import Image
import re
import io

# Placeholder returned for any field that could not be located in the text.
_NOT_FOUND = "Not found"

# Aadhaar number: three groups of four digits, each pair optionally separated
# by one whitespace character or a hyphen (XXXX-XXXX-XXXX, XXXX XXXX XXXX).
_AADHAAR_RE = re.compile(r'\b\d{4}[\s-]?\d{4}[\s-]?\d{4}\b')

# Date of birth: 2 digits, 2 digits, 4 digits separated by '/' or '-'.
# The look-arounds keep the pattern from matching in the middle of a longer
# run of digits (e.g. "2012-05-2019" must not yield "12-05-2019").
_DOB_RE = re.compile(r'(?<!\d)\d{2}[/-]\d{2}[/-]\d{4}(?!\d)')

# Name label (plus the OCR misreadings "Narne" / "NAMF"), an optional ':' or
# '-' separator, then the remainder of the line as the value.
_NAME_RE = re.compile(r'(?i)(Name|Narne|NAMF)\s*[:\-]?\s*(.*)')


def _parse_kyc_text(text):
    """Pull the Aadhaar number, date of birth and name out of OCR text."""
    aadhaar = _AADHAAR_RE.search(text)
    dob = _DOB_RE.search(text)
    name_match = _NAME_RE.search(text)

    # '.' also matches '\r' and trailing blanks, so trim the captured value;
    # an empty value after trimming counts as "no name found".
    name = name_match.group(2).strip() if name_match else ""

    return {
        "aadhaar_number": aadhaar.group(0) if aadhaar else _NOT_FOUND,
        "dob": dob.group(0) if dob else _NOT_FOUND,
        "name": name or _NOT_FOUND,
    }


def extract_kyc_fields(image_bytes: bytes) -> dict:
    """Run OCR on an image and extract basic KYC fields from the text.

    Args:
        image_bytes: Raw bytes of an encoded image; they are wrapped in a
            ``BytesIO`` and handed to ``PIL.Image.open``.

    Returns:
        A dict with the keys ``"aadhaar_number"``, ``"dob"`` and ``"name"``.
        Each value is the first matching text found in the OCR output, or
        the string ``"Not found"`` when nothing matched.

    Raises:
        Whatever ``PIL.Image.open`` / ``pytesseract.image_to_string`` raise
        for undecodable input or OCR failure; nothing is caught here.
    """
    # convert() decodes the pixel data into a new image object, so the
    # originally opened image can be closed as soon as the block exits.
    with Image.open(io.BytesIO(image_bytes)) as source:
        image = source.convert("RGB")

    text = pytesseract.image_to_string(image)
    return _parse_kyc_text(text)