SMART_KYC_OCR / app.py
gopichandra's picture
Update app.py
efdc972 verified
raw
history blame
6.8 kB
import os
os.environ["OMP_NUM_THREADS"] = "1"
import re
import json
import datetime as dt
import gradio as gr
from utils import extract_kyc_fields
# ------------------ SALESFORCE CREDS (env-overridable) ------------------
# NOTE(security): the literal fallbacks below are real-looking secrets that are
# committed to source control. Set SF_USERNAME / SF_PASSWORD /
# SF_SECURITY_TOKEN / SF_DOMAIN in the environment (e.g. as deployment secrets),
# then rotate these values and delete the fallbacks.
SF_USERNAME = os.environ.get("SF_USERNAME", "[email protected]")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "Lic@2025")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN", "AmmfRcd6IiYaRtSGntBnzNMQU")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" for prod/dev, "test" for sandbox
# ---------------------------------------------------------------
# simple-salesforce 1.11.6
from simple_salesforce import Salesforce
# ---------- helpers ----------
def _parse_birthdate(dob_text: str):
"""
Normalize DOB to YYYY-MM-DD for Salesforce Date fields.
Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
"""
if not dob_text or dob_text == "Not found":
return None
s = dob_text.strip()
m = re.fullmatch(r"(\d{4})-(\d{2})-(\d{2})", s)
if m:
y, mo, d = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(\d{2})[./-](\d{2})[./-](\d{4})", s)
if m:
d, mo, y = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(19|20)\d{2}", s)
if m:
y = int(s)
try:
return dt.date(y, 6, 15).isoformat()
except ValueError:
return None
return None
def sf_login():
    """
    Open a Salesforce session and confirm it is usable before returning it.

    A simple-salesforce client is built from the module-level SF_* settings,
    then a one-row query against User is issued. Nothing is caught here, so
    any error raised by the client propagates unchanged to the caller.
    """
    credentials = {
        "username": SF_USERNAME,
        "password": SF_PASSWORD,
        "security_token": SF_SECURITY_TOKEN,
        "domain": SF_DOMAIN,
    }
    client = Salesforce(**credentials)

    # Cheap round-trip so an auth problem shows up now, not on the first write.
    client.query("SELECT Id FROM User LIMIT 1")
    return client
def sf_create_kyc_via_session(sf, payload: dict):
    """
    Create a KYC_Record__c by POSTing straight to the REST sObject endpoint.

    The request reuses the session and headers of the already-authenticated
    client ``sf`` instead of going through the SDK's create() wrapper, so the
    outcome is reported as a plain dict rather than an SDK exception.

    Args:
        sf: authenticated client exposing ``base_url``, ``session`` and
            ``headers``.
        payload: field-name -> value mapping sent as the JSON request body.

    Returns:
        On a 2xx status: ``success=True`` plus ``id`` (taken from the JSON
        body when that body is a dict, else ``None``), ``status_code``,
        ``url`` and ``response_json``.
        Otherwise: ``success=False`` plus ``status_code``, ``url``,
        ``response_json`` and the raw ``response_text``.
    """
    # base_url is expected to end with `/services/data/vXX.X/` (trailing slash).
    endpoint = f"{sf.base_url}sobjects/KYC_Record__c"
    response = sf.session.post(endpoint, json=payload, headers=sf.headers, timeout=30)

    # The body is not guaranteed to be JSON; fall back to None.
    try:
        parsed = response.json()
    except Exception:
        parsed = None

    # Failure path first: hand back the raw details untouched.
    if not (200 <= response.status_code < 300):
        return {
            "success": False,
            "status_code": response.status_code,
            "url": endpoint,
            "response_json": parsed,
            "response_text": response.text,
        }

    record_id = None
    if isinstance(parsed, dict):
        record_id = parsed.get("id")
    return {
        "success": True,
        "id": record_id,
        "status_code": response.status_code,
        "url": endpoint,
        "response_json": parsed,
    }
def _present_or_none(value):
    """Return ``value`` unless it is a blank string or the "Not found" placeholder."""
    if isinstance(value, str):
        stripped = value.strip()
        if not stripped or stripped == "Not found":
            return None
    return value


def build_payload(ocr_results: dict):
    """
    Map OCR outputs to the Salesforce fields on KYC_Record__c.

    Fields written:
        Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
        PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    Args:
        ocr_results: dict with optional ``"aadhaar"`` and ``"pan"`` entries,
            each either ``None`` or a dict of extracted fields that includes
            ``card_type``.

    Returns:
        A dict containing only the fields that have a usable value. A
        document's fields are included only when its ``card_type`` matches
        the slot it was uploaded to ("AADHAAR" / "PAN"). Values that are
        ``None``, blank, or the ``"Not found"`` placeholder are omitted, as are
        dates that cannot be normalized.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    def _dob(raw):
        # Skip the parser entirely when there is nothing to parse.
        return _parse_birthdate(raw) if _present_or_none(raw) is not None else None

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = _present_or_none(aadhaar.get("aadhaar_number"))
        payload["Aadhaar_Name__c"] = _present_or_none(aadhaar.get("name"))
        payload["Aadhaar_DOB__c"] = _dob(aadhaar.get("dob"))
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = _present_or_none(pan.get("pan_number"))
        payload["Pan_Name__c"] = _present_or_none(pan.get("name"))
        payload["Pan_DOB__c"] = _dob(pan.get("dob"))

    # Drop Nones so we don't try to set blanks on required fields.
    # NOTE(review): "Not found" is the placeholder _parse_birthdate already
    # treats as missing; it is assumed the OCR helper uses the same marker for
    # names and numbers — confirm against utils.extract_kyc_fields.
    return {field: value for field, value in payload.items() if value is not None}
# ---------- Gradio callback ----------
def _ocr_upload(upload, label: str):
    """Run OCR on one upload and tag the result with its file name; never raises."""
    try:
        # Accept either an object exposing `.name` (temp-file wrapper) or a plain path.
        path = getattr(upload, "name", upload)
        fields = extract_kyc_fields(path)
        fields["source_file"] = os.path.basename(path)
        return fields
    except Exception as e:
        return {"error": f"{label} OCR failed: {str(e)}", "card_type": "UNKNOWN"}


def process_documents(aadhaar_file, pan_file, push_to_sf):
    """
    OCR both uploads; optionally push one KYC_Record__c to Salesforce.

    Args:
        aadhaar_file: uploaded Aadhaar image (object with ``.name`` or a path),
            or a falsy value when nothing was uploaded.
        pan_file: uploaded PAN image, same conventions.
        push_to_sf: when truthy, build a payload from the OCR results and
            create a record via the authenticated session.

    Returns:
        ``{"error": ...}`` when no file was supplied. Otherwise
        ``{"ocr": {"aadhaar": ..., "pan": ...}}`` and, if a push was requested,
        a ``"salesforce"`` entry with ``pushed`` plus either the raw create
        result or an ``error`` dict (``type`` / ``message``). OCR and
        login/create failures are reported in the output rather than raised.
    """
    if not aadhaar_file and not pan_file:
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    results = {
        "aadhaar": _ocr_upload(aadhaar_file, "Aadhaar") if aadhaar_file else None,
        "pan": _ocr_upload(pan_file, "PAN") if pan_file else None,
    }
    output = {"ocr": results}
    if not push_to_sf:
        return output

    payload = build_payload(results)
    if not payload:
        # Nothing usable was extracted; don't create an empty record.
        output["salesforce"] = {
            "pushed": False,
            "error": {
                "type": "EmptyPayload",
                "message": "No KYC fields were extracted; nothing was sent to Salesforce.",
            },
        }
        return output

    try:
        sf = sf_login()
        created = sf_create_kyc_via_session(sf, payload)
        output["salesforce"] = {"pushed": created.get("success", False), **created}
    except Exception as e:
        # Any auth/connection error shown plainly (not wrapped by SDK)
        output["salesforce"] = {
            "pushed": False,
            "error": {"type": e.__class__.__name__, "message": str(e)},
        }
    return output
# ---------- UI ----------
# Page layout: two side-by-side upload boxes, a push toggle, a submit button and
# a JSON panel that shows whatever process_documents returns.
# The arrow/emoji characters below were mis-encoded (UTF-8 bytes decoded with a
# legacy code page); they are restored to the intended symbols.
with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
    gr.Markdown(
        "\n"
        "# 🧾 Smart KYC OCR → Salesforce\n"
        "Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.\n"
    )
    with gr.Row():
        with gr.Column(scale=1):
            aadhaar_uploader = gr.File(
                label="📤 Aadhaar Upload",
                file_types=[".jpg", ".jpeg", ".png"],
            )
        with gr.Column(scale=1):
            pan_uploader = gr.File(
                label="📤 PAN Upload",
                file_types=[".jpg", ".jpeg", ".png"],
            )
    push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
    submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
    output_json = gr.JSON(label="📋 Output (OCR + Salesforce)")

    # Wire the button to the callback; inputs are passed positionally in this order.
    submit_btn.click(
        fn=process_documents,
        inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
        outputs=output_json,
    )

if __name__ == "__main__":
    demo.launch()