Spaces:
Sleeping
Sleeping
import os | |
os.environ["OMP_NUM_THREADS"] = "1" | |
import re | |
import json | |
import datetime as dt | |
import gradio as gr | |
from utils import extract_kyc_fields | |
# ------------------ SALESFORCE CREDS (from environment) ------------------
# Secrets must not live in source control: anyone able to read this file can
# log in to the org. They are read from the process environment instead, so
# set SF_USERNAME, SF_PASSWORD and SF_SECURITY_TOKEN as deployment secrets.
# A variable that is not set yields None here.
# NOTE(review): the literals that used to be committed in this block should be
# treated as exposed and rotated in Salesforce.
SF_USERNAME = os.environ.get("SF_USERNAME")
SF_PASSWORD = os.environ.get("SF_PASSWORD")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" for prod/dev, "test" for sandbox
# ---------------------------------------------------------------
# simple-salesforce 1.11.6 | |
from simple_salesforce import Salesforce | |
# ---------- helpers ---------- | |
def _parse_birthdate(dob_text: str): | |
""" | |
Normalize DOB to YYYY-MM-DD for Salesforce Date fields. | |
Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year). | |
""" | |
if not dob_text or dob_text == "Not found": | |
return None | |
s = dob_text.strip() | |
m = re.fullmatch(r"(\d{4})-(\d{2})-(\d{2})", s) | |
if m: | |
y, mo, d = map(int, m.groups()) | |
try: | |
return dt.date(y, mo, d).isoformat() | |
except ValueError: | |
return None | |
m = re.fullmatch(r"(\d{2})[./-](\d{2})[./-](\d{4})", s) | |
if m: | |
d, mo, y = map(int, m.groups()) | |
try: | |
return dt.date(y, mo, d).isoformat() | |
except ValueError: | |
return None | |
m = re.fullmatch(r"(19|20)\d{2}", s) | |
if m: | |
y = int(s) | |
try: | |
return dt.date(y, 6, 15).isoformat() | |
except ValueError: | |
return None | |
return None | |
def sf_login():
    """
    Build a simple-salesforce (v1.11.6) client from the module-level SF_*
    settings and return it.

    Nothing is caught here: whatever the constructor or the probe query raises
    (bad credentials, blocked API access, ...) propagates to the caller, which
    is expected to surface the raw error to the UI.
    """
    credentials = {
        "username": SF_USERNAME,
        "password": SF_PASSWORD,
        "security_token": SF_SECURITY_TOKEN,
        "domain": SF_DOMAIN,
    }
    client = Salesforce(**credentials)
    # One-row probe query so an authentication problem shows up right away
    # instead of on the first real request.
    client.query("SELECT Id FROM User LIMIT 1")
    return client
def sf_create_kyc_via_session(sf, payload: dict):
    """
    Create one KYC_Record__c by POSTing `payload` straight to the REST sObject
    endpoint, reusing the authenticated session and headers held by `sf`.

    Going through the raw session sidesteps the SDK's create() wrapper (and the
    'SalesforceError.__init__() missing ...' TypeError it was producing).

    Returns a dict that always carries "success", "status_code", "url" and
    "response_json" (None when the body is not JSON). A 2xx answer also
    carries "id"; any other status also carries the raw "response_text".
    """
    # In simple-salesforce 1.11.6, base_url ends with `/services/data/vXX.X/`
    endpoint = f"{sf.base_url}sobjects/KYC_Record__c"
    response = sf.session.post(endpoint, json=payload, headers=sf.headers, timeout=30)

    # The body is not guaranteed to be JSON; fall back to None.
    try:
        parsed = response.json()
    except Exception:
        parsed = None

    status = response.status_code
    if not (200 <= status < 300):
        # Hand back the *raw* details (no SDK exception wrapper involved)
        return {
            "success": False,
            "status_code": status,
            "url": endpoint,
            "response_json": parsed,
            "response_text": response.text,
        }

    record_id = None
    if isinstance(parsed, dict):
        record_id = parsed.get("id")
    return {
        "success": True,
        "id": record_id,
        "status_code": status,
        "url": endpoint,
        "response_json": parsed,
    }
def build_payload(ocr_results: dict):
    """
    Map OCR outputs to your Salesforce fields on KYC_Record__c.

    Fields:
      Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
      PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    `ocr_results` is read through its "aadhaar" and "pan" entries; a missing or
    None entry is treated as empty. An entry contributes fields only when its
    "card_type" matches the slot ("AADHAAR" / "PAN"), so a document uploaded in
    the wrong box or a failed OCR result adds nothing.

    Returns a dict holding only the fields that have a usable value.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        payload["Aadhaar_Name__c"] = aadhaar.get("name")
        payload["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = pan.get("pan_number")
        payload["Pan_Name__c"] = pan.get("name")
        payload["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))

    def _is_missing(value):
        # "Not found" is the placeholder _parse_birthdate already treats as a
        # missing DOB; presumably the OCR helper uses the same text for other
        # fields it could not read (confirm against utils.extract_kyc_fields).
        # Sending it, or a blank string, would store junk in Salesforce.
        if value is None:
            return True
        return isinstance(value, str) and value.strip() in ("", "Not found")

    # Drop unusable values so we don't try to set blanks on required fields
    return {field: value for field, value in payload.items() if not _is_missing(value)}
# ---------- Gradio callback ---------- | |
def process_documents(aadhaar_file, pan_file, push_to_sf):
    """
    OCR both uploads; optionally push one KYC_Record__c via simple-salesforce session POST.

    Args:
        aadhaar_file: Upload from the Aadhaar box, or a falsy value if empty.
        pan_file: Upload from the PAN box, or a falsy value if empty.
        push_to_sf: When truthy, also create a KYC_Record__c from the OCR result.

    Returns:
        {"error": ...} when neither file was supplied. Otherwise a dict with
        "ocr" ({"aadhaar": ..., "pan": ...}, each None when not uploaded) and,
        if a push was requested, "salesforce" describing the outcome; its
        "pushed" flag is True only when the record was created.
    """
    if not aadhaar_file and not pan_file:
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    results = {
        "aadhaar": _ocr_upload(aadhaar_file, "Aadhaar") if aadhaar_file else None,
        "pan": _ocr_upload(pan_file, "PAN") if pan_file else None,
    }
    output = {"ocr": results}
    if not push_to_sf:
        return output

    payload = build_payload(results)
    if not payload:
        # Nothing usable was extracted (OCR failed or wrong document type).
        # POSTing an empty body would create a blank record or be rejected,
        # so skip the call and say why.
        output["salesforce"] = {
            "pushed": False,
            "error": {
                "type": "EmptyPayload",
                "message": "No usable Aadhaar/PAN fields were extracted; nothing was sent to Salesforce.",
            },
        }
        return output

    try:
        sf = sf_login()
        created = sf_create_kyc_via_session(sf, payload)
        output["salesforce"] = {"pushed": created.get("success", False), **created}
    except Exception as e:
        # Any auth/connection error shown plainly (not wrapped by SDK)
        output["salesforce"] = {
            "pushed": False,
            "error": {"type": e.__class__.__name__, "message": str(e)},
        }
    return output


def _ocr_upload(upload, label):
    """Run OCR on one upload; return its fields, or an error dict if anything fails."""
    # The upload is either an object exposing the temp-file path as `.name`
    # or, depending on the Gradio version/config, the path string itself.
    path = getattr(upload, "name", upload)
    try:
        fields = extract_kyc_fields(path)
        fields["source_file"] = os.path.basename(path)
        return fields
    except Exception as e:
        # Best effort: one unreadable document must not block the other one.
        return {"error": f"{label} OCR failed: {e}", "card_type": "UNKNOWN"}
# ---------- UI ---------- | |
# Build the Gradio page. The user-facing strings previously contained
# mis-decoded bytes ("β", "π§Ύ", "π€"); they are restored to the characters
# those byte sequences encode (→, 🧾, 📤).
with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
    gr.Markdown(
        """
# 🧾 Smart KYC OCR → Salesforce
Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
"""
    )
    # Two side-by-side upload boxes, one per document type.
    with gr.Row():
        with gr.Column(scale=1):
            aadhaar_uploader = gr.File(label="📤 Aadhaar Upload", file_types=[".jpg", ".jpeg", ".png"])
        with gr.Column(scale=1):
            pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
    push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
    # NOTE(review): the button and output labels each began with a garbled
    # glyph ("π ") whose original emoji cannot be recovered; it is dropped
    # here. Re-add the intended emoji if desired.
    submit_btn = gr.Button("Extract KYC Info", variant="primary")
    output_json = gr.JSON(label="Output (OCR + Salesforce)")
    # Event wiring has to happen inside the Blocks context.
    submit_btn.click(
        fn=process_documents,
        inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
        outputs=output_json,
    )

if __name__ == "__main__":
    demo.launch()