Spaces:
Sleeping
Sleeping
File size: 9,368 Bytes
c767428 30c1744 c767428 52f3a2e c8d472e fa803dc c8d472e 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 20e1d9b 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 076f174 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 076f174 20e1d9b d90a964 52f3a2e 20e1d9b 52f3a2e 30c1744 52f3a2e 20e1d9b 076f174 20e1d9b 52f3a2e 30c1744 52f3a2e 30c1744 52f3a2e 57d3fac d90a964 52f3a2e 7cc77a8 52f3a2e 7cc77a8 52f3a2e 57d3fac 7cc77a8 57d3fac 7cc77a8 20e1d9b 076f174 7cc77a8 20e1d9b 52f3a2e 20e1d9b 52f3a2e 7cc77a8 20e1d9b 52f3a2e 20e1d9b 7cc77a8 57d3fac 7cc77a8 a1903a9 fa803dc 7cc77a8 |
|
import os
os.environ["OMP_NUM_THREADS"] = "1"
import re
import datetime as dt
import gradio as gr
from utils import extract_kyc_fields
# ------------------------------------------------------------------
# CREDENTIALS STRATEGY
# Credentials come from environment variables (Hugging Face Space Secrets or
# the local environment). The first argument to os.getenv() is the variable
# NAME, never the secret value itself; real credentials must not be committed
# to source control. When a variable is unset the REPLACE_ME_* placeholder is
# used, which sf_connect_from_env_or_defaults() rejects before any login.
# NOTE(review): any credential that was ever embedded in this file should be
# treated as compromised and rotated.
# ------------------------------------------------------------------
SF_DEFAULT_USERNAME = os.getenv("SF_USERNAME", "REPLACE_ME_USERNAME")
SF_DEFAULT_PASSWORD = os.getenv("SF_PASSWORD", "REPLACE_ME_PASSWORD")
SF_DEFAULT_TOKEN = os.getenv("SF_SECURITY_TOKEN", "REPLACE_ME_SECURITY_TOKEN")
SF_DEFAULT_DOMAIN = os.getenv("SF_DOMAIN", "login")  # "login" (prod) or "test" (sandbox)
# ------------------------------------------------------------------
# simple-salesforce + exceptions
# The dependency is optional: if it cannot be imported the app still starts
# and SF_AVAILABLE is set to False so the Salesforce push path can refuse
# to run with a clear message.
try:
    from simple_salesforce import Salesforce
    from simple_salesforce.exceptions import (
        SalesforceAuthenticationFailed,
        SalesforceGeneralError,
        SalesforceMalformedRequest,
        SalesforceExpiredSession,
        SalesforceRefusedRequest,
        SalesforceMoreThanOneRecord,
        SalesforceResourceNotFound,
    )
    SF_AVAILABLE = True
except Exception:  # kept broad on purpose: any import problem only disables the push feature
    SF_AVAILABLE = False
    Salesforce = None

    class _SalesforceUnavailable(Exception):
        """Placeholder type bound to the Salesforce exception names when the library is missing."""

    # Keep every exception name defined so isinstance() checks elsewhere in
    # this module evaluate to False instead of raising NameError while an
    # unrelated error (e.g. "library not installed") is being reported.
    SalesforceAuthenticationFailed = _SalesforceUnavailable
    SalesforceGeneralError = _SalesforceUnavailable
    SalesforceMalformedRequest = _SalesforceUnavailable
    SalesforceExpiredSession = _SalesforceUnavailable
    SalesforceRefusedRequest = _SalesforceUnavailable
    SalesforceMoreThanOneRecord = _SalesforceUnavailable
    SalesforceResourceNotFound = _SalesforceUnavailable
# ---------- helpers ----------
def _parse_birthdate(dob_text: str):
"""
Normalize DOB to YYYY-MM-DD (Salesforce Date fields).
Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
"""
if not dob_text or dob_text == "Not found":
return None
s = dob_text.strip()
m = re.fullmatch(r"(\d{4})-(\d{2})-(\d{2})", s)
if m:
y, mo, d = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(\d{2})[./-](\d{2})[./-](\d{4})", s)
if m:
d, mo, y = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(19|20)\d{2}", s)
if m:
y = int(s)
try:
return dt.date(y, 6, 15).isoformat()
except ValueError:
return None
return None
def _fmt_sf_error(err: Exception):
    """
    Summarize an exception as a plain dict for display in the JSON output.

    Every result carries "type" (the exception class name) and "message"
    (str(err)). Recognized simple-salesforce exceptions additionally get a
    "category" and whichever of status / resource / url / content the
    exception object exposes (missing attributes are reported as None).
    """
    details = {"type": err.__class__.__name__, "message": str(err)}

    if isinstance(err, SalesforceAuthenticationFailed):
        # Prefer a direct `content` attribute; otherwise fall back to a
        # `response` attribute and unwrap its `.content` if it has one.
        raw = getattr(err, "content", None) or getattr(err, "response", None)
        if hasattr(raw, "content"):
            raw = getattr(raw, "content", None)
        details["category"] = "AUTHENTICATION"
        details["status"] = getattr(err, "status", None)
        details["content"] = raw
        return details

    request_errors = (SalesforceMalformedRequest, SalesforceGeneralError, SalesforceRefusedRequest)
    if isinstance(err, request_errors):
        details["category"] = "REQUEST"
        details["status"] = getattr(err, "status", None)
        details["resource"] = getattr(err, "resource_name", None)
        details["url"] = getattr(err, "url", None)
        details["content"] = getattr(err, "content", None)
        return details

    if isinstance(err, SalesforceResourceNotFound):
        details["category"] = "NOT_FOUND"
    elif isinstance(err, SalesforceExpiredSession):
        details["category"] = "AUTH_EXPIRED"
    return details
def sf_connect_from_env_or_defaults():
    """
    Open a Salesforce session and verify it with a trivial query.

    Each setting is read from its environment variable (SF_USERNAME,
    SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN) and falls back to the
    matching SF_DEFAULT_* module constant when the variable is unset.

    Returns:
        The connected ``Salesforce`` client.

    Raises:
        RuntimeError: simple-salesforce could not be imported.
        ValueError: username, password or token is still a REPLACE_ME placeholder.
        Exception: anything raised by the login or the sanity query is
            propagated unchanged to the caller.
    """
    if not SF_AVAILABLE:
        raise RuntimeError("simple-salesforce is not installed. Add `simple-salesforce` to requirements.txt.")

    username = os.getenv("SF_USERNAME", SF_DEFAULT_USERNAME)
    password = os.getenv("SF_PASSWORD", SF_DEFAULT_PASSWORD)
    token = os.getenv("SF_SECURITY_TOKEN", SF_DEFAULT_TOKEN)
    domain = os.getenv("SF_DOMAIN", SF_DEFAULT_DOMAIN or "login")

    # Refuse to attempt a login with placeholder values.
    if any(v.startswith("REPLACE_ME") for v in (username, password, token)):
        raise ValueError(
            "Salesforce credentials missing. "
            "Set env vars (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN) "
            "or replace the REPLACE_ME_* constants in app.py."
        )

    # No try/except here: a handler that only re-raises adds nothing, so
    # login and query errors simply propagate.
    sf = Salesforce(username=username, password=password, security_token=token, domain=domain)
    # Lightweight auth sanity check so credential problems surface here
    # rather than on the first real request.
    sf.query("SELECT Id FROM User LIMIT 1")
    return sf
def sf_push_kyc_record(sf, ocr_results):
    """
    Create one KYC_Record__c combining Aadhaar + PAN OCR results.

    Fields written (only when a value is available):
        Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
        PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    Args:
        sf: connected client exposing ``KYC_Record__c.create(payload)``.
        ocr_results: dict with optional "aadhaar" and "pan" entries; an entry
            contributes fields only when its "card_type" is "AADHAAR" / "PAN"
            respectively.

    Returns:
        ``{"success": True, "id": ..., "payload": ...}`` on success, otherwise
        ``{"success": False, "error": {...}, "payload": ...}``. No exception
        from the create call escapes this function.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        payload["Aadhaar_Name__c"] = aadhaar.get("name")
        payload["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))  # Date field in SF
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = pan.get("pan_number")
        payload["Pan_Name__c"] = pan.get("name")
        payload["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))  # Date field in SF

    # Remove None values to avoid nulling non-nullable fields
    payload = {k: v for k, v in payload.items() if v is not None}

    # Nothing recognized (e.g. both OCR runs failed): do not create a blank record.
    if not payload:
        return {
            "success": False,
            "error": {
                "type": "ValueError",
                "category": "VALIDATION",
                "message": "No Aadhaar or PAN fields were extracted; nothing to push.",
            },
            "payload": payload,
        }

    try:
        result = sf.KYC_Record__c.create(payload)
        return {"success": True, "id": result.get("id"), "payload": payload}
    except Exception as e:
        # Return rich error info
        return {"success": False, "error": _fmt_sf_error(e), "payload": payload}
# ---------- gradio callback ----------
def _ocr_upload(upload, label):
    """Run OCR on one uploaded file; return its fields or an error dict tagged with `label`."""
    try:
        # The upload may arrive as a plain file path or as an object that
        # exposes the path through `.name`; accept both.
        if isinstance(upload, (str, os.PathLike)):
            path = os.fspath(upload)
        else:
            path = upload.name
        fields = extract_kyc_fields(path)
        fields["source_file"] = os.path.basename(path)
        return fields
    except Exception as e:
        # Best effort: one unreadable document must not abort the other.
        return {"error": f"{label} OCR failed: {str(e)}", "card_type": "UNKNOWN"}


def process_documents(aadhaar_file, pan_file, push_to_sf):
    """
    Gradio callback: OCR the uploaded documents and optionally push to Salesforce.

    - Runs OCR on Aadhaar and PAN separately; a failure on one is reported
      in its own entry and does not affect the other.
    - When `push_to_sf` is truthy, creates a single KYC_Record__c.

    Args:
        aadhaar_file: uploaded Aadhaar image (path string or object with
            `.name`), or None.
        pan_file: uploaded PAN image in the same form, or None.
        push_to_sf: whether to push the combined result to Salesforce.

    Returns:
        ``{"error": ...}`` when no file was supplied; otherwise
        ``{"ocr": {"aadhaar": ..., "pan": ...}}`` plus a "salesforce" entry
        (always containing "pushed") when a push was requested.
    """
    if not aadhaar_file and not pan_file:
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    results = {
        "aadhaar": _ocr_upload(aadhaar_file, "Aadhaar") if aadhaar_file else None,
        "pan": _ocr_upload(pan_file, "PAN") if pan_file else None,
    }
    output = {"ocr": results}

    if push_to_sf:
        try:
            sf = sf_connect_from_env_or_defaults()
            created = sf_push_kyc_record(sf, results)
            output["salesforce"] = {"pushed": created.get("success", False), **created}
        except Exception as e:
            output["salesforce"] = {"pushed": False, "error": _fmt_sf_error(e)}
    return output
# ---------- UI ----------
# NOTE(review): several titles/labels below contain characters that look like
# mis-decoded emoji or arrows. They are runtime text and are reproduced
# unchanged here; confirm against the original file's encoding.

# Image extensions accepted by both upload boxes.
_KYC_IMAGE_TYPES = (".jpg", ".jpeg", ".png")

# Introductory Markdown shown at the top of the page.
_INTRO_MARKDOWN = """
# π§Ύ Smart KYC OCR β Salesforce
Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
**Creds**: Set env vars (preferred)
`SF_USERNAME`, `SF_PASSWORD`, `SF_SECURITY_TOKEN`, `SF_DOMAIN` (login|test)
or replace the placeholders in `app.py` (not recommended).
"""

# Footer Markdown shown under the output panel.
_FOOTER_MARKDOWN = """
π **Note:** If you see an error, the response now includes the Salesforce status, URL, and error body.
"""

with gr.Blocks(title="Smart KYC OCR β Salesforce (KYC_Record__c)") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Two side-by-side upload boxes, one per document type.
    with gr.Row():
        with gr.Column(scale=1):
            aadhaar_uploader = gr.File(
                label="π€ Aadhaar Upload",
                file_types=list(_KYC_IMAGE_TYPES),
            )
        with gr.Column(scale=1):
            pan_uploader = gr.File(
                label="π€ PAN Upload",
                file_types=list(_KYC_IMAGE_TYPES),
            )

    # Pushing to Salesforce is opt-in (unchecked by default).
    push_to_sf = gr.Checkbox(
        label="Push to Salesforce (create KYC_Record__c)",
        value=False,
    )
    submit_btn = gr.Button("π Extract KYC Info", variant="primary")
    output_json = gr.JSON(label="π Output (OCR + Salesforce)")

    # Wire the button to the callback; its dict result is rendered as JSON.
    submit_btn.click(
        fn=process_documents,
        inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
        outputs=output_json,
    )

    gr.Markdown("---")
    gr.Markdown(_FOOTER_MARKDOWN)

# Start the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|