Spaces:
Sleeping
Sleeping
File size: 9,908 Bytes
c767428 30c1744 c767428 52f3a2e 89e89ee 52f3a2e c8d472e fa803dc c8d472e 5686c7d 20e1d9b 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 52f3a2e 57d3fac 5686c7d 57d3fac 5686c7d 076f174 5686c7d 52f3a2e 57d3fac 52f3a2e 89e89ee 57d3fac 89e89ee 57d3fac 89e89ee 52f3a2e 5686c7d 52f3a2e 5686c7d 52f3a2e 5686c7d 52f3a2e 89e89ee 52f3a2e 89e89ee 52f3a2e 89e89ee 52f3a2e 89e89ee 52f3a2e 89e89ee 52f3a2e 89e89ee 52f3a2e 89e89ee 076f174 20e1d9b d90a964 52f3a2e 20e1d9b 52f3a2e 30c1744 52f3a2e 20e1d9b 076f174 20e1d9b 52f3a2e 30c1744 52f3a2e 30c1744 52f3a2e 5686c7d 52f3a2e 89e89ee 57d3fac d90a964 52f3a2e 7cc77a8 52f3a2e 7cc77a8 52f3a2e 57d3fac 7cc77a8 20e1d9b 076f174 7cc77a8 20e1d9b 52f3a2e 20e1d9b 52f3a2e 7cc77a8 20e1d9b 52f3a2e 20e1d9b 7cc77a8 89e89ee 7cc77a8 a1903a9 89e89ee fa803dc 7cc77a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 |
import os
os.environ["OMP_NUM_THREADS"] = "1"
import re
import json
import datetime as dt
import gradio as gr
from utils import extract_kyc_fields
# ------------------ HARD-CODED SALESFORCE CREDS (as requested) ------------------
# SECURITY: secrets committed to source are readable by anyone who can read this
# file. Each setting can be overridden with an environment variable of the same
# name (e.g. a Space secret); the literals below are kept only as
# backward-compatible defaults and should be rotated and then removed.
# NOTE(review): the SF_USERNAME default looks like an obfuscated placeholder
# rather than a real login -- confirm before relying on it.
SF_USERNAME = os.environ.get("SF_USERNAME", "[email protected]")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "Lic@2025")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN", "AmmfRcd6IiYaRtSGntBnzNMQU")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" for prod, "test" for sandbox
# -------------------------------------------------------------------------------
# simple-salesforce + exceptions (optional dependency)
# SF_AVAILABLE records whether the import worked. When it did not, stand-in
# exception classes are defined so that code referencing these names (for
# example in isinstance checks) does not hit a NameError.
try:
    from simple_salesforce import Salesforce
    from simple_salesforce.exceptions import (
        SalesforceAuthenticationFailed,
        SalesforceGeneralError,
        SalesforceMalformedRequest,
        SalesforceExpiredSession,
        SalesforceRefusedRequest,
        SalesforceMoreThanOneRecord,
        SalesforceResourceNotFound
    )
    SF_AVAILABLE = True
except Exception:
    # Kept broad on purpose: any import-time failure (missing package, broken
    # install) should only disable the Salesforce push, not the whole app.
    SF_AVAILABLE = False
    Salesforce = None

    class _SalesforceUnavailableError(Exception):
        """Base for stand-ins used while simple-salesforce is not importable."""

    class SalesforceAuthenticationFailed(_SalesforceUnavailableError):
        pass

    class SalesforceGeneralError(_SalesforceUnavailableError):
        pass

    class SalesforceMalformedRequest(_SalesforceUnavailableError):
        pass

    class SalesforceExpiredSession(_SalesforceUnavailableError):
        pass

    class SalesforceRefusedRequest(_SalesforceUnavailableError):
        pass

    class SalesforceMoreThanOneRecord(_SalesforceUnavailableError):
        pass

    class SalesforceResourceNotFound(_SalesforceUnavailableError):
        pass
# ---------- helpers ----------
def _parse_birthdate(dob_text: str):
"""
Normalize DOB to YYYY-MM-DD (Salesforce Date fields).
Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
"""
if not dob_text or dob_text == "Not found":
return None
s = dob_text.strip()
m = re.fullmatch(r"(\d{4})-(\d{2})-(\d{2})", s)
if m:
y, mo, d = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(\d{2})[./-](\d{2})[./-](\d{4})", s)
if m:
d, mo, y = map(int, m.groups())
try:
return dt.date(y, mo, d).isoformat()
except ValueError:
return None
m = re.fullmatch(r"(19|20)\d{2}", s)
if m:
y = int(s)
try:
return dt.date(y, 6, 15).isoformat()
except ValueError:
return None
return None
def _fmt_sf_error(err: Exception):
"""Produce a clear, JSON-safe dict from any simple-salesforce error."""
info = {"type": err.__class__.__name__, "message": str(err)}
if isinstance(err, SalesforceAuthenticationFailed):
content = getattr(err, "content", None) or getattr(err, "response", None)
info.update({
"category": "AUTHENTICATION",
"status": getattr(err, "status", None),
"content": getattr(content, "content", None) if hasattr(content, "content") else content,
})
elif isinstance(err, (SalesforceMalformedRequest, SalesforceGeneralError, SalesforceRefusedRequest)):
info.update({
"category": "REQUEST",
"status": getattr(err, "status", None),
"resource": getattr(err, "resource_name", None),
"url": getattr(err, "url", None),
"content": getattr(err, "content", None),
})
elif isinstance(err, SalesforceResourceNotFound):
info.update({"category": "NOT_FOUND"})
elif isinstance(err, SalesforceExpiredSession):
info.update({"category": "AUTH_EXPIRED"})
return info
def sf_connect():
    """
    Open a Salesforce session with the module-level SF_* settings.

    A trivial query is issued right after login so that an authentication
    problem surfaces here rather than on the first real request.

    Returns the connected Salesforce client.
    Raises RuntimeError when simple-salesforce could not be imported; errors
    from the login or the probe query propagate unchanged.
    """
    if SF_AVAILABLE:
        login_kwargs = {
            "username": SF_USERNAME,
            "password": SF_PASSWORD,
            "security_token": SF_SECURITY_TOKEN,
            "domain": SF_DOMAIN,
        }
        client = Salesforce(**login_kwargs)
        # Probe query: raises if the session is not actually usable.
        client.query("SELECT Id FROM User LIMIT 1")
        return client
    raise RuntimeError("simple-salesforce is not installed. Add `simple-salesforce` to requirements.txt.")
def _raw_create_with_fallback(sf, object_api_name: str, payload: dict):
"""
Fallback path that bypasses simple-salesforce's error wrappers and calls REST directly.
Always returns a dict:
- on success: {"success": True, "id": "...", "url": "...", "status_code": 201}
- on error: {"success": False, "status_code": <int>, "url": "...", "response_json": <json or None>, "response_text": <str>}
"""
url = f"{sf.base_url}sobjects/{object_api_name}"
try:
resp = sf.session.post(url, json=payload, headers=sf.headers, timeout=30)
status = resp.status_code
try:
body = resp.json()
except Exception:
body = None
if 200 <= status < 300:
# Salesforce returns {"id": "...", "success": true, "errors": []}
rec_id = (body or {}).get("id") if isinstance(body, dict) else None
return {"success": True, "id": rec_id, "status_code": status, "url": url, "response_json": body}
else:
# Return raw details so you see the exact field/object error
return {
"success": False,
"status_code": status,
"url": url,
"response_json": body,
"response_text": resp.text,
}
except Exception as e:
# Network or session issues
return {"success": False, "url": url, "exception": _fmt_sf_error(e)}
def sf_push_kyc_record(sf, ocr_results):
    """
    Create one KYC_Record__c combining Aadhaar + PAN.

    Salesforce custom object fields:
      Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
      PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    Args:
      sf: connected client exposing `KYC_Record__c.create(...)`.
      ocr_results: dict with optional "aadhaar" and "pan" entries. An entry is
        used only when its "card_type" is "AADHAAR" / "PAN" respectively.

    Returns a dict that always has "success" and "payload":
      - SDK create worked:        {"success": True, "id", "payload", "via": "sdk"}
      - raw REST fallback worked: {"success": True, "id", "payload", "via": "raw", "raw"}
      - both attempts failed:     {"success": False, "error", "payload", "raw"}
      - nothing usable to send:   {"success": False, "error", "payload": {}}
        (no request is made, so no blank record is created)
    """
    def _usable(value):
        # Treat None, blank strings and the "Not found" placeholder as missing.
        # NOTE(review): presumably the OCR helper uses the same "Not found"
        # placeholder for name/number as for dob -- confirm against utils.
        if isinstance(value, str) and value.strip() in ("", "Not found"):
            return None
        return value

    ocr_results = ocr_results or {}
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = _usable(aadhaar.get("aadhaar_number"))
        payload["Aadhaar_Name__c"] = _usable(aadhaar.get("name"))
        payload["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))  # Date field in SF
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = _usable(pan.get("pan_number"))
        payload["Pan_Name__c"] = _usable(pan.get("name"))
        payload["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))  # Date field in SF

    # Remove None keys to avoid nulling non-nullable fields
    payload = {k: v for k, v in payload.items() if v is not None}

    if not payload:
        # Neither document was recognized: pushing would only create a blank record.
        return {
            "success": False,
            "error": {
                "type": "ValueError",
                "message": "No recognized Aadhaar or PAN fields to push; record not created.",
                "category": "VALIDATION",
            },
            "payload": payload,
        }

    # First try the nice SDK method
    try:
        result = sf.KYC_Record__c.create(payload)
        return {"success": True, "id": result.get("id"), "payload": payload, "via": "sdk"}
    except Exception as e:
        # If the SDK call fails for any reason, retry with a raw REST POST so the
        # exact Salesforce response is available for debugging.
        # NOTE(review): if the SDK failed only after Salesforce had already stored
        # the record (e.g. a read timeout), this retry could create a duplicate.
        raw = _raw_create_with_fallback(sf, "KYC_Record__c", payload)
        if raw.get("success"):
            return {"success": True, "id": raw.get("id"), "payload": payload, "via": "raw", "raw": raw}
        return {"success": False, "error": _fmt_sf_error(e), "payload": payload, "raw": raw}
# ---------- gradio callback ----------
def process_documents(aadhaar_file, pan_file, push_to_sf):
    """
    Gradio callback: OCR the uploaded documents and optionally push to Salesforce.

    Args:
      aadhaar_file, pan_file: uploaded file objects (their `.name` is the path
        handed to the OCR helper); either may be empty, but not both.
      push_to_sf: when truthy, connect to Salesforce and create one
        KYC_Record__c from the OCR results.

    Returns:
      {"error": ...} when no file was supplied; otherwise {"ocr": {...}} with
      per-document results (or a per-document error entry), plus a "salesforce"
      entry when a push was requested. Failures are reported inside the
      returned dict rather than raised.
    """
    if not (aadhaar_file or pan_file):
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    ocr_by_doc = {"aadhaar": None, "pan": None}
    uploads = (
        ("aadhaar", "Aadhaar", aadhaar_file),
        ("pan", "PAN", pan_file),
    )
    for slot, label, upload in uploads:
        if not upload:
            continue
        try:
            fields = extract_kyc_fields(upload.name)
            fields["source_file"] = os.path.basename(upload.name)
        except Exception as exc:
            # One document failing OCR must not block the other.
            fields = {"error": f"{label} OCR failed: {exc!s}", "card_type": "UNKNOWN"}
        ocr_by_doc[slot] = fields

    response = {"ocr": ocr_by_doc}
    if not push_to_sf:
        return response

    try:
        outcome = sf_push_kyc_record(sf_connect(), ocr_by_doc)
        sf_section = {"pushed": outcome.get("success", False), **outcome}
    except Exception as exc:
        # Even connection/auth errors will be formatted
        sf_section = {"pushed": False, "error": _fmt_sf_error(exc)}
    response["salesforce"] = sf_section
    return response
# ---------- UI ----------
# Declares the Gradio interface; the Blocks app is bound to the name `demo`.
with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
gr.Markdown(
"""
# 🧾 Smart KYC OCR → Salesforce
Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
"""
)
# Two file inputs, one per document type, each limited to .jpg/.jpeg/.png.
with gr.Row():
with gr.Column(scale=1):
aadhaar_uploader = gr.File(
label="📤 Aadhaar Upload",
file_types=[".jpg", ".jpeg", ".png"]
)
with gr.Column(scale=1):
pan_uploader = gr.File(
label="📤 PAN Upload",
file_types=[".jpg", ".jpeg", ".png"]
)
# Opt-in Salesforce push: the checkbox starts unchecked (value=False).
push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
output_json = gr.JSON(label="📋 Output (OCR + Salesforce)")
# On click, process_documents receives the two uploads and the checkbox value;
# its return value is sent to the JSON component.
submit_btn.click(
fn=process_documents,
inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
outputs=output_json,
)
gr.Markdown("---")
gr.Markdown(
"""
If an error occurs, you'll now see **status_code**, **url**, and Salesforce’s **response_json** for fast debugging.
"""
)
# Important for Spaces: keep `demo` at module level
# The app is launched only when this file is run as a script.
if __name__ == "__main__":
demo.launch()
|