Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,25 +4,26 @@ os.environ["OMP_NUM_THREADS"] = "1"
|
|
4 |
import re
|
5 |
import json
|
6 |
import datetime as dt
|
7 |
-
import csv
|
8 |
-
from urllib.parse import urlparse
|
9 |
-
import xml.etree.ElementTree as ET
|
10 |
-
|
11 |
-
import requests
|
12 |
import gradio as gr
|
13 |
from utils import extract_kyc_fields
|
14 |
|
15 |
-
# ------------------ HARD-CODED SALESFORCE CREDS
|
16 |
SF_USERNAME = "[email protected]"
|
17 |
SF_PASSWORD = "Lic@2025"
|
18 |
SF_SECURITY_TOKEN = "AmmfRcd6IiYaRtSGntBnzNMQU"
|
19 |
-
SF_DOMAIN = "login"
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
22 |
|
23 |
# ---------- helpers ----------
|
24 |
def _parse_birthdate(dob_text: str):
|
25 |
-
"""
|
|
|
|
|
|
|
26 |
if not dob_text or dob_text == "Not found":
|
27 |
return None
|
28 |
s = dob_text.strip()
|
@@ -50,146 +51,88 @@ def _parse_birthdate(dob_text: str):
|
|
50 |
return dt.date(y, 6, 15).isoformat()
|
51 |
except ValueError:
|
52 |
return None
|
53 |
-
return None
|
54 |
-
|
55 |
-
|
56 |
-
def _extract_fault(xml_text: str):
    """Return ``(faultcode, faultstring)`` found in a SOAP envelope.

    Tags are compared by local name (the part after any ``{namespace}``
    prefix), so the fault elements are found whichever namespace they use.

    Args:
        xml_text: Raw response body, expected to be an XML document.

    Returns:
        A 2-tuple ``(faultcode, faultstring)``. An item is ``None`` when the
        corresponding element is absent; both are ``None`` when ``xml_text``
        is empty or cannot be parsed as XML.
    """
    if not xml_text:
        return None, None
    try:
        root = ET.fromstring(xml_text)
    except (ET.ParseError, TypeError, ValueError):
        # Only "this is not parseable XML" is expected here; anything else
        # is a genuine bug and should not be silently swallowed.
        return None, None

    found = {"faultcode": None, "faultstring": None}
    for element in root.iter():
        local_name = element.tag.split("}", 1)[-1]
        if local_name in found:
            # As before, a later occurrence overrides an earlier one.
            found[local_name] = element.text
    return found["faultcode"], found["faultstring"]
|
68 |
-
|
69 |
-
|
70 |
-
# ---------- AUTH (SOAP login) ----------
|
71 |
-
def soap_login(username, password, token, domain="login", api_version=SF_API_VERSION):
|
72 |
-
"""Log in via Partner SOAP. Returns dict with success flag + details."""
|
73 |
-
endpoint = f"https://{domain}.salesforce.com/services/Soap/u/{api_version}"
|
74 |
-
payload = f"""<?xml version="1.0" encoding="utf-8" ?>
|
75 |
-
<env:Envelope xmlns:xsd="http://www.w3.org/2001/XMLSchema"
|
76 |
-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
77 |
-
xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">
|
78 |
-
<env:Body>
|
79 |
-
<n1:login xmlns:n1="urn:partner.soap.sforce.com">
|
80 |
-
<n1:username>{username}</n1:username>
|
81 |
-
<n1:password>{password}{token}</n1:password>
|
82 |
-
</n1:login>
|
83 |
-
</env:Body>
|
84 |
-
</env:Envelope>"""
|
85 |
-
headers = {"Content-Type": "text/xml; charset=UTF-8", "SOAPAction": "login"}
|
86 |
-
r = requests.post(endpoint, data=payload.encode("utf-8"), headers=headers, timeout=30)
|
87 |
-
|
88 |
-
if r.status_code != 200:
|
89 |
-
fc, fs = _extract_fault(r.text)
|
90 |
-
return {
|
91 |
-
"success": False,
|
92 |
-
"status_code": r.status_code,
|
93 |
-
"url": endpoint,
|
94 |
-
"faultcode": fc, "faultstring": fs,
|
95 |
-
"response_text": r.text[:800]
|
96 |
-
}
|
97 |
-
|
98 |
-
try:
|
99 |
-
root = ET.fromstring(r.text)
|
100 |
-
except ET.ParseError as e:
|
101 |
-
return {"success": False, "error": f"SOAP parse error: {e}"}
|
102 |
|
103 |
-
|
104 |
-
for e in root.iter():
|
105 |
-
tag = e.tag.split('}', 1)[-1]
|
106 |
-
if tag == "sessionId": sid = e.text
|
107 |
-
if tag == "serverUrl": srv = e.text
|
108 |
|
109 |
-
if not sid or not srv:
|
110 |
-
fc, fs = _extract_fault(r.text)
|
111 |
-
return {"success": False, "error": "SOAP login missing sessionId/serverUrl", "faultcode": fc, "faultstring": fs}
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
|
118 |
-
def
|
119 |
-
"""
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
123 |
|
124 |
try:
|
125 |
-
body =
|
126 |
except Exception:
|
127 |
body = None
|
128 |
|
129 |
-
if 200 <=
|
130 |
rec_id = body.get("id") if isinstance(body, dict) else None
|
131 |
-
return {
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
"PAN_Number__c", "Pan_Name__c", "Pan_DOB__c"
|
139 |
-
]
|
140 |
-
|
141 |
-
def build_kyc_row(ocr_results: dict):
|
142 |
-
"""Map OCR results to your KYC_Record__c fields."""
|
143 |
-
a = ocr_results.get("aadhaar") or {}
|
144 |
-
p = ocr_results.get("pan") or {}
|
145 |
|
|
|
146 |
return {
|
147 |
-
"
|
148 |
-
"
|
149 |
-
"
|
150 |
-
"
|
151 |
-
"
|
152 |
-
"Pan_DOB__c": _parse_birthdate(p.get("dob")) if (p.get("card_type") == "PAN") else None,
|
153 |
}
|
154 |
|
155 |
-
def write_csv(row: dict, path: str, headers=None):
    """Write a header line plus one data line to a CSV file.

    Args:
        row: Mapping of column name to value. Keys that are not in the
            column list are ignored; missing or falsy values are written as
            an empty string.
        path: Destination file path. Its parent directory is created when
            it does not exist yet. A bare file name (no directory part) is
            written relative to the current working directory.
        headers: Optional column names, in output order. Defaults to the
            module-level ``CSV_HEADERS``.

    Returns:
        ``path``, unchanged.
    """
    columns = list(CSV_HEADERS if headers is None else headers)

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has one.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        # Restrict the row to the known columns, in header order.
        writer.writerow({name: (row.get(name) or "") for name in columns})
    return path
|
165 |
|
166 |
-
|
167 |
-
def push_kyc_record(ocr_results: dict):
|
168 |
"""
|
169 |
-
|
170 |
-
|
|
|
|
|
171 |
"""
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
if
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
#
|
184 |
-
|
185 |
-
access_token = primary["access_token"]
|
186 |
-
created = rest_create(instance_url, access_token, "KYC_Record__c", payload)
|
187 |
-
created["auth_used"] = {"domain": SF_DOMAIN}
|
188 |
-
return created
|
189 |
|
190 |
|
191 |
# ---------- Gradio callback ----------
|
192 |
def process_documents(aadhaar_file, pan_file, push_to_sf):
|
|
|
|
|
|
|
193 |
results = {"aadhaar": None, "pan": None}
|
194 |
|
195 |
if not aadhaar_file and not pan_file:
|
@@ -213,17 +156,22 @@ def process_documents(aadhaar_file, pan_file, push_to_sf):
|
|
213 |
except Exception as e:
|
214 |
results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
|
215 |
|
216 |
-
|
217 |
-
csv_row = build_kyc_row(results)
|
218 |
-
csv_path = write_csv(csv_row, "/mnt/data/KYC_Record_upload.csv")
|
219 |
-
|
220 |
-
output = {"ocr": results, "csv_file": csv_path}
|
221 |
|
222 |
if push_to_sf:
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
-
return output
|
227 |
|
228 |
|
229 |
# ---------- UI ----------
|
@@ -231,9 +179,7 @@ with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
|
|
231 |
gr.Markdown(
|
232 |
"""
|
233 |
# 🧾 Smart KYC OCR → Salesforce
|
234 |
-
Upload **Aadhaar** and **PAN**
|
235 |
-
Click **Extract KYC Info** to get OCR + CSV (for Data Import Wizard).
|
236 |
-
Optionally, toggle **Push to Salesforce** (API) — may fail on free/Essentials orgs.
|
237 |
"""
|
238 |
)
|
239 |
|
@@ -243,16 +189,15 @@ with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
|
|
243 |
with gr.Column(scale=1):
|
244 |
pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
|
245 |
|
246 |
-
push_to_sf = gr.Checkbox(label="Push to Salesforce
|
247 |
-
submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
|
248 |
|
249 |
-
|
250 |
-
|
251 |
|
252 |
submit_btn.click(
|
253 |
fn=process_documents,
|
254 |
inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
|
255 |
-
outputs=
|
256 |
)
|
257 |
|
258 |
if __name__ == "__main__":
|
|
|
4 |
import re
|
5 |
import json
|
6 |
import datetime as dt
|
|
|
|
|
|
|
|
|
|
|
7 |
import gradio as gr
|
8 |
from utils import extract_kyc_fields
|
9 |
|
10 |
+
# ------------------ HARD-CODED SALESFORCE CREDS ------------------
|
11 |
SF_USERNAME = "[email protected]"
|
12 |
SF_PASSWORD = "Lic@2025"
|
13 |
SF_SECURITY_TOKEN = "AmmfRcd6IiYaRtSGntBnzNMQU"
|
14 |
+
SF_DOMAIN = "login" # "login" for prod/dev, "test" for sandbox
|
15 |
+
# ---------------------------------------------------------------
|
16 |
+
|
17 |
+
# simple-salesforce 1.11.6
|
18 |
+
from simple_salesforce import Salesforce
|
19 |
+
|
20 |
|
21 |
# ---------- helpers ----------
|
22 |
def _parse_birthdate(dob_text: str):
|
23 |
+
"""
|
24 |
+
Normalize DOB to YYYY-MM-DD for Salesforce Date fields.
|
25 |
+
Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
|
26 |
+
"""
|
27 |
if not dob_text or dob_text == "Not found":
|
28 |
return None
|
29 |
s = dob_text.strip()
|
|
|
51 |
return dt.date(y, 6, 15).isoformat()
|
52 |
except ValueError:
|
53 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
+
return None
|
|
|
|
|
|
|
|
|
56 |
|
|
|
|
|
|
|
57 |
|
58 |
+
def sf_login():
    """Open a Salesforce session with simple-salesforce and verify it.

    Each credential is taken from the environment variable of the same name
    (``SF_USERNAME``, ``SF_PASSWORD``, ``SF_SECURITY_TOKEN``, ``SF_DOMAIN``)
    when that variable is set and non-empty, and otherwise from the
    module-level constant. This lets a deployment supply secrets without
    committing them to source, while keeping the previous behaviour when no
    variable is set.

    Returns:
        The authenticated ``Salesforce`` client.

    Raises:
        Nothing is caught here: any exception raised by the ``Salesforce``
        constructor or by the verification query propagates to the caller.
    """
    import os  # function-scope import keeps this block self-contained

    sf = Salesforce(
        username=os.environ.get("SF_USERNAME") or SF_USERNAME,
        password=os.environ.get("SF_PASSWORD") or SF_PASSWORD,
        security_token=os.environ.get("SF_SECURITY_TOKEN") or SF_SECURITY_TOKEN,
        domain=os.environ.get("SF_DOMAIN") or SF_DOMAIN,
    )
    # Lightweight query so that bad auth / blocked API access is surfaced
    # immediately rather than on the first real request.
    sf.query("SELECT Id FROM User LIMIT 1")
    return sf
|
72 |
|
73 |
|
74 |
+
def sf_create_kyc_via_session(sf, payload: dict, *, sobject: str = "KYC_Record__c", timeout: float = 30):
    """Create one record by POSTing to the REST sObject endpoint.

    The request is sent through the client's own ``session`` and ``headers``
    instead of the SDK's ``create()`` wrapper, so an HTTP failure comes back
    as plain data rather than as an SDK exception.

    Args:
        sf: Authenticated client exposing ``base_url``, ``session`` and
            ``headers``.
        payload: Field API name -> value mapping, sent as the JSON body.
        sobject: API name of the object to create. Defaults to
            ``KYC_Record__c``.
        timeout: Request timeout in seconds, passed to ``session.post``.

    Returns:
        A dict. On a 2xx status: ``success`` (True), ``id`` (from the JSON
        body, or ``None`` if the body is not a dict), ``status_code``,
        ``url`` and ``response_json``. Otherwise: ``success`` (False),
        ``status_code``, ``url``, ``response_json`` (``None`` when the body
        is not JSON) and the raw ``response_text``.
    """
    # The original author notes that in simple-salesforce 1.11.6 base_url
    # ends with `/services/data/vXX.X/`, hence no separator is added here.
    url = f"{sf.base_url}sobjects/{sobject}"
    resp = sf.session.post(url, json=payload, headers=sf.headers, timeout=timeout)

    try:
        body = resp.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses; a non-JSON body
        # (e.g. an HTML error page) is reported through response_text.
        body = None

    if 200 <= resp.status_code < 300:
        return {
            "success": True,
            "id": body.get("id") if isinstance(body, dict) else None,
            "status_code": resp.status_code,
            "url": url,
            "response_json": body,
        }

    # Return the raw details; no SDK exception wrapper is involved.
    return {
        "success": False,
        "status_code": resp.status_code,
        "url": url,
        "response_json": body,
        "response_text": resp.text,
    }
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
+
def build_payload(ocr_results: dict):
    """Map OCR outputs to field API names on ``KYC_Record__c``.

    Fields produced (each only when a usable value exists):
        Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
        PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    A document contributes fields only when its ``card_type`` is exactly
    ``"AADHAAR"`` (for the ``aadhaar`` entry) or ``"PAN"`` (for the ``pan``
    entry). DOB values go through ``_parse_birthdate``.

    Args:
        ocr_results: Dict with optional ``"aadhaar"`` and ``"pan"`` entries,
            each a dict of extracted fields or ``None``. ``None`` for the
            whole argument is treated as empty.

    Returns:
        A dict containing only fields with a usable value. ``None``, blank
        strings and the ``"Not found"`` placeholder are dropped so they are
        never written to Salesforce as if they were real data.
    """
    results = ocr_results or {}
    aadhaar = results.get("aadhaar") or {}
    pan = results.get("pan") or {}

    def _usable(value):
        """Tell real data apart from None / blank / placeholder text."""
        if value is None:
            return False
        if isinstance(value, str):
            # _parse_birthdate treats "Not found" as a missing DOB; this
            # assumes the OCR helper uses the same placeholder for the other
            # fields too -- TODO confirm against utils.extract_kyc_fields.
            return value.strip() not in ("", "Not found")
        return True

    def _date(value):
        """Parse a DOB only when there is something to parse."""
        return _parse_birthdate(value) if _usable(value) else None

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        payload["Aadhaar_Name__c"] = aadhaar.get("name")
        payload["Aadhaar_DOB__c"] = _date(aadhaar.get("dob"))
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = pan.get("pan_number")
        payload["Pan_Name__c"] = pan.get("name")
        payload["Pan_DOB__c"] = _date(pan.get("dob"))

    # Drop unusable values so blanks are never set on required fields.
    return {field: value for field, value in payload.items() if _usable(value)}
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
# ---------- Gradio callback ----------
|
132 |
def process_documents(aadhaar_file, pan_file, push_to_sf):
|
133 |
+
"""
|
134 |
+
OCR both uploads; optionally push one KYC_Record__c via simple-salesforce session POST.
|
135 |
+
"""
|
136 |
results = {"aadhaar": None, "pan": None}
|
137 |
|
138 |
if not aadhaar_file and not pan_file:
|
|
|
156 |
except Exception as e:
|
157 |
results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
|
158 |
|
159 |
+
output = {"ocr": results}
|
|
|
|
|
|
|
|
|
160 |
|
161 |
if push_to_sf:
|
162 |
+
payload = build_payload(results)
|
163 |
+
try:
|
164 |
+
sf = sf_login()
|
165 |
+
created = sf_create_kyc_via_session(sf, payload)
|
166 |
+
output["salesforce"] = {"pushed": created.get("success", False), **created}
|
167 |
+
except Exception as e:
|
168 |
+
# Any auth/connection error shown plainly (not wrapped by SDK)
|
169 |
+
output["salesforce"] = {
|
170 |
+
"pushed": False,
|
171 |
+
"error": {"type": e.__class__.__name__, "message": str(e)}
|
172 |
+
}
|
173 |
|
174 |
+
return output
|
175 |
|
176 |
|
177 |
# ---------- UI ----------
|
|
|
179 |
gr.Markdown(
|
180 |
"""
|
181 |
# 🧾 Smart KYC OCR → Salesforce
|
182 |
+
Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
|
|
|
|
|
183 |
"""
|
184 |
)
|
185 |
|
|
|
189 |
with gr.Column(scale=1):
|
190 |
pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
|
191 |
|
192 |
+
push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
|
|
|
193 |
|
194 |
+
submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
|
195 |
+
output_json = gr.JSON(label="📋 Output (OCR + Salesforce)")
|
196 |
|
197 |
submit_btn.click(
|
198 |
fn=process_documents,
|
199 |
inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
|
200 |
+
outputs=output_json,
|
201 |
)
|
202 |
|
203 |
if __name__ == "__main__":
|