gopichandra committed on
Commit
7706082
·
verified ·
1 Parent(s): a2e1ba8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -185
app.py CHANGED
@@ -4,6 +4,7 @@ os.environ["OMP_NUM_THREADS"] = "1"
4
  import re
5
  import json
6
  import datetime as dt
 
7
  from urllib.parse import urlparse
8
  import xml.etree.ElementTree as ET
9
 
@@ -15,17 +16,13 @@ from utils import extract_kyc_fields
15
# Salesforce connection settings.
# Credentials are read from the environment (for example Space secrets) so
# that no secret lives in version control. The password and security token
# that used to be committed here must be treated as leaked and rotated.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN", "")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" for prod, "test" for sandbox (both are tried)
SF_API_VERSION = os.environ.get("SF_API_VERSION", "60.0")  # REST & Partner SOAP API version
20
  # -------------------------------------------------------------------------------
21
 
22
-
23
  # ---------- helpers ----------
24
  def _parse_birthdate(dob_text: str):
25
- """
26
- Normalize DOB to YYYY-MM-DD (Salesforce Date).
27
- Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
28
- """
29
  if not dob_text or dob_text == "Not found":
30
  return None
31
  s = dob_text.strip()
@@ -53,190 +50,146 @@ def _parse_birthdate(dob_text: str):
53
  return dt.date(y, 6, 15).isoformat()
54
  except ValueError:
55
  return None
56
-
57
  return None
58
 
59
 
60
def _soap_login_once(username: str, password: str, domain: str, api_version: str):
    """Perform one Partner SOAP login attempt.

    Args:
        username: Salesforce username.
        password: Secret sent as-is; the caller decides whether the security
            token has already been appended.
        domain: Host prefix of the login endpoint, e.g. "login" or "test".
        api_version: Partner API version, e.g. "60.0".

    Returns:
        Tuple ``(instance_url, session_id)``.

    Raises:
        RuntimeError: on a non-200 response, an unparseable response body, or
            a response that lacks sessionId/serverUrl. Fault details are
            included in the message when they can be extracted.
    """
    # Local import keeps this block self-contained; the module does not
    # import saxutils at file level.
    from xml.sax.saxutils import escape

    endpoint = f"https://{domain}.salesforce.com/services/Soap/u/{api_version}"
    # Escape the credentials: a raw '&' or '<' in a password would otherwise
    # produce a malformed envelope.
    envelope = f"""<?xml version="1.0" encoding="utf-8" ?>
<env:Envelope xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">
  <env:Body>
    <n1:login xmlns:n1="urn:partner.soap.sforce.com">
      <n1:username>{escape(username)}</n1:username>
      <n1:password>{escape(password)}</n1:password>
    </n1:login>
  </env:Body>
</env:Envelope>"""

    headers = {
        "Content-Type": "text/xml; charset=UTF-8",
        "SOAPAction": "login",
    }

    resp = requests.post(endpoint, data=envelope.encode("utf-8"), headers=headers, timeout=30)
    if resp.status_code != 200:
        # Try to parse a SOAP fault if present
        faultcode, faultstring = _extract_fault(resp.text)
        msg = f"HTTP {resp.status_code}"
        if faultcode or faultstring:
            msg += f" | faultcode={faultcode} | faultstring={faultstring}"
        raise RuntimeError(msg)

    # Parse XML to find sessionId and serverUrl
    try:
        root = ET.fromstring(resp.text)
    except ET.ParseError as e:
        raise RuntimeError(f"SOAP login parse error: {e}") from e

    session_id = None
    server_url = None
    for elem in root.iter():
        tag = elem.tag.split('}', 1)[-1]  # strip namespace
        if tag == "sessionId":
            session_id = elem.text
        elif tag == "serverUrl":
            server_url = elem.text

    if not session_id or not server_url:
        faultcode, faultstring = _extract_fault(resp.text)
        raise RuntimeError(f"SOAP login failed: sessionId/serverUrl not found | faultcode={faultcode} | faultstring={faultstring}")

    # Only scheme and host of serverUrl are kept as the REST base URL.
    parsed = urlparse(server_url)
    instance_url = f"{parsed.scheme}://{parsed.netloc}"
    return instance_url, session_id
114
 
115
 
116
def _extract_fault(xml_text: str):
    """Return ``(faultcode, faultstring)`` found in a SOAP response.

    Either element may be ``None`` when absent. If the text cannot be parsed
    at all, ``(None, None)`` is returned instead of raising. When an element
    occurs more than once, the last occurrence wins.
    """
    try:
        envelope = ET.fromstring(xml_text)
    except Exception:
        return None, None

    found = {"faultcode": None, "faultstring": None}
    for node in envelope.iter():
        # Drop a leading "{namespace}" so qualified and bare tags match alike.
        local_name = node.tag.split('}', 1)[-1]
        if local_name in found:
            found[local_name] = node.text
    return found["faultcode"], found["faultstring"]
133
-
134
-
135
def soap_login_all_paths(username: str, password: str, token: str, preferred_domain: str = "login", api_version: str = SF_API_VERSION):
    """Attempt SOAP login over every domain / password-mode combination.

    Order of attempts: the preferred domain first, then whichever of
    "login" / "test" differs from it; within each domain, password+token
    first and password alone second.

    Returns:
        ``(instance_url, session_id, attempts)`` for the first attempt that
        succeeds, where ``attempts`` is the list of diagnostics so far.

    Raises:
        RuntimeError: when every attempt fails; the message is a JSON
            document carrying the per-attempt diagnostics.
    """
    fallback_domains = [d for d in ["login", "test"] if d != preferred_domain]
    secrets_by_mode = (("pw_token", f"{password}{token}"), ("pw_only", password))
    candidates = [
        (dom, label, secret)
        for dom in [preferred_domain] + fallback_domains
        for label, secret in secrets_by_mode
    ]

    attempts = []
    for dom, label, secret in candidates:
        try:
            instance_url, session_id = _soap_login_once(username, secret, dom, api_version)
        except Exception as err:
            # Record why this permutation failed and move on to the next one.
            attempts.append({"domain": dom, "mode": label, "result": "fail", "reason": str(err)})
            continue
        attempts.append({"domain": dom, "mode": label, "result": "success"})
        return instance_url, session_id, attempts

    # Nothing worked: surface the full attempt log to the caller.
    raise RuntimeError(json.dumps({
        "message": "All SOAP login attempts failed",
        "attempts": attempts
    }))
165
-
166
-
167
def rest_create(instance_url: str, access_token: str, object_api: str, payload: dict, api_version: str = SF_API_VERSION):
    """POST ``payload`` to the sObject REST endpoint and describe the outcome.

    Returns:
        A dict with ``success`` plus ``status_code``, ``url`` and
        ``response_json``. Successful calls add ``id``; failed calls add
        ``response_text``.
    """
    url = f"{instance_url}/services/data/v{api_version}/sobjects/{object_api}"
    resp = requests.post(
        url,
        headers={
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        },
        data=json.dumps(payload),
        timeout=30,
    )

    # A body that is not JSON is reported as None rather than raising.
    try:
        body = resp.json()
    except Exception:
        body = None

    if not (200 <= resp.status_code < 300):
        return {
            "success": False,
            "status_code": resp.status_code,
            "url": url,
            "response_json": body,
            "response_text": resp.text,
        }

    rec_id = body.get("id") if isinstance(body, dict) else None
    return {"success": True, "id": rec_id, "status_code": resp.status_code, "url": url, "response_json": body}
194
 
195
 
196
def sf_push_kyc_record(ocr_results: dict):
    """Create one KYC_Record__c from the Aadhaar and PAN OCR results.

    Fields sent (API names), each only when its value is not None:
        Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c,
        PAN_Number__c, Pan_Name__c, Pan_DOB__c

    Returns:
        The ``rest_create`` result dict with ``login_diagnostics`` added.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    # A document contributes fields only when OCR classified it as expected.
    fields = {}
    if aadhaar.get("card_type") == "AADHAAR":
        fields["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        fields["Aadhaar_Name__c"] = aadhaar.get("name")
        fields["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))
    if pan.get("card_type") == "PAN":
        fields["PAN_Number__c"] = pan.get("pan_number")
        fields["Pan_Name__c"] = pan.get("name")
        fields["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))
    payload = {name: value for name, value in fields.items() if value is not None}

    # 1) SOAP login (auto-tries domains & password modes)
    instance_url, access_token, diagnostics = soap_login_all_paths(
        SF_USERNAME,
        SF_PASSWORD,
        SF_SECURITY_TOKEN,
        preferred_domain=SF_DOMAIN,
        api_version=SF_API_VERSION,
    )

    # 2) REST create
    outcome = rest_create(instance_url, access_token, "KYC_Record__c", payload, SF_API_VERSION)
    outcome["login_diagnostics"] = diagnostics
    return outcome
 
 
232
 
233
 
234
- # ---------- gradio callback ----------
235
- def process_documents(aadhaar_file, pan_file, push_to_sf):
236
  """
237
- - Runs OCR on Aadhaar and PAN separately.
238
- - Optionally pushes a single KYC_Record__c to Salesforce via SOAP+REST.
239
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  results = {"aadhaar": None, "pan": None}
241
 
242
  if not aadhaar_file and not pan_file:
@@ -260,19 +213,17 @@ def process_documents(aadhaar_file, pan_file, push_to_sf):
260
  except Exception as e:
261
  results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
262
 
263
- output = {"ocr": results}
 
 
 
 
264
 
265
  if push_to_sf:
266
- try:
267
- created = sf_push_kyc_record(results)
268
- output["salesforce"] = {"pushed": created.get("success", False), **created}
269
- except Exception as e:
270
- output["salesforce"] = {
271
- "pushed": False,
272
- "error": {"type": e.__class__.__name__, "message": str(e)}
273
- }
274
 
275
- return output
276
 
277
 
278
  # ---------- UI ----------
@@ -280,41 +231,29 @@ with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
280
  gr.Markdown(
281
  """
282
  # 🧾 Smart KYC OCR → Salesforce
283
- Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
 
 
284
  """
285
  )
286
 
287
  with gr.Row():
288
  with gr.Column(scale=1):
289
- aadhaar_uploader = gr.File(
290
- label="📤 Aadhaar Upload",
291
- file_types=[".jpg", ".jpeg", ".png"]
292
- )
293
  with gr.Column(scale=1):
294
- pan_uploader = gr.File(
295
- label="📤 PAN Upload",
296
- file_types=[".jpg", ".jpeg", ".png"]
297
- )
298
-
299
- push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
300
 
 
301
  submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
302
- output_json = gr.JSON(label="📋 Output (OCR + Salesforce)")
 
 
303
 
304
  submit_btn.click(
305
  fn=process_documents,
306
  inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
307
- outputs=output_json,
308
- )
309
-
310
- gr.Markdown("---")
311
- gr.Markdown(
312
- """
313
- On failure you’ll see a `login_diagnostics` list (each attempt with domain+mode+reason)
314
- and REST `status_code/url/response_json`. This eliminates the SDK TypeError and pinpoints issues fast.
315
- """
316
  )
317
 
318
- # Keep `demo` available for local run / Spaces
319
  if __name__ == "__main__":
320
  demo.launch()
 
4
  import re
5
  import json
6
  import datetime as dt
7
+ import csv
8
  from urllib.parse import urlparse
9
  import xml.etree.ElementTree as ET
10
 
 
16
# Salesforce connection settings.
# Credentials are read from the environment (for example Space secrets) so
# that no secret lives in version control. The password and security token
# that used to be committed here must be treated as leaked and rotated.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN", "")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" (prod/dev) or "test" (sandbox)
SF_API_VERSION = os.environ.get("SF_API_VERSION", "60.0")  # Partner & REST API version
21
  # -------------------------------------------------------------------------------
22
 
 
23
  # ---------- helpers ----------
24
  def _parse_birthdate(dob_text: str):
25
+ """Normalize common DOB formats to YYYY-MM-DD (Salesforce Date)."""
 
 
 
26
  if not dob_text or dob_text == "Not found":
27
  return None
28
  s = dob_text.strip()
 
50
  return dt.date(y, 6, 15).isoformat()
51
  except ValueError:
52
  return None
 
53
  return None
54
 
55
 
56
def _extract_fault(xml_text: str):
    """Find the SOAP faultcode and faultstring texts in an XML document.

    Returns a ``(faultcode, faultstring)`` tuple. Either item is ``None``
    when the element is missing, and both are ``None`` when the text cannot
    be parsed. The last occurrence of each element wins.
    """
    try:
        envelope = ET.fromstring(xml_text)
    except Exception:
        return None, None

    code = message = None
    for node in envelope.iter():
        # Compare on the local name: whatever follows the "{namespace}" part.
        _, brace, bare = node.tag.partition('}')
        local_name = bare if brace else node.tag
        if local_name == "faultcode":
            code = node.text
        elif local_name == "faultstring":
            message = node.text
    return code, message
68
+
69
+
70
+ # ---------- AUTH (SOAP login) ----------
71
def soap_login(username, password, token, domain="login", api_version=SF_API_VERSION):
    """Log in through the Partner SOAP API.

    Args:
        username: Salesforce username.
        password: Account password (without the security token).
        token: Security token appended to the password; ``None`` or an empty
            string sends the password alone.
        domain: Host prefix of the login endpoint, "login" or "test".
        api_version: Partner API version, e.g. "60.0".

    Returns:
        A dict that always carries ``success``. On success it also has
        ``instance_url`` and ``access_token``. On failure it carries whatever
        diagnostics are available (``status_code``, ``url``, ``faultcode``,
        ``faultstring``, ``response_text``, ``error``). Transport errors are
        reported the same way instead of being raised.
    """
    # Local import keeps this block self-contained; the module does not
    # import saxutils at file level.
    from xml.sax.saxutils import escape

    endpoint = f"https://{domain}.salesforce.com/services/Soap/u/{api_version}"
    # Escape the credentials: a raw '&' or '<' in a password would otherwise
    # produce a malformed envelope. A missing token must not become "None".
    safe_username = escape(str(username))
    safe_secret = escape(f"{password}{token or ''}")
    payload = f"""<?xml version="1.0" encoding="utf-8" ?>
<env:Envelope xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">
  <env:Body>
    <n1:login xmlns:n1="urn:partner.soap.sforce.com">
      <n1:username>{safe_username}</n1:username>
      <n1:password>{safe_secret}</n1:password>
    </n1:login>
  </env:Body>
</env:Envelope>"""
    headers = {"Content-Type": "text/xml; charset=UTF-8", "SOAPAction": "login"}

    try:
        r = requests.post(endpoint, data=payload.encode("utf-8"), headers=headers, timeout=30)
    except requests.RequestException as exc:
        # Keep the "dict with success flag" contract for DNS/TLS/timeout errors.
        return {"success": False, "url": endpoint, "error": f"SOAP login request failed: {exc}"}

    if r.status_code != 200:
        fc, fs = _extract_fault(r.text)
        return {
            "success": False,
            "status_code": r.status_code,
            "url": endpoint,
            "faultcode": fc,
            "faultstring": fs,
            "response_text": r.text[:800],  # truncated to keep the diagnostics readable
        }

    try:
        root = ET.fromstring(r.text)
    except ET.ParseError as exc:
        return {"success": False, "error": f"SOAP parse error: {exc}"}

    sid = srv = None
    for node in root.iter():
        tag = node.tag.split('}', 1)[-1]  # strip namespace
        if tag == "sessionId":
            sid = node.text
        elif tag == "serverUrl":
            srv = node.text

    if not sid or not srv:
        fc, fs = _extract_fault(r.text)
        return {"success": False, "error": "SOAP login missing sessionId/serverUrl", "faultcode": fc, "faultstring": fs}

    # Only scheme and host of serverUrl are kept as the REST base URL.
    parsed = urlparse(srv)
    instance_url = f"{parsed.scheme}://{parsed.netloc}"
    return {"success": True, "instance_url": instance_url, "access_token": sid}
116
 
117
 
118
def rest_create(instance_url: str, access_token: str, object_api: str, payload: dict, api_version=SF_API_VERSION):
    """Create a record via the Salesforce REST API.

    Args:
        instance_url: Base URL of the org (scheme and host).
        access_token: Session id / bearer token from the login step.
        object_api: API name of the sObject, e.g. "KYC_Record__c".
        payload: Field API names mapped to values; sent as the JSON body.
        api_version: REST API version, e.g. "60.0".

    Returns:
        A dict that always carries ``success``, ``status_code``, ``url`` and
        ``response_json``. Successful calls add ``id``; failed calls add
        ``response_text``, plus ``error`` when the request never reached the
        server. Transport errors are reported this way instead of being raised.
    """
    url = f"{instance_url}/services/data/v{api_version}/sobjects/{object_api}"
    headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json"}

    try:
        r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
    except requests.RequestException as exc:
        # Keep the dict-result contract for DNS/TLS/timeout errors so callers
        # that only inspect "success" do not crash.
        return {
            "success": False,
            "status_code": None,
            "url": url,
            "response_json": None,
            "response_text": "",
            "error": f"REST request failed: {exc}",
        }

    # A body that is not JSON is reported as None; JSON decode errors are
    # ValueError subclasses.
    try:
        body = r.json()
    except ValueError:
        body = None

    if 200 <= r.status_code < 300:
        rec_id = body.get("id") if isinstance(body, dict) else None
        return {"success": True, "id": rec_id, "status_code": r.status_code, "url": url, "response_json": body}
    return {"success": False, "status_code": r.status_code, "url": url, "response_json": body, "response_text": r.text}
 
 
 
 
 
 
 
133
 
134
 
135
+ # ---------- Build payload + push (and always export CSV) ----------
136
# Column order of the exported CSV; each entry is a field API name.
CSV_HEADERS = [
    "Aadhaar_Number__c",
    "Aadhaar_Name__c",
    "Aadhaar_DOB__c",
    "PAN_Number__c",
    "Pan_Name__c",
    "Pan_DOB__c",
]
140
+
141
def build_kyc_row(ocr_results: dict):
    """Translate OCR output into a dict keyed by KYC_Record__c field names.

    All six keys are always present. A document's three fields stay ``None``
    unless its ``card_type`` is the expected one ("AADHAAR" for the
    ``aadhaar`` entry, "PAN" for the ``pan`` entry). Dates of birth go
    through ``_parse_birthdate``.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    # Pre-populate every column so the key order is fixed.
    row = {
        "Aadhaar_Number__c": None,
        "Aadhaar_Name__c": None,
        "Aadhaar_DOB__c": None,
        "PAN_Number__c": None,
        "Pan_Name__c": None,
        "Pan_DOB__c": None,
    }

    if aadhaar.get("card_type") == "AADHAAR":
        row["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        row["Aadhaar_Name__c"] = aadhaar.get("name")
        row["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))

    if pan.get("card_type") == "PAN":
        row["PAN_Number__c"] = pan.get("pan_number")
        row["Pan_Name__c"] = pan.get("name")
        row["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))

    return row
 
154
 
155
def write_csv(row: dict, path: str):
    """Write a header line plus one data row to ``path``.

    Columns are exactly ``CSV_HEADERS``, in that order. Keys of ``row`` that
    are not headers are ignored; missing or falsy values are written as
    empty cells.

    Args:
        row: Field API names mapped to values.
        path: Destination file. Missing parent directories are created.

    Returns:
        ``path``, unchanged.
    """
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the path actually has one.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_HEADERS)
        writer.writeheader()
        # Keep only the headers, in order; missing keys become empty.
        writer.writerow({k: (row.get(k) or "") for k in CSV_HEADERS})
    return path
165
 
166
 
167
def push_kyc_record(ocr_results: dict):
    """Push one KYC_Record__c through the API (SOAP login + REST create).

    Login is tried on ``SF_DOMAIN`` first and on the other domain
    ("test" or "login") second. Whichever login succeeds is used for the
    create call.

    Returns:
        When both logins fail: ``{"success": False, "auth": {"primary": ...,
        "alternate": ...}}`` with the two login result dicts.
        Otherwise: the ``rest_create`` result dict with ``auth_used`` added,
        naming the domain whose session was used.
    """
    # 1) Build payload for REST; empty values are not sent.
    payload = build_kyc_row(ocr_results)
    payload = {k: v for k, v in payload.items() if v not in (None, "")}

    # 2) Try SOAP login on preferred domain, then alternate
    session = soap_login(SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN)
    domain_used = SF_DOMAIN
    if not session.get("success"):
        primary = session
        alt_domain = "test" if SF_DOMAIN == "login" else "login"
        alternate = soap_login(SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, alt_domain)
        if not alternate.get("success"):
            return {"success": False, "auth": {"primary": primary, "alternate": alternate}}
        # The alternate login worked: use its session instead of discarding it.
        session = alternate
        domain_used = alt_domain

    # 3) Create record via REST
    created = rest_create(session["instance_url"], session["access_token"], "KYC_Record__c", payload)
    created["auth_used"] = {"domain": domain_used}
    return created
189
+
190
+
191
+ # ---------- Gradio callback ----------
192
+ def process_documents(aadhaar_file, pan_file, push_to_sf):
193
  results = {"aadhaar": None, "pan": None}
194
 
195
  if not aadhaar_file and not pan_file:
 
213
  except Exception as e:
214
  results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
215
 
216
+ # Always export CSV (works even if API login is blocked)
217
+ csv_row = build_kyc_row(results)
218
+ csv_path = write_csv(csv_row, "/mnt/data/KYC_Record_upload.csv")
219
+
220
+ output = {"ocr": results, "csv_file": csv_path}
221
 
222
  if push_to_sf:
223
+ push_res = push_kyc_record(results)
224
+ output["salesforce"] = {"pushed": push_res.get("success", False), **push_res}
 
 
 
 
 
 
225
 
226
+ return output, csv_path
227
 
228
 
229
  # ---------- UI ----------
 
231
  gr.Markdown(
232
  """
233
  # 🧾 Smart KYC OCR → Salesforce
234
+ Upload **Aadhaar** and **PAN** separately.
235
+ Click **Extract KYC Info** to get OCR + CSV (for Data Import Wizard).
236
+ Optionally, toggle **Push to Salesforce** (API) — may fail on free/Essentials orgs.
237
  """
238
  )
239
 
240
  with gr.Row():
241
  with gr.Column(scale=1):
242
+ aadhaar_uploader = gr.File(label="📤 Aadhaar Upload", file_types=[".jpg", ".jpeg", ".png"])
 
 
 
243
  with gr.Column(scale=1):
244
+ pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
 
 
 
 
 
245
 
246
+ push_to_sf = gr.Checkbox(label="Push to Salesforce via API (create KYC_Record__c)", value=False)
247
  submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
248
+
249
+ output_json = gr.JSON(label="📋 Output (OCR + Salesforce/Diagnostics)")
250
+ csv_download = gr.File(label="⬇️ CSV for Data Import Wizard")
251
 
252
  submit_btn.click(
253
  fn=process_documents,
254
  inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
255
+ outputs=[output_json, csv_download],
 
 
 
 
 
 
 
 
256
  )
257
 
 
258
  if __name__ == "__main__":
259
  demo.launch()