gopichandra committed on
Commit
efdc972
·
verified ·
1 Parent(s): 7706082

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -144
app.py CHANGED
@@ -4,25 +4,26 @@ os.environ["OMP_NUM_THREADS"] = "1"
4
  import re
5
  import json
6
  import datetime as dt
7
- import csv
8
- from urllib.parse import urlparse
9
- import xml.etree.ElementTree as ET
10
-
11
- import requests
12
  import gradio as gr
13
  from utils import extract_kyc_fields
14
 
15
- # ------------------ HARD-CODED SALESFORCE CREDS (as requested) ------------------
16
  SF_USERNAME = "[email protected]"
17
  SF_PASSWORD = "Lic@2025"
18
  SF_SECURITY_TOKEN = "AmmfRcd6IiYaRtSGntBnzNMQU"
19
- SF_DOMAIN = "login" # "login" (prod/dev) or "test" (sandbox)
20
- SF_API_VERSION = "60.0" # Partner & REST API version
21
- # -------------------------------------------------------------------------------
 
 
 
22
 
23
  # ---------- helpers ----------
24
  def _parse_birthdate(dob_text: str):
25
- """Normalize common DOB formats to YYYY-MM-DD (Salesforce Date)."""
 
 
 
26
  if not dob_text or dob_text == "Not found":
27
  return None
28
  s = dob_text.strip()
@@ -50,146 +51,88 @@ def _parse_birthdate(dob_text: str):
50
  return dt.date(y, 6, 15).isoformat()
51
  except ValueError:
52
  return None
53
- return None
54
-
55
-
56
- def _extract_fault(xml_text: str):
57
- """Pull faultcode/faultstring out of a SOAP envelope for clearer errors."""
58
- try:
59
- root = ET.fromstring(xml_text)
60
- except Exception:
61
- return None, None
62
- fc = fs = None
63
- for e in root.iter():
64
- tag = e.tag.split('}', 1)[-1]
65
- if tag == "faultcode": fc = e.text
66
- if tag == "faultstring": fs = e.text
67
- return fc, fs
68
-
69
-
70
- # ---------- AUTH (SOAP login) ----------
71
- def soap_login(username, password, token, domain="login", api_version=SF_API_VERSION):
72
- """Log in via Partner SOAP. Returns dict with success flag + details."""
73
- endpoint = f"https://{domain}.salesforce.com/services/Soap/u/{api_version}"
74
- payload = f"""<?xml version="1.0" encoding="utf-8" ?>
75
- <env:Envelope xmlns:xsd="http://www.w3.org/2001/XMLSchema"
76
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
77
- xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">
78
- <env:Body>
79
- <n1:login xmlns:n1="urn:partner.soap.sforce.com">
80
- <n1:username>{username}</n1:username>
81
- <n1:password>{password}{token}</n1:password>
82
- </n1:login>
83
- </env:Body>
84
- </env:Envelope>"""
85
- headers = {"Content-Type": "text/xml; charset=UTF-8", "SOAPAction": "login"}
86
- r = requests.post(endpoint, data=payload.encode("utf-8"), headers=headers, timeout=30)
87
-
88
- if r.status_code != 200:
89
- fc, fs = _extract_fault(r.text)
90
- return {
91
- "success": False,
92
- "status_code": r.status_code,
93
- "url": endpoint,
94
- "faultcode": fc, "faultstring": fs,
95
- "response_text": r.text[:800]
96
- }
97
-
98
- try:
99
- root = ET.fromstring(r.text)
100
- except ET.ParseError as e:
101
- return {"success": False, "error": f"SOAP parse error: {e}"}
102
 
103
- sid = srv = None
104
- for e in root.iter():
105
- tag = e.tag.split('}', 1)[-1]
106
- if tag == "sessionId": sid = e.text
107
- if tag == "serverUrl": srv = e.text
108
 
109
- if not sid or not srv:
110
- fc, fs = _extract_fault(r.text)
111
- return {"success": False, "error": "SOAP login missing sessionId/serverUrl", "faultcode": fc, "faultstring": fs}
112
 
113
- parsed = urlparse(srv)
114
- instance_url = f"{parsed.scheme}://{parsed.netloc}"
115
- return {"success": True, "instance_url": instance_url, "access_token": sid}
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
- def rest_create(instance_url: str, access_token: str, object_api: str, payload: dict, api_version=SF_API_VERSION):
119
- """Create a record via REST. Returns dict with success flag + details."""
120
- url = f"{instance_url}/services/data/v{api_version}/sobjects/{object_api}"
121
- headers = {"Authorization": f"Bearer {access_token}", "Content-Type": "application/json"}
122
- r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
 
 
 
 
123
 
124
  try:
125
- body = r.json()
126
  except Exception:
127
  body = None
128
 
129
- if 200 <= r.status_code < 300:
130
  rec_id = body.get("id") if isinstance(body, dict) else None
131
- return {"success": True, "id": rec_id, "status_code": r.status_code, "url": url, "response_json": body}
132
- return {"success": False, "status_code": r.status_code, "url": url, "response_json": body, "response_text": r.text}
133
-
134
-
135
- # ---------- Build payload + push (and always export CSV) ----------
136
- CSV_HEADERS = [
137
- "Aadhaar_Number__c", "Aadhaar_Name__c", "Aadhaar_DOB__c",
138
- "PAN_Number__c", "Pan_Name__c", "Pan_DOB__c"
139
- ]
140
-
141
- def build_kyc_row(ocr_results: dict):
142
- """Map OCR results to your KYC_Record__c fields."""
143
- a = ocr_results.get("aadhaar") or {}
144
- p = ocr_results.get("pan") or {}
145
 
 
146
  return {
147
- "Aadhaar_Number__c": a.get("aadhaar_number") if (a.get("card_type") == "AADHAAR") else None,
148
- "Aadhaar_Name__c": a.get("name") if (a.get("card_type") == "AADHAAR") else None,
149
- "Aadhaar_DOB__c": _parse_birthdate(a.get("dob")) if (a.get("card_type") == "AADHAAR") else None,
150
- "PAN_Number__c": p.get("pan_number") if (p.get("card_type") == "PAN") else None,
151
- "Pan_Name__c": p.get("name") if (p.get("card_type") == "PAN") else None,
152
- "Pan_DOB__c": _parse_birthdate(p.get("dob")) if (p.get("card_type") == "PAN") else None,
153
  }
154
 
155
- def write_csv(row: dict, path: str):
156
- """Write a single-row CSV with your exact field API names."""
157
- os.makedirs(os.path.dirname(path), exist_ok=True)
158
- with open(path, "w", newline="", encoding="utf-8") as f:
159
- w = csv.DictWriter(f, fieldnames=CSV_HEADERS)
160
- w.writeheader()
161
- # Keep only the headers in order; missing keys become empty.
162
- cleaned = {k: (row.get(k) or "") for k in CSV_HEADERS}
163
- w.writerow(cleaned)
164
- return path
165
 
166
-
167
- def push_kyc_record(ocr_results: dict):
168
  """
169
- Try API push (SOAP login + REST create). Return detailed result.
170
- If API login is blocked (INVALID_LOGIN), you'll still have the CSV file from write_csv().
 
 
171
  """
172
- # 1) Build payload for REST
173
- payload = build_kyc_row(ocr_results)
174
- payload = {k: v for k, v in payload.items() if v not in (None, "")}
175
-
176
- # 2) Try SOAP login on preferred domain, then alternate
177
- primary = soap_login(SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN)
178
- if not primary.get("success"):
179
- alt_domain = "test" if SF_DOMAIN == "login" else "login"
180
- alternate = soap_login(SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, alt_domain)
181
- return {"success": False, "auth": {"primary": primary, "alternate": alternate}}
182
-
183
- # 3) Create record via REST
184
- instance_url = primary["instance_url"]
185
- access_token = primary["access_token"]
186
- created = rest_create(instance_url, access_token, "KYC_Record__c", payload)
187
- created["auth_used"] = {"domain": SF_DOMAIN}
188
- return created
189
 
190
 
191
  # ---------- Gradio callback ----------
192
  def process_documents(aadhaar_file, pan_file, push_to_sf):
 
 
 
193
  results = {"aadhaar": None, "pan": None}
194
 
195
  if not aadhaar_file and not pan_file:
@@ -213,17 +156,22 @@ def process_documents(aadhaar_file, pan_file, push_to_sf):
213
  except Exception as e:
214
  results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
215
 
216
- # Always export CSV (works even if API login is blocked)
217
- csv_row = build_kyc_row(results)
218
- csv_path = write_csv(csv_row, "/mnt/data/KYC_Record_upload.csv")
219
-
220
- output = {"ocr": results, "csv_file": csv_path}
221
 
222
  if push_to_sf:
223
- push_res = push_kyc_record(results)
224
- output["salesforce"] = {"pushed": push_res.get("success", False), **push_res}
 
 
 
 
 
 
 
 
 
225
 
226
- return output, csv_path
227
 
228
 
229
  # ---------- UI ----------
@@ -231,9 +179,7 @@ with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
231
  gr.Markdown(
232
  """
233
  # 🧾 Smart KYC OCR → Salesforce
234
- Upload **Aadhaar** and **PAN** separately.
235
- Click **Extract KYC Info** to get OCR + CSV (for Data Import Wizard).
236
- Optionally, toggle **Push to Salesforce** (API) — may fail on free/Essentials orgs.
237
  """
238
  )
239
 
@@ -243,16 +189,15 @@ with gr.Blocks(title="Smart KYC OCR → Salesforce (KYC_Record__c)") as demo:
243
  with gr.Column(scale=1):
244
  pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
245
 
246
- push_to_sf = gr.Checkbox(label="Push to Salesforce via API (create KYC_Record__c)", value=False)
247
- submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
248
 
249
- output_json = gr.JSON(label="📋 Output (OCR + Salesforce/Diagnostics)")
250
- csv_download = gr.File(label="⬇️ CSV for Data Import Wizard")
251
 
252
  submit_btn.click(
253
  fn=process_documents,
254
  inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
255
- outputs=[output_json, csv_download],
256
  )
257
 
258
  if __name__ == "__main__":
 
4
  import re
5
  import json
6
  import datetime as dt
 
 
 
 
 
7
  import gradio as gr
8
  from utils import extract_kyc_fields
9
 
10
# ------------------ SALESFORCE CREDS (read from the environment) ------------------
# Credentials must never be committed to source control. Supply them as
# environment variables / deployment secrets named exactly like the constants
# below. Missing values fall back to empty strings so importing this module
# never fails; authentication will not succeed until they are set.
# NOTE(review): any credentials that were previously committed here should be
# treated as leaked and rotated.
SF_USERNAME = os.environ.get("SF_USERNAME", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_SECURITY_TOKEN = os.environ.get("SF_SECURITY_TOKEN", "")
SF_DOMAIN = os.environ.get("SF_DOMAIN", "login")  # "login" for prod/dev, "test" for sandbox
# ----------------------------------------------------------------------------------
16
+
17
+ # simple-salesforce 1.11.6
18
+ from simple_salesforce import Salesforce
19
+
20
 
21
  # ---------- helpers ----------
22
  def _parse_birthdate(dob_text: str):
23
+ """
24
+ Normalize DOB to YYYY-MM-DD for Salesforce Date fields.
25
+ Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
26
+ """
27
  if not dob_text or dob_text == "Not found":
28
  return None
29
  s = dob_text.strip()
 
51
  return dt.date(y, 6, 15).isoformat()
52
  except ValueError:
53
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ return None
 
 
 
 
56
 
 
 
 
57
 
58
def sf_login():
    """
    Authenticate with simple-salesforce and hand back the connected client.

    The module-level SF_* settings supply the username, password, security
    token and login domain. Nothing is caught here: whatever the constructor
    or the probe query raises propagates to the caller.
    """
    credentials = {
        "username": SF_USERNAME,
        "password": SF_PASSWORD,
        "security_token": SF_SECURITY_TOKEN,
        "domain": SF_DOMAIN,
    }
    client = Salesforce(**credentials)
    # Run a cheap query straight away so an unusable session shows up here
    # rather than on the first real request.
    client.query("SELECT Id FROM User LIMIT 1")
    return client
72
 
73
 
74
def sf_create_kyc_via_session(sf, payload: dict):
    """
    Create one KYC_Record__c by POSTing to the REST sObject endpoint.

    The request reuses the session and headers carried by the authenticated
    simple-salesforce client instead of the SDK's create() wrapper, so HTTP
    failures are returned as plain data rather than raised as SDK exceptions.

    Args:
        sf: Authenticated client exposing ``base_url``, ``session`` and
            ``headers``.
        payload: Mapping of field API name -> value for the new record.

    Returns:
        A dict that always contains ``success``, ``status_code``, ``url`` and
        ``response_json``. On success it also contains ``id``; on failure it
        also contains ``response_text``. When ``payload`` is empty no request
        is sent and ``status_code`` is ``None``.
    """
    # In simple-salesforce 1.11.6, base_url ends with `/services/data/vXX.X/`
    url = f"{sf.base_url}sobjects/KYC_Record__c"

    # Nothing was extracted by OCR: do not create a blank record.
    if not payload:
        return {
            "success": False,
            "status_code": None,
            "url": url,
            "response_json": None,
            "response_text": "No KYC fields to push; request not sent.",
        }

    resp = sf.session.post(url, json=payload, headers=sf.headers, timeout=30)

    # A non-JSON body (e.g. an HTML error page) is reported as None; the raw
    # text is still returned on the failure path below.
    try:
        body = resp.json()
    except ValueError:
        body = None

    if 200 <= resp.status_code < 300:
        rec_id = body.get("id") if isinstance(body, dict) else None
        return {
            "success": True,
            "id": rec_id,
            "status_code": resp.status_code,
            "url": url,
            "response_json": body,
        }

    # Return *raw* details (no SDK exception wrapper involved)
    return {
        "success": False,
        "status_code": resp.status_code,
        "url": url,
        "response_json": body,
        "response_text": resp.text,
    }
107
 
 
 
 
 
 
 
 
 
 
 
108
 
109
def build_payload(ocr_results: dict):
    """
    Map OCR outputs to the Salesforce fields on KYC_Record__c.

    Fields:
        Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
        PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    Args:
        ocr_results: Dict with optional "aadhaar" and "pan" entries, each a
            dict of extracted values (or None). An entry only contributes
            fields when its "card_type" is "AADHAAR" / "PAN" respectively.

    Returns:
        A dict holding only the fields that have a usable value: None and
        blank (empty or whitespace-only) strings are left out.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        dob = aadhaar.get("dob")
        payload["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        payload["Aadhaar_Name__c"] = aadhaar.get("name")
        # The date parser returns None for a missing DOB, so skip the call.
        payload["Aadhaar_DOB__c"] = _parse_birthdate(dob) if dob else None
    if pan.get("card_type") == "PAN":
        dob = pan.get("dob")
        payload["PAN_Number__c"] = pan.get("pan_number")
        payload["Pan_Name__c"] = pan.get("name")
        payload["Pan_DOB__c"] = _parse_birthdate(dob) if dob else None

    # Drop Nones *and* blank strings so we don't try to set blanks on
    # required fields.
    # NOTE(review): the DOB parser treats "Not found" as a missing-value
    # marker; if the OCR helper emits it for names/numbers too, filter it here.
    return {
        field: value
        for field, value in payload.items()
        if value is not None and not (isinstance(value, str) and not value.strip())
    }
 
 
 
 
129
 
130
 
131
  # ---------- Gradio callback ----------
132
  def process_documents(aadhaar_file, pan_file, push_to_sf):
133
+ """
134
+ OCR both uploads; optionally push one KYC_Record__c via simple-salesforce session POST.
135
+ """
136
  results = {"aadhaar": None, "pan": None}
137
 
138
  if not aadhaar_file and not pan_file:
 
156
  except Exception as e:
157
  results["pan"] = {"error": f"PAN OCR failed: {str(e)}", "card_type": "UNKNOWN"}
158
 
159
+ output = {"ocr": results}
 
 
 
 
160
 
161
  if push_to_sf:
162
+ payload = build_payload(results)
163
+ try:
164
+ sf = sf_login()
165
+ created = sf_create_kyc_via_session(sf, payload)
166
+ output["salesforce"] = {"pushed": created.get("success", False), **created}
167
+ except Exception as e:
168
+ # Any auth/connection error shown plainly (not wrapped by SDK)
169
+ output["salesforce"] = {
170
+ "pushed": False,
171
+ "error": {"type": e.__class__.__name__, "message": str(e)}
172
+ }
173
 
174
+ return output
175
 
176
 
177
  # ---------- UI ----------
 
179
  gr.Markdown(
180
  """
181
  # 🧾 Smart KYC OCR → Salesforce
182
+ Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push one **KYC_Record__c**.
 
 
183
  """
184
  )
185
 
 
189
  with gr.Column(scale=1):
190
  pan_uploader = gr.File(label="📤 PAN Upload", file_types=[".jpg", ".jpeg", ".png"])
191
 
192
+ push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
 
193
 
194
+ submit_btn = gr.Button("🔍 Extract KYC Info", variant="primary")
195
+ output_json = gr.JSON(label="📋 Output (OCR + Salesforce)")
196
 
197
  submit_btn.click(
198
  fn=process_documents,
199
  inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
200
+ outputs=output_json,
201
  )
202
 
203
  if __name__ == "__main__":