gopichandra committed on
Commit
52f3a2e
·
verified ·
1 Parent(s): 20e1d9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +150 -17
app.py CHANGED
@@ -1,47 +1,179 @@
1
  import os
2
  os.environ["OMP_NUM_THREADS"] = "1"
3
 
 
 
4
  import gradio as gr
5
  from utils import extract_kyc_fields
6
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
def process_documents(aadhaar_file, pan_file):
    """
    Run KYC OCR on an Aadhaar upload and/or a PAN upload.

    Args:
        aadhaar_file: Uploaded Aadhaar file object exposing a ``.name`` path,
            or a falsy value when nothing was uploaded.
        pan_file: Uploaded PAN file object exposing a ``.name`` path,
            or a falsy value when nothing was uploaded.

    Returns:
        dict: ``{"error": ...}`` when neither file was supplied; otherwise
        ``{"aadhaar": ..., "pan": ...}`` where each entry is the OCR result
        (with an added ``source_file`` key), an ``{"error": ...}`` dict if
        OCR raised, or ``None`` if that file was not uploaded.
    """
    # Validate up front. The guard only fires when *neither* file is present
    # (a single upload is accepted), so the message must say "at least one".
    if not aadhaar_file and not pan_file:
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    results = {"aadhaar": None, "pan": None}

    if aadhaar_file:
        try:
            res = extract_kyc_fields(aadhaar_file.name)
            res["source_file"] = os.path.basename(aadhaar_file.name)
            results["aadhaar"] = res
        except Exception as e:
            # UI boundary: report the failure in the JSON output instead of
            # crashing the callback.
            results["aadhaar"] = {"error": f"Aadhaar OCR failed: {str(e)}"}

    if pan_file:
        try:
            res = extract_kyc_fields(pan_file.name)
            res["source_file"] = os.path.basename(pan_file.name)
            results["pan"] = res
        except Exception as e:
            results["pan"] = {"error": f"PAN OCR failed: {str(e)}"}

    return results
 
 
 
 
 
 
35
 
 
36
 
37
- with gr.Blocks(title="Smart KYC OCR") as demo:
 
 
38
  gr.Markdown(
39
  """
40
- # 🧾 Smart KYC OCR Tool
41
- Upload an **Aadhaar card** and a **PAN card** separately.
42
- Click **Extract KYC Info** to get structured output.
43
 
44
- ---
 
 
45
  """
46
  )
47
 
@@ -57,20 +189,21 @@ with gr.Blocks(title="Smart KYC OCR") as demo:
57
  file_types=[".jpg", ".jpeg", ".png"]
58
  )
59
 
 
 
60
  submit_btn = gr.Button("πŸ” Extract KYC Info", variant="primary")
61
- output_json = gr.JSON(label="πŸ“‹ Extracted KYC Fields")
62
 
63
  submit_btn.click(
64
  fn=process_documents,
65
- inputs=[aadhaar_uploader, pan_uploader],
66
  outputs=output_json,
67
  )
68
 
69
  gr.Markdown("---")
70
  gr.Markdown(
71
  """
72
- πŸ”’ **Privacy Note:** This app processes your documents locally in the cloud.
73
- No data is stored or shared.
74
  """
75
  )
76
 
 
1
  import os
2
  os.environ["OMP_NUM_THREADS"] = "1"
3
 
4
+ import re
5
+ import datetime as dt
6
  import gradio as gr
7
  from utils import extract_kyc_fields
8
 
9
+ # ------------------------------------------------------------------
10
+ # 🔐 CREDENTIALS STRATEGY
11
+ # 1) Preferred: read from environment (HF Space Secrets / .env).
12
+ # 2) Fallback (NOT RECOMMENDED): hardcoded defaults below.
13
+ # If you insist, replace the REPLACE_ME values locally.
14
+ # ------------------------------------------------------------------
15
# Fallback values used by sf_connect_from_env_or_defaults() when the matching
# SF_* environment variable is not set. Values that still start with
# "REPLACE_ME" are rejected there, so the placeholders can never reach
# Salesforce. Do not commit real credentials (or realistic examples) here.
SF_DEFAULT_USERNAME = "REPLACE_ME_USERNAME"  # Salesforce login username
SF_DEFAULT_PASSWORD = "REPLACE_ME_PASSWORD"  # Salesforce login password
SF_DEFAULT_TOKEN = "REPLACE_ME_SECURITY_TOKEN"  # Salesforce security token
SF_DEFAULT_DOMAIN = "login"  # "login" (prod) or "test" (sandbox)
19
+ # ------------------------------------------------------------------
20
 
21
# simple-salesforce is required for SF push. The import is optional: when it
# fails, SF_AVAILABLE is set to False and the `Salesforce` name is left
# undefined, so callers must check SF_AVAILABLE before using it.
try:
    from simple_salesforce import Salesforce
    SF_AVAILABLE = True
except Exception:
    # Any import-time failure (not only a missing package) disables the push.
    SF_AVAILABLE = False
27
+
28
+
29
+ # ---------- helpers ----------
30
+ def _parse_birthdate(dob_text: str):
31
+ """
32
+ Normalize DOB to YYYY-MM-DD for Salesforce Date fields.
33
+ Supports dd/mm/yyyy, dd-mm-yyyy, dd.mm.yyyy, yyyy-mm-dd, or just YYYY (mapped to mid-year).
34
+ """
35
+ if not dob_text or dob_text == "Not found":
36
+ return None
37
+ s = dob_text.strip()
38
+
39
+ m = re.fullmatch(r"(\d{4})-(\d{2})-(\d{2})", s)
40
+ if m:
41
+ y, mo, d = map(int, m.groups())
42
+ try:
43
+ return dt.date(y, mo, d).isoformat()
44
+ except ValueError:
45
+ return None
46
+
47
+ m = re.fullmatch(r"(\d{2})[./-](\d{2})[./-](\d{4})", s)
48
+ if m:
49
+ d, mo, y = map(int, m.groups())
50
+ try:
51
+ return dt.date(y, mo, d).isoformat()
52
+ except ValueError:
53
+ return None
54
+
55
+ m = re.fullmatch(r"(19|20)\d{2}", s)
56
+ if m:
57
+ y = int(s)
58
+ try:
59
+ return dt.date(y, 6, 15).isoformat()
60
+ except ValueError:
61
+ return None
62
+
63
+ return None
64
+
65
+
66
def sf_connect_from_env_or_defaults():
    """
    Open a Salesforce session using environment variables, falling back to
    the module-level ``SF_DEFAULT_*`` constants.

    Env vars: SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN

    Returns:
        A connected ``Salesforce`` client.

    Raises:
        RuntimeError: if simple-salesforce could not be imported.
        ValueError: if username, password or token is still a
            ``REPLACE_ME*`` placeholder, or username/password is empty.
    """
    if not SF_AVAILABLE:
        raise RuntimeError("simple-salesforce is not installed.")

    # `or` instead of getenv's default argument: a variable that is set but
    # empty must fall back too, otherwise blank credentials slip past the
    # placeholder guard below and an empty domain overrides "login".
    username = os.getenv("SF_USERNAME") or SF_DEFAULT_USERNAME
    password = os.getenv("SF_PASSWORD") or SF_DEFAULT_PASSWORD
    # The token keeps plain getenv semantics: an explicitly empty token is
    # passed through unchanged rather than replaced by the placeholder.
    token = os.getenv("SF_SECURITY_TOKEN", SF_DEFAULT_TOKEN)
    domain = os.getenv("SF_DOMAIN") or SF_DEFAULT_DOMAIN or "login"

    # Basic validation: block if credentials are blank or still placeholders.
    placeholders_left = any(
        v.startswith("REPLACE_ME") for v in (username, password, token)
    )
    if not username or not password or placeholders_left:
        raise ValueError(
            "Salesforce credentials missing. "
            "Either set Space Secrets/ENV (SF_USERNAME, SF_PASSWORD, SF_SECURITY_TOKEN, SF_DOMAIN) "
            "or replace the REPLACE_ME_* constants in app.py."
        )

    return Salesforce(username=username, password=password, security_token=token, domain=domain)
88
+
89
+
90
def sf_push_kyc_record(sf, ocr_results):
    """
    Create one KYC_Record__c combining Aadhaar + PAN.

    Fields:
      Aadhaar_Number__c, Aadhaar_Name__c, Aadhaar_DOB__c (Date)
      PAN_Number__c, Pan_Name__c, Pan_DOB__c (Date)

    Args:
        sf: Connected Salesforce client exposing ``KYC_Record__c.create``.
        ocr_results: Dict with optional ``"aadhaar"`` and ``"pan"`` entries.
            An entry contributes fields only when its ``card_type`` is
            ``"AADHAAR"`` / ``"PAN"`` respectively.

    Returns:
        dict: ``{"success": True, "id": ..., "payload": ...}`` on success, or
        ``{"success": False, "error": ..., "payload": ...}`` when there was
        nothing to push or the create call raised.
    """
    aadhaar = ocr_results.get("aadhaar") or {}
    pan = ocr_results.get("pan") or {}

    payload = {}
    if aadhaar.get("card_type") == "AADHAAR":
        payload["Aadhaar_Number__c"] = aadhaar.get("aadhaar_number")
        payload["Aadhaar_Name__c"] = aadhaar.get("name")
        payload["Aadhaar_DOB__c"] = _parse_birthdate(aadhaar.get("dob"))
    if pan.get("card_type") == "PAN":
        payload["PAN_Number__c"] = pan.get("pan_number")
        payload["Pan_Name__c"] = pan.get("name")
        payload["Pan_DOB__c"] = _parse_birthdate(pan.get("dob"))

    # Drop missing values, including the "Not found" placeholder that
    # _parse_birthdate also treats as missing, so it is never stored as data.
    payload = {
        key: value
        for key, value in payload.items()
        if value is not None and value != "Not found"
    }

    # Nothing recognised (wrong card type, OCR failure, ...): do not create
    # an empty record in Salesforce.
    if not payload:
        return {
            "success": False,
            "error": "No valid Aadhaar or PAN fields to push.",
            "payload": payload,
        }

    try:
        result = sf.KYC_Record__c.create(payload)
        return {"success": True, "id": result.get("id"), "payload": payload}
    except Exception as e:
        # Report API failures to the caller instead of raising into the UI.
        return {"success": False, "error": str(e), "payload": payload}
123
+
124
+
125
+ # ---------- gradio callback ----------
126
def process_documents(aadhaar_file, pan_file, push_to_sf):
    """
    Gradio callback: OCR the uploaded cards and optionally push to Salesforce.

    - Each upload (Aadhaar, PAN) is run through OCR independently.
    - When ``push_to_sf`` is truthy, a single KYC_Record__c is created from
      the combined OCR results.

    Returns ``{"error": ...}`` when no file was uploaded; otherwise a dict
    with an ``"ocr"`` entry and, if a push was requested, a ``"salesforce"``
    entry describing the outcome.
    """
    # Guard clause: nothing to do without at least one upload.
    if not (aadhaar_file or pan_file):
        return {"error": "Please upload at least one file (Aadhaar and/or PAN)."}

    def _scan(upload):
        # OCR one upload and tag the result with the file's base name.
        fields = extract_kyc_fields(upload.name)
        fields["source_file"] = os.path.basename(upload.name)
        return fields

    ocr = {"aadhaar": None, "pan": None}

    # OCR Aadhaar
    if aadhaar_file:
        try:
            ocr["aadhaar"] = _scan(aadhaar_file)
        except Exception as exc:
            ocr["aadhaar"] = {"error": f"Aadhaar OCR failed: {str(exc)}", "card_type": "UNKNOWN"}

    # OCR PAN
    if pan_file:
        try:
            ocr["pan"] = _scan(pan_file)
        except Exception as exc:
            ocr["pan"] = {"error": f"PAN OCR failed: {str(exc)}", "card_type": "UNKNOWN"}

    response = {"ocr": ocr}
    if not push_to_sf:
        return response

    # Connection and push errors are reported in the output, never raised.
    try:
        connection = sf_connect_from_env_or_defaults()
        outcome = sf_push_kyc_record(connection, ocr)
        response["salesforce"] = {"pushed": outcome.get("success", False), **outcome}
    except Exception as exc:
        response["salesforce"] = {"pushed": False, "error": str(exc)}

    return response
165
 
166
+
167
+ # ---------- UI ----------
168
+ with gr.Blocks(title="Smart KYC OCR β†’ Salesforce (KYC_Record__c)") as demo:
169
  gr.Markdown(
170
  """
171
+ # 🧾 Smart KYC OCR β†’ Salesforce
172
+ Upload **Aadhaar** and **PAN** in separate boxes, then (optional) push a single **KYC_Record__c**.
 
173
 
174
+ - Preferred: configure `SF_USERNAME`, `SF_PASSWORD`, `SF_SECURITY_TOKEN`, `SF_DOMAIN` as environment variables
175
+ (Hugging Face **Space Secrets**).
176
+ - Emergency fallback: edit the `SF_DEFAULT_*` constants in `app.py` (NOT recommended).
177
  """
178
  )
179
 
 
189
  file_types=[".jpg", ".jpeg", ".png"]
190
  )
191
 
192
+ push_to_sf = gr.Checkbox(label="Push to Salesforce (create KYC_Record__c)", value=False)
193
+
194
  submit_btn = gr.Button("πŸ” Extract KYC Info", variant="primary")
195
+ output_json = gr.JSON(label="πŸ“‹ Output (OCR + Salesforce)")
196
 
197
  submit_btn.click(
198
  fn=process_documents,
199
+ inputs=[aadhaar_uploader, pan_uploader, push_to_sf],
200
  outputs=output_json,
201
  )
202
 
203
  gr.Markdown("---")
204
  gr.Markdown(
205
  """
206
+ πŸ”’ **Security note:** Avoid hardcoding secrets. Prefer Space Secrets / environment variables.
 
207
  """
208
  )
209