gopichandra committed on
Commit
7916a6e
·
verified ·
1 Parent(s): 79e8e57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -37
app.py CHANGED
@@ -1,51 +1,32 @@
1
  import gradio as gr
2
- from PIL import Image
3
  import pytesseract
4
- import re
 
5
 
6
- def extract_kyc_fields(image):
7
  try:
8
- text = pytesseract.image_to_string(image)
9
-
 
10
  result = {}
11
 
12
- # Aadhaar number pattern
13
- aadhaar_match = re.search(r'\d{4}[\s-]?\d{4}[\s-]?\d{4}', text)
14
- if aadhaar_match:
15
- aadhaar_number = aadhaar_match.group().replace(" ", "-")
16
- result["aadhaar_number"] = aadhaar_number
17
- else:
18
- result["aadhaar_number"] = "Not found"
19
-
20
- # Name (first all-alphabetic line with 2+ words)
21
- lines = text.split('\n')
22
  for line in lines:
23
- clean_line = line.strip()
24
- if clean_line and re.match(r'^[A-Za-z ]+$', clean_line) and len(clean_line.split()) >= 2:
25
- result["name"] = clean_line
26
- break
27
- if "name" not in result:
28
- result["name"] = "Not found"
29
 
30
- # DOB or Birth Date
31
- dob_match = re.search(r'(\d{2}[/-]\d{2}[/-]\d{4})|(\d{4}-\d{2}-\d{2})', text)
32
- if dob_match:
33
- result["dob"] = dob_match.group()
34
- else:
35
- result["dob"] = "Not found"
36
 
37
- return result
38
  except Exception as e:
39
  return {"error": str(e)}
40
 
41
- # Gradio UI
42
- iface = gr.Interface(
43
- fn=extract_kyc_fields,
44
  inputs=gr.Image(type="pil"),
45
  outputs="json",
46
- title="🧠 Smart KYC OCR Extractor",
47
- description="Upload Aadhaar/PAN image and extract KYC fields using Tesseract"
48
- )
49
-
50
- if __name__ == "__main__":
51
- iface.launch()
 
1
  import gradio as gr
 
2
  import pytesseract
3
+ from PIL import Image
4
+ import json
5
 
6
+ def extract_fields(image):
7
  try:
8
+ # Extract text using Tesseract
9
+ raw_text = pytesseract.image_to_string(image)
10
+ lines = raw_text.split('\n')
11
  result = {}
12
 
 
 
 
 
 
 
 
 
 
 
13
  for line in lines:
14
+ if ':' in line:
15
+ parts = line.split(':', 1)
16
+ key = parts[0].strip()
17
+ value = parts[1].strip()
18
+ result[key] = value
 
19
 
20
+ # Return as pretty JSON
21
+ return json.dumps(result, indent=2)
 
 
 
 
22
 
 
23
  except Exception as e:
24
  return {"error": str(e)}
25
 
26
+ gr.Interface(
27
+ fn=extract_fields,
 
28
  inputs=gr.Image(type="pil"),
29
  outputs="json",
30
+ title="Smart KYC OCR (Tesseract)",
31
+ description="Upload Aadhaar or PAN image to extract KYC fields as key-value pairs using Tesseract OCR."
32
+ ).launch()