gopichandra committed on
Commit
edd207e
·
verified ·
1 Parent(s): 84821bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -5,21 +5,19 @@ import json
5
 
def extract_fields(image):
    """Run Tesseract OCR on *image* and collect its ``key: value`` lines.

    Only lines containing a colon contribute; each is split at its first
    colon and both halves are whitespace-trimmed. When a key repeats, the
    later value wins.

    Returns the collected pairs as a JSON string indented by two spaces, or
    a ``{"error": <message>}`` dict if anything raises along the way.
    """
    try:
        ocr_text = pytesseract.image_to_string(image)
        # partition(':') cuts at the first colon, like split(':', 1).
        colon_rows = (
            row.partition(':') for row in ocr_text.split('\n') if ':' in row
        )
        fields = {left.strip(): right.strip() for left, _, right in colon_rows}
        return json.dumps(fields, indent=2)
    except Exception as err:
        return {"error": str(err)}
25
 
@@ -28,5 +26,5 @@ gr.Interface(
28
  inputs=gr.Image(type="pil"),
29
  outputs="json",
30
  title="Smart KYC OCR (Tesseract)",
31
- description="Upload Aadhaar or PAN image to extract KYC fields as key-value pairs using Tesseract OCR."
32
  ).launch()
 
5
 
def _parse_ocr_text(raw_text):
    """Convert raw OCR text into a flat dict of extracted fields.

    A line of the form ``key: value`` (split at the first colon) is stored
    under its trimmed key. Any other line with at least two
    whitespace-separated tokens is kept verbatim under a generated
    ``line_<n>`` key, where ``n`` is the number of entries collected so far.
    Single-token and blank lines are dropped.
    """
    fields = {}
    for line in raw_text.split('\n'):
        line = line.strip()
        key, colon, value = line.partition(':')
        key = key.strip()
        if colon and key:
            fields[key] = value.strip()
        elif len(line.split()) >= 2:
            # Also reached by lines such as ": 1234 5678" whose key part is
            # empty: keep them as free text instead of filing them under "".
            fields[f"line_{len(fields)}"] = line
    return fields


def extract_fields(image):
    """Extract KYC fields from *image* using Tesseract OCR.

    Args:
        image: The image handed to ``pytesseract.image_to_string``. May be
            ``None`` (presumably when the form is submitted without an
            upload -- confirm against the UI wiring).

    Returns:
        On success, a JSON string (two-space indent, non-ASCII characters
        left unescaped) of the parsed fields. On failure, a dict of the form
        ``{"error": <message>}``.
    """
    if image is None:
        # Fail with a readable message instead of an opaque library error.
        return {"error": "No image provided."}

    try:
        raw_text = pytesseract.image_to_string(image)
        return json.dumps(
            _parse_ocr_text(raw_text), indent=2, ensure_ascii=False
        )
    except Exception as e:
        # UI boundary: report any OCR/serialisation failure to the user
        # rather than letting the request crash.
        return {"error": str(e)}
23
 
 
26
  inputs=gr.Image(type="pil"),
27
  outputs="json",
28
  title="Smart KYC OCR (Tesseract)",
29
+ description="Upload Aadhaar or PAN image to extract key-value KYC fields using Tesseract OCR."
30
  ).launch()