SMART_KYC_OCR / app.py
gopichandra's picture
Update app.py
edd207e verified
raw
history blame
853 Bytes
import gradio as gr
import pytesseract
from PIL import Image
import json
def extract_fields(image):
    """Run Tesseract OCR on *image* and return the recognised fields as JSON.

    Every OCR line is stripped of surrounding whitespace and then classified:

    * a line containing ``:`` is split at the first colon into a key/value
      pair, with both sides stripped;
    * any other line with at least two whitespace-separated words is stored
      under a generated ``line_<n>`` key, where ``<n>`` is the number of
      entries collected so far;
    * everything else (blank lines, single words) is dropped.

    Args:
        image: The image handed to ``pytesseract.image_to_string``. ``None``
            (no image supplied) is reported as an error instead of being
            passed to Tesseract.

    Returns:
        A JSON-formatted string (2-space indent) on every path. On success
        it encodes the extracted key/value mapping; on failure it encodes an
        object with a single ``"error"`` key holding the message.
    """
    if image is None:
        return json.dumps({"error": "No image provided."}, indent=2)

    # Only the OCR call can realistically fail (missing Tesseract binary,
    # unsupported image object, ...), so it is the only thing guarded. This is
    # the UI boundary, hence the broad catch that is turned into a message.
    try:
        raw_text = pytesseract.image_to_string(image)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)

    result = {}
    for line in raw_text.split('\n'):
        line = line.strip()
        if ':' in line:
            # Split on the first colon only so values such as times keep
            # their remaining colons.
            # NOTE: a repeated key overwrites the value stored earlier.
            key, value = line.split(':', 1)
            result[key.strip()] = value.strip()
        elif len(line.split()) >= 2:
            result["line_" + str(len(result))] = line
    return json.dumps(result, indent=2)
# Build the web UI: one PIL image input, JSON output rendered from the value
# returned by extract_fields.
demo = gr.Interface(
    title="Smart KYC OCR (Tesseract)",
    description="Upload Aadhaar or PAN image to extract key-value KYC fields using Tesseract OCR.",
    fn=extract_fields,
    inputs=gr.Image(type="pil"),
    outputs="json",
)

# Start serving the interface.
demo.launch()