# Hugging Face Spaces page header (status: Sleeping) - scrape residue, not part of the program.
import gradio as gr | |
import pytesseract | |
from PIL import Image | |
import json | |
def _parse_ocr_text(raw_text):
    """Turn raw OCR text into a flat ``{label: value}`` mapping.

    Every line is stripped of surrounding whitespace and then classified:

    * a line containing ``:`` is split at the first colon; the stripped left
      part becomes the key and the stripped right part the value (a repeated
      key overwrites the earlier entry);
    * any other line with at least two whitespace-separated words is kept
      verbatim under the key ``"line_<n>"``;
    * everything else (blank lines, single words) is dropped.
    """
    fields = {}
    for line in raw_text.split('\n'):
        line = line.strip()
        if ':' in line:
            key, _, value = line.partition(':')
            fields[key.strip()] = value.strip()
        elif len(line.split()) >= 2:
            # <n> is the current size of the mapping (labelled entries
            # included), not a running count of unlabelled lines, so the
            # numbers can skip values.
            fields["line_" + str(len(fields))] = line
    return fields


def extract_fields(image):
    """Run Tesseract OCR on *image* and return the recognised fields.

    Args:
        image: The picture handed to ``pytesseract.image_to_string``, or
            ``None`` when no picture was supplied.

    Returns:
        On success, a JSON-formatted string (2-space indent) holding the
        mapping built by ``_parse_ocr_text``.
        On failure, a dict of the form ``{"error": <message>}``.
        The two shapes differ on purpose: this is the contract the function
        has always had, and it is kept unchanged here.
    """
    # Nothing to OCR: answer with a clear message instead of letting the OCR
    # library fail on None and surface a low-level error text.
    if image is None:
        return {"error": "No image provided."}

    try:
        raw_text = pytesseract.image_to_string(image)
        return json.dumps(_parse_ocr_text(raw_text), indent=2)
    except Exception as e:
        # Top-level UI boundary: report the failure to the caller as data
        # rather than crashing the request.
        return {"error": str(e)}
# Build the single-function web UI: one image input (delivered to the handler
# as a PIL image), one JSON output, then start serving it.
demo = gr.Interface(
    fn=extract_fields,
    inputs=gr.Image(type="pil"),
    outputs="json",
    title="Smart KYC OCR (Tesseract)",
    description="Upload Aadhaar or PAN image to extract key-value KYC fields using Tesseract OCR.",
)
demo.launch()