Spaces:
Running
Running
import streamlit as st | |
from transformers import AutoProcessor, AutoModelForVision2Seq | |
from PIL import Image | |
import torch | |
import io | |
# Load model and processor once | |
def load_model(): | |
model_id = "HuggingFaceTB/SmolVLM2-2.2B-Instruct" | |
processor = AutoProcessor.from_pretrained(model_id) | |
model = AutoModelForVision2Seq.from_pretrained(model_id).to("cuda" if torch.cuda.is_available() else "cpu") | |
return processor, model | |
processor, model = load_model() | |
# Streamlit UI | |
st.title("Aadhaar Card Information Extractor") | |
uploaded_file = st.file_uploader("Upload Aadhaar card image", type=["jpg", "png", "jpeg"]) | |
if uploaded_file is not None: | |
image = Image.open(uploaded_file).convert("RGB") | |
st.image(image, caption="Uploaded Aadhaar Card", use_column_width=True) | |
if st.button("Extract Info"): | |
with st.spinner("Extracting..."): | |
prompt = ( | |
"You are an AI system for extracting information from Indian Aadhaar cards. " | |
"From the image, extract and return a structured JSON with:\n" | |
"- Name\n" | |
"- Father's Name\n" | |
"- Date of Birth\n" | |
"- Gender\n" | |
"- Aadhaar Number\n" | |
"- Address (Street, Locality, District, State, PIN)\n" | |
"- QR code data (if visible)\n" | |
"- Bounding box of photograph as [x1, y1, x2, y2]\n" | |
"Respond only with JSON." | |
) | |
inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device) | |
outputs = model.generate(**inputs, max_new_tokens=512) | |
result = processor.batch_decode(outputs, skip_special_tokens=True)[0] | |
st.code(result, language="json") | |