# OCR / app.py
import gradio as gr
from PIL import Image, ImageDraw
import requests
from io import BytesIO
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load OCR model
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
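# Optional sketch (assumption, not part of the original app): move the model to a GPU
# when one is available. If enabled, `pixel_values` in detect_text must be moved to the
# same device before calling model.generate.
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)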
def load_image(image_file, image_url):
    """
    Load an image from an uploaded file or, failing that, from a URL.
    """
    if image_file:
        return image_file
    elif image_url:
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    else:
        return None
def detect_text(image_file, image_url):
    """
    Detect text in an image and return an annotated image plus text coordinates.
    """
    image = load_image(image_file, image_url)
    if image is None:
        return None, "No image provided."

    # Run TrOCR: the processor resizes and normalizes the image, the model generates the transcription.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # TrOCR is recognition-only and does not return word or line coordinates, so for
    # demonstration we draw a bounding box around the full image. For real coordinates,
    # use a detector such as PaddleOCR or EasyOCR (see the detect_text_with_boxes sketch below).
    draw = ImageDraw.Draw(image)
    w, h = image.size
    draw.rectangle([0, 0, w, h], outline="red", width=3)
    coords_str = f"Full image bounding box: [0,0,{w},{h}]\nDetected text: {text}"
    return image, coords_str
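
# Hedged sketch, not wired into the Gradio interface: one way to get word-level bounding
# boxes with EasyOCR instead of the full-image placeholder above. Assumes `easyocr` and
# `numpy` are installed; the function name detect_text_with_boxes is illustrative.
def detect_text_with_boxes(image):
    import easyocr  # imported lazily so the app still runs without easyocr installed
    import numpy as np

    reader = easyocr.Reader(["en"], gpu=False)   # loads detection + recognition models
    results = reader.readtext(np.array(image))   # each result: (bbox, text, confidence)
    draw = ImageDraw.Draw(image)
    lines = []
    for bbox, word, conf in results:
        xs = [point[0] for point in bbox]
        ys = [point[1] for point in bbox]
        draw.rectangle([min(xs), min(ys), max(xs), max(ys)], outline="red", width=3)
        lines.append(f"{word} @ [{min(xs)},{min(ys)},{max(xs)},{max(ys)}] (conf={conf:.2f})")
    return image, "\n".join(lines)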
iface = gr.Interface(
    fn=detect_text,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Image URL (optional)")
    ],
    outputs=[
        gr.Image(type="pil", label="Annotated Image"),
        gr.Textbox(label="Detected Text & Coordinates")
    ],
    title="Text Detection from Image",
    description="Upload an image or enter an image URL. The app transcribes the text with TrOCR and draws a placeholder bounding box around the full image."
)
if __name__ == "__main__":
    iface.launch()
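
# Assumed runtime dependencies for this Space (requirements.txt): gradio, transformers,
# torch, pillow, requests. The TrOCR weights are downloaded from the Hugging Face Hub
# on first launch.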