# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
from PIL import Image, ImageDraw | |
import requests | |
from io import BytesIO | |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
# OCR components, loaded once at import time from a single checkpoint:
# `processor` prepares image tensors and decodes generated token ids,
# `model` generates the token ids.
_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
def load_image(image_file, image_url):
    """Return the image to analyse as an RGB PIL image, or ``None``.

    An uploaded image takes precedence over a URL.

    Args:
        image_file: Uploaded image (a PIL image, as delivered by the
            ``gr.Image(type="pil")`` input), or ``None`` when nothing was
            uploaded.
        image_url: URL of an image to download. ``None``, an empty string
            and a whitespace-only string all count as "no URL".

    Returns:
        The image converted to RGB, or ``None`` when neither input was given.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an HTTP error status.
        OSError: The downloaded bytes could not be opened as an image.
    """
    if image_file is not None:
        # Same normalisation as the URL branch. convert() returns a new
        # image, so later drawing never touches the caller's upload.
        return image_file.convert("RGB")

    url = (image_url or "").strip()
    if not url:
        return None

    # Without a timeout an unresponsive host would block this request forever.
    response = requests.get(url, timeout=15)
    # Fail on 4xx/5xx here instead of handing an error page to the decoder.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")
def detect_text(image_file, image_url):
    """Run TrOCR on an image and return the annotated image plus a report.

    TrOCR recognises text but does not localise it, so the only box drawn is
    a red outline around the whole image. For real per-word coordinates use
    an OCR model such as PaddleOCR or EasyOCR.

    Args:
        image_file: Uploaded PIL image, or ``None``.
        image_url: Image URL, used only when no image was uploaded.

    Returns:
        A ``(image, report)`` tuple. ``image`` is the annotated RGB image, or
        ``None`` when no image was supplied or it could not be loaded; in
        those cases ``report`` carries the explanation instead of the
        recognised text.
    """
    try:
        image = load_image(image_file, image_url)
    except (requests.RequestException, OSError) as exc:
        # Report download/decode problems in the output textbox instead of
        # letting them surface as an unhandled error in the UI.
        return None, f"Could not load image: {exc}"
    if image is None:
        return None, "No image provided."

    # Work on a private RGB copy: the OCR input is normalised to three
    # channels and the rectangle below is not drawn on the caller's object.
    image = image.convert("RGB")

    # Preprocess, generate token ids, then decode them to plain text.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    draw = ImageDraw.Draw(image)
    w, h = image.size
    # Pillow boxes include both endpoints, so the last in-bounds pixel is
    # (w - 1, h - 1); using (w, h) would push part of the outline off-image.
    draw.rectangle([0, 0, w - 1, h - 1], outline="red", width=3)
    coords_str = f"Full image bounding box: [0,0,{w},{h}]\nDetected text: {text}"
    return image, coords_str
# Gradio UI wiring: two inputs (an uploaded image and an optional URL) feed
# detect_text, whose two return values fill the annotated-image and text
# outputs, in that order.
_inputs = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Image URL (optional)"),
]
_outputs = [
    gr.Image(type="pil", label="Annotated Image"),
    gr.Textbox(label="Detected Text & Coordinates"),
]

iface = gr.Interface(
    fn=detect_text,
    inputs=_inputs,
    outputs=_outputs,
    title="Text Detection from Image",
    description="Upload an image or enter an image URL, and the app will detect text and show bounding boxes.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()