Spaces:

rahul7star
/

OCR

Sleeping

App Files Files Community

rahul7star committed 23 days ago

Commit

1c4e9d0

verified ·

1 Parent(s): bdb9f8c

Create app.py

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import gradio as gr
+from PIL import Image, ImageDraw
+import requests
+from io import BytesIO
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load the TrOCR processor and model once, at import time, so each call to the
# handler reuses the same objects. The processor and the model must come from
# the same checkpoint, so the identifier is defined in exactly one place.
MODEL_NAME = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
def load_image(image_file, image_url, timeout=10.0):
    """
    Load an image from an uploaded file or, failing that, from a URL.

    Args:
        image_file: Image already supplied by the caller, or None when
            nothing was uploaded. It is returned unchanged.
        image_url: Address of an image to download, or an empty string /
            None. Only consulted when ``image_file`` is None.
        timeout: Seconds to wait for the download before giving up.

    Returns:
        ``image_file`` when it is given; otherwise the downloaded image
        converted to RGB; otherwise None when neither input was provided.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        PIL.UnidentifiedImageError: If the response body is not an image.
    """
    # Identity check rather than truthiness: an image object is valid input
    # regardless of how its type defines truth value.
    if image_file is not None:
        return image_file

    url = (image_url or "").strip()
    if not url:
        return None

    # NOTE(security): the URL is user-supplied and fetched server-side.
    # Restrict the allowed hosts if this runs where internal services are
    # reachable.
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx here instead of handing an error page to the decoder.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")
def detect_text(image_file, image_url):
    """
    Recognize the text in an image and return an annotated copy plus a report.

    TrOCR only transcribes text; it does not return coordinates. The
    "bounding box" is therefore a frame drawn around the whole image. For
    real per-word coordinates use an OCR model such as PaddleOCR or EasyOCR.

    Args:
        image_file: Uploaded image (a PIL image), or None.
        image_url: URL of an image, used when no file was uploaded.

    Returns:
        A ``(image, report)`` tuple. ``image`` is an RGB copy of the input
        with a red frame, and ``report`` holds the frame coordinates and the
        recognized text. When no image is available or it cannot be loaded,
        ``image`` is None and ``report`` explains why.
    """
    # Report download/decoding problems as a message instead of letting the
    # exception escape. Pillow's decoding errors derive from OSError.
    try:
        image = load_image(image_file, image_url)
    except (requests.RequestException, OSError) as exc:
        return None, f"Could not load image: {exc}"
    if image is None:
        return None, "No image provided."

    # Uploaded files may be RGBA, palette or grayscale; normalise to RGB
    # before preprocessing. convert() returns a new image, so the frame
    # drawn below never modifies the caller's object.
    image = image.convert("RGB")

    # Preprocess the image, generate token ids, then decode them to text.
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    w, h = image.size
    draw = ImageDraw.Draw(image)
    # Pillow treats both rectangle corners as inclusive, so the last valid
    # pixel is (w - 1, h - 1); drawing to (w, h) clips the right/bottom edge.
    draw.rectangle([0, 0, w - 1, h - 1], outline="red", width=3)
    # The report states the image extent, not the last pixel index.
    coords_str = f"Full image bounding box: [0,0,{w},{h}]\nDetected text: {text}"
    return image, coords_str
# Input widgets, in the order detect_text() receives them.
_input_components = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Image URL (optional)"),
]

# Output widgets, in the order detect_text() returns its values.
_output_components = [
    gr.Image(type="pil", label="Annotated Image"),
    gr.Textbox(label="Detected Text & Coordinates"),
]

# Gradio interface wiring the widgets above to detect_text().
iface = gr.Interface(
    fn=detect_text,
    inputs=_input_components,
    outputs=_output_components,
    title="Text Detection from Image",
    description="Upload an image or enter an image URL, and the app will detect text and show bounding boxes.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()