rahul7star commited on
Commit
1c4e9d0
·
verified ·
1 Parent(s): bdb9f8c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image, ImageDraw
3
+ import requests
4
+ from io import BytesIO
5
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
6
+
7
# Load the OCR processor and model once at import time so every request
# reuses the same instances. Both come from the same checkpoint.
TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)
10
+
11
def load_image(image_file, image_url, timeout=10.0):
    """Load an image from an uploaded file or, failing that, from a URL.

    The uploaded image takes precedence over the URL. Both sources are
    normalized to RGB so callers always receive the same image mode.

    Args:
        image_file: Image object from the upload widget (anything exposing a
            PIL-style ``convert`` method), or ``None`` if nothing was uploaded.
        image_url: URL of an image to download; ``None``, empty, or
            whitespace-only values are treated as "no URL".
        timeout: Seconds to wait for the download before giving up.

    Returns:
        A new RGB image, or ``None`` when neither source was provided.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the downloaded bytes cannot be decoded as an image.
    """
    if image_file is not None:
        # convert() returns a new image, so the caller's upload is never
        # modified by later drawing on the result.
        return image_file.convert("RGB")

    url = (image_url or "").strip()
    if not url:
        return None

    # NOTE(review): the URL is user-supplied; if this app is exposed publicly,
    # consider restricting which hosts may be fetched.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to decode an error page.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")
22
+
23
def detect_text(image_file, image_url):
    """Run OCR on an image and return an annotated copy plus a text report.

    Args:
        image_file: Uploaded image (PIL image) or ``None``.
        image_url: Optional URL to fetch the image from when no file is given.

    Returns:
        A ``(image, message)`` tuple. ``image`` is an RGB copy of the input
        with a red border drawn around it, or ``None`` when no image could be
        loaded. ``message`` holds the bounding box and recognized text, or an
        explanation of why no image is available.
    """
    try:
        image = load_image(image_file, image_url)
    except (requests.RequestException, OSError) as exc:
        # Report download/decoding problems in the text output instead of
        # letting the request fail with an unhandled exception.
        return None, f"Could not load image: {exc}"
    if image is None:
        return None, "No image provided."

    # Work on an RGB copy: convert() always returns a new image, so the
    # caller's object is not drawn on, and the processor gets RGB input.
    annotated = image.convert("RGB")

    # Recognize the text in the image.
    pixel_values = processor(images=annotated, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # For demonstration: bounding box around the full image (TrOCR doesn't
    # return coordinates). For proper coordinates use an OCR model like
    # PaddleOCR or EasyOCR.
    draw = ImageDraw.Draw(annotated)
    w, h = annotated.size
    # Pixel coordinates are inclusive, so the last valid pixel is (w-1, h-1);
    # using (w, h) would push the right and bottom edges off the image.
    draw.rectangle([0, 0, w - 1, h - 1], outline="red", width=3)
    # The reported box is the image extent (width x height).
    coords_str = f"Full image bounding box: [0,0,{w},{h}]\nDetected text: {text}"

    return annotated, coords_str
44
+
45
# Input widgets: an uploaded picture and an optional URL fallback.
_input_components = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Image URL (optional)"),
]

# Output widgets: the annotated picture and the textual report.
_output_components = [
    gr.Image(type="pil", label="Annotated Image"),
    gr.Textbox(label="Detected Text & Coordinates"),
]

# Wire detect_text into a simple two-input / two-output web UI.
iface = gr.Interface(
    fn=detect_text,
    inputs=_input_components,
    outputs=_output_components,
    title="Text Detection from Image",
    description="Upload an image or enter an image URL, and the app will detect text and show bounding boxes.",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()