rahul7star committed on
Commit
98c9504
·
verified ·
1 Parent(s): 3f3cb7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -1,11 +1,11 @@
1
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
2
- import easyocr
3
- from PIL import Image, ImageDraw
4
- import numpy as np
5
  import gradio as gr
 
6
  import requests
7
  from io import BytesIO
 
8
  import json
 
 
9
 
10
  # TrOCR model for recognition
11
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
@@ -27,16 +27,14 @@ def detect_text_trocr_json(image_file, image_url):
27
  if image is None:
28
  return None, "No image provided.", None
29
 
30
- # Step 1: Detect bounding boxes with EasyOCR
31
  results = reader.readtext(np.array(image))
32
-
33
  draw = ImageDraw.Draw(image)
34
  words_json = []
35
- paragraph_json = []
36
 
37
  for bbox, _, conf in results:
38
- x_coords = [point[0] for point in bbox]
39
- y_coords = [point[1] for point in bbox]
 
40
  x_min, y_min = min(x_coords), min(y_coords)
41
  x_max, y_max = max(x_coords), max(y_coords)
42
 
@@ -54,14 +52,16 @@ def detect_text_trocr_json(image_file, image_url):
54
  "confidence": float(conf)
55
  })
56
 
57
- paragraph_json = words_json.copy()
 
58
 
59
  output_json = {
60
  "words": words_json,
61
- "paragraphs": paragraph_json
62
  }
63
 
64
- return image, json.dumps(output_json, indent=2), json.dumps(output_json)
 
65
 
66
  iface = gr.Interface(
67
  fn=detect_text_trocr_json,
 
 
 
 
 
1
  import gradio as gr
2
+ from PIL import Image, ImageDraw
3
  import requests
4
  from io import BytesIO
5
+ import numpy as np
6
  import json
7
+ import easyocr
8
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
9
 
10
  # TrOCR model for recognition
11
  processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 
27
  if image is None:
28
  return None, "No image provided.", None
29
 
 
30
  results = reader.readtext(np.array(image))
 
31
  draw = ImageDraw.Draw(image)
32
  words_json = []
 
33
 
34
  for bbox, _, conf in results:
35
+ # Convert coordinates to float
36
+ x_coords = [float(point[0]) for point in bbox]
37
+ y_coords = [float(point[1]) for point in bbox]
38
  x_min, y_min = min(x_coords), min(y_coords)
39
  x_max, y_max = max(x_coords), max(y_coords)
40
 
 
52
  "confidence": float(conf)
53
  })
54
 
55
+ # For simplicity, treat words as paragraphs
56
+ paragraphs_json = words_json.copy()
57
 
58
  output_json = {
59
  "words": words_json,
60
+ "paragraphs": paragraphs_json
61
  }
62
 
63
+ json_str = json.dumps(output_json, indent=2) # now serializable
64
+ return image, json_str, json_str
65
 
66
  iface = gr.Interface(
67
  fn=detect_text_trocr_json,