davanstrien HF Staff committed on
Commit
3be90a3
·
verified ·
1 Parent(s): e739816

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -79
app.py CHANGED
@@ -12,14 +12,11 @@ def get_alto_namespace(xml_file_path):
12
  try:
13
  tree = ET.parse(xml_file_path)
14
  root = tree.getroot()
15
- # The namespace is usually defined in the root <alto> tag
16
- # e.g., xmlns="http://www.loc.gov/standards/alto/v3/alto.xsd"
17
- # ET.ElementTree prepends this as {namespace_uri}tag
18
  if '}' in root.tag:
19
- return root.tag.split('}')[0] + '}' # e.g., {http://www.loc.gov/standards/alto/v3/alto.xsd}
20
  except ET.ParseError:
21
  print(f"Error parsing XML to find namespace: {xml_file_path}")
22
- return '' # Default to no namespace if not found or error
23
 
24
  def parse_alto_xml(xml_file_path):
25
  """
@@ -36,18 +33,15 @@ def parse_alto_xml(xml_file_path):
36
  return "Error: XML file not provided or does not exist.", []
37
 
38
  try:
39
- # Dynamically determine the namespace
40
  ns_prefix = get_alto_namespace(xml_file_path)
41
-
42
  tree = ET.parse(xml_file_path)
43
  root = tree.getroot()
44
 
45
- # Find all TextLine elements
46
  for text_line in root.findall(f'.//{ns_prefix}TextLine'):
47
  line_text_parts = []
48
  for string_element in text_line.findall(f'{ns_prefix}String'):
49
  text = string_element.get('CONTENT')
50
- if text: # Ensure text is not None
51
  line_text_parts.append(text)
52
  try:
53
  hpos = int(float(string_element.get('HPOS')))
@@ -63,9 +57,8 @@ def parse_alto_xml(xml_file_path):
63
  })
64
  except (ValueError, TypeError) as e:
65
  print(f"Warning: Could not parse coordinates for '{text}': {e}")
66
- # Add with default/placeholder values if needed, or skip
67
  ocr_data.append({
68
- 'text': text, 'x': 0, 'y': 0, 'w': 10, 'h': 10 # Placeholder
69
  })
70
  if line_text_parts:
71
  full_text_lines.append(" ".join(line_text_parts))
@@ -81,77 +74,61 @@ def parse_alto_xml(xml_file_path):
81
  def draw_ocr_on_image(image_pil, ocr_data):
82
  """
83
  Draws bounding boxes and text from ocr_data onto the image.
84
- Args:
85
- image_pil (PIL.Image): The image to draw on.
86
- ocr_data (list): List of OCR data dictionaries.
87
- Returns:
88
- PIL.Image: Image with overlays.
89
  """
90
  if not image_pil or not ocr_data:
91
- return image_pil # Return original image if no data or image
92
 
93
  draw = ImageDraw.Draw(image_pil)
94
 
95
- # Try to load a font, fallback to default if not found
96
  try:
97
- # Adjust font size based on average box height or fixed small size
98
- avg_height = sum(d['h'] for d in ocr_data if d['h'] > 0) / len(ocr_data) if ocr_data else 10
99
- font_size = max(8, int(avg_height * 0.6)) # Heuristic for font size
100
  font = ImageFont.truetype("arial.ttf", font_size)
101
- except IOError:
102
  font = ImageFont.load_default()
103
- font_size = 10 # Default font is usually small
104
- print("Arial font not found, using default font.")
105
 
106
  for item in ocr_data:
107
  x, y, w, h = item['x'], item['y'], item['w'], item['h']
108
  text = item['text']
109
-
110
- # Draw bounding box
111
  draw.rectangle([(x, y), (x + w, y + h)], outline="red", width=2)
112
-
113
- # Draw text (slightly offset for better visibility, handle multi-line if necessary)
114
- # Simple text drawing; for complex layouts, more sophisticated placement is needed
115
- text_position = (x + 2, y - font_size - 2 if y - font_size - 2 > 0 else y + 2) # Above or below
116
-
117
- # Optional: Draw a small background for text for better readability
118
- # text_bbox = draw.textbbox(text_position, text, font=font)
119
- # draw.rectangle(text_bbox, fill="rgba(255,255,255,0.7)")
120
-
121
  draw.text(text_position, text, fill="green", font=font)
122
 
123
  return image_pil
124
 
125
  # --- Gradio Interface Function ---
126
 
127
- def process_image_and_xml(image_file, xml_file, show_overlay):
128
  """
129
  Main function for the Gradio interface.
 
130
  """
131
- if image_file is None:
132
  return None, "Please upload an image.", None
133
- if xml_file is None:
134
- return Image.open(image_file.name), "Please upload an OCR XML file.", None
 
 
 
 
 
135
 
136
  try:
137
- # Load the image
138
- img_pil = Image.open(image_file.name).convert("RGB") # Ensure RGB for drawing
139
  except Exception as e:
140
  return None, f"Error loading image: {e}", None
141
 
142
- # Parse XML
143
- extracted_text, ocr_box_data = parse_alto_xml(xml_file.name)
144
 
145
  overlay_image_pil = None
146
  if show_overlay and ocr_box_data:
147
- # Create a copy for drawing to keep the original clean for the first output
148
  img_for_overlay = img_pil.copy()
149
  overlay_image_pil = draw_ocr_on_image(img_for_overlay, ocr_box_data)
150
- elif show_overlay and not ocr_box_data and not extracted_text.startswith("Error"):
151
- # If overlay is checked but no bounding boxes (e.g. empty XML or parsing issue not caught as error string)
152
  extracted_text += "\n(No bounding box data found or parsed for overlay)"
153
 
154
-
155
  return img_pil, extracted_text, overlay_image_pil
156
 
157
 
@@ -167,8 +144,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
167
 
168
  with gr.Row():
169
  with gr.Column(scale=1):
170
- image_input = gr.File(label="Upload Image (PNG, JPG, etc.)", type="file") # Using type="file" for path
171
- xml_input = gr.File(label="Upload ALTO XML File (.xml)", type="file")
 
172
  show_overlay_checkbox = gr.Checkbox(label="Show OCR Overlay on Image", value=False)
173
  submit_button = gr.Button("Process Files", variant="primary")
174
 
@@ -178,25 +156,22 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
178
  with gr.Column(scale=1):
179
  output_text = gr.Textbox(label="Extracted Plain Text", lines=15, interactive=False)
180
 
181
- output_image_overlay = gr.Image(label="Image with OCR Overlay", type="pil", interactive=False, visible=True) # Always visible, content changes
182
-
183
- def update_interface(image_f, xml_f, show_overlay_val):
184
- if image_f is None or xml_f is None:
185
- # Handle cases where one or both files are not yet uploaded
186
- img_to_show = Image.open(image_f.name).convert("RGB") if image_f else None
187
- text_to_show = "Please upload both an image and an XML file."
188
- overlay_to_show = None
189
- if image_f is None:
190
- text_to_show = "Please upload an image file."
191
- elif xml_f is None:
192
- text_to_show = "Please upload an XML file."
193
-
194
- return img_to_show, text_to_show, overlay_to_show
195
 
196
- img, text, overlay_img = process_image_and_xml(image_f, xml_f, show_overlay_val)
 
 
 
 
 
 
 
 
 
 
 
 
197
 
198
- # If "show overlay" is not checked, overlay_img will be None from process_image_and_xml
199
- # The gr.Image component will handle None by showing nothing or a placeholder.
200
  return img, text, overlay_img
201
 
202
  submit_button.click(
@@ -205,7 +180,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
205
  outputs=[output_image_orig, output_text, output_image_overlay]
206
  )
207
 
208
- # Also update if the checkbox changes, provided files are already there
209
  show_overlay_checkbox.change(
210
  fn=update_interface,
211
  inputs=[image_input, xml_input, show_overlay_checkbox],
@@ -243,22 +217,21 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
243
 
244
 
245
  if __name__ == "__main__":
246
- # To test, create a dummy image and use the XML from your example.
247
- # Save the XML content you provided as "189819724.34.xml" in the same directory.
248
- # Create a dummy image, e.g., "dummy_image.png"
249
  try:
250
- from PIL import Image as PImage
251
- img = PImage.new('RGB', (2394, 3612), color = 'lightgray') # Dimensions from example XML
252
- # Optionally add some text to image if you want to see if boxes align (roughly)
253
- # d = ImageDraw.Draw(img)
254
- # d.text((500,200), "Test Image", fill=(0,0,0))
255
  img.save("dummy_image.png")
256
  print("Created dummy_image.png for testing.")
257
 
258
- # Ensure the example XML file (189819724.34.xml) exists in the same directory
259
- # or provide the correct path if it's elsewhere.
260
- if not os.path.exists("189819724.34.xml"):
261
- print("WARNING: Example XML '189819724.34.xml' not found. Please create it or upload your own.")
 
 
 
 
 
262
 
263
  except ImportError:
264
  print("Pillow not installed, can't create dummy image.")
 
12
  try:
13
  tree = ET.parse(xml_file_path)
14
  root = tree.getroot()
 
 
 
15
  if '}' in root.tag:
16
+ return root.tag.split('}')[0] + '}'
17
  except ET.ParseError:
18
  print(f"Error parsing XML to find namespace: {xml_file_path}")
19
+ return ''
20
 
21
  def parse_alto_xml(xml_file_path):
22
  """
 
33
  return "Error: XML file not provided or does not exist.", []
34
 
35
  try:
 
36
  ns_prefix = get_alto_namespace(xml_file_path)
 
37
  tree = ET.parse(xml_file_path)
38
  root = tree.getroot()
39
 
 
40
  for text_line in root.findall(f'.//{ns_prefix}TextLine'):
41
  line_text_parts = []
42
  for string_element in text_line.findall(f'{ns_prefix}String'):
43
  text = string_element.get('CONTENT')
44
+ if text:
45
  line_text_parts.append(text)
46
  try:
47
  hpos = int(float(string_element.get('HPOS')))
 
57
  })
58
  except (ValueError, TypeError) as e:
59
  print(f"Warning: Could not parse coordinates for '{text}': {e}")
 
60
  ocr_data.append({
61
+ 'text': text, 'x': 0, 'y': 0, 'w': 10, 'h': 10
62
  })
63
  if line_text_parts:
64
  full_text_lines.append(" ".join(line_text_parts))
 
74
  def draw_ocr_on_image(image_pil, ocr_data):
75
  """
76
  Draws bounding boxes and text from ocr_data onto the image.
 
 
 
 
 
77
  """
78
  if not image_pil or not ocr_data:
79
+ return image_pil
80
 
81
  draw = ImageDraw.Draw(image_pil)
82
 
 
83
  try:
84
+ avg_height = sum(d['h'] for d in ocr_data if d['h'] > 0) / len(ocr_data) if ocr_data and any(d['h'] > 0 for d in ocr_data) else 10
85
+ font_size = max(8, int(avg_height * 0.6))
 
86
  font = ImageFont.truetype("arial.ttf", font_size)
87
+ except (IOError, ZeroDivisionError): # Added ZeroDivisionError for safety
88
  font = ImageFont.load_default()
89
+ font_size = 10
90
+ print("Arial font not found or issue with height calculation, using default font.")
91
 
92
  for item in ocr_data:
93
  x, y, w, h = item['x'], item['y'], item['w'], item['h']
94
  text = item['text']
 
 
95
  draw.rectangle([(x, y), (x + w, y + h)], outline="red", width=2)
96
+ text_position = (x + 2, y - font_size - 2 if y - font_size - 2 > 0 else y + 2)
 
 
 
 
 
 
 
 
97
  draw.text(text_position, text, fill="green", font=font)
98
 
99
  return image_pil
100
 
101
  # --- Gradio Interface Function ---
102
 
103
+ def process_image_and_xml(image_path, xml_path, show_overlay):
104
  """
105
  Main function for the Gradio interface.
106
+ image_path and xml_path are now file paths (strings).
107
  """
108
+ if image_path is None:
109
  return None, "Please upload an image.", None
110
+ if xml_path is None:
111
+ # If image_path is not None, we can still show the image
112
+ try:
113
+ img_pil_orig = Image.open(image_path).convert("RGB")
114
+ except Exception as e:
115
+ return None, f"Error loading image: {e}. Also, please upload an OCR XML file.", None
116
+ return img_pil_orig, "Please upload an OCR XML file.", None
117
 
118
  try:
119
+ img_pil = Image.open(image_path).convert("RGB")
 
120
  except Exception as e:
121
  return None, f"Error loading image: {e}", None
122
 
123
+ extracted_text, ocr_box_data = parse_alto_xml(xml_path)
 
124
 
125
  overlay_image_pil = None
126
  if show_overlay and ocr_box_data:
 
127
  img_for_overlay = img_pil.copy()
128
  overlay_image_pil = draw_ocr_on_image(img_for_overlay, ocr_box_data)
129
+ elif show_overlay and not ocr_box_data and not (isinstance(extracted_text, str) and extracted_text.startswith("Error")):
 
130
  extracted_text += "\n(No bounding box data found or parsed for overlay)"
131
 
 
132
  return img_pil, extracted_text, overlay_image_pil
133
 
134
 
 
144
 
145
  with gr.Row():
146
  with gr.Column(scale=1):
147
+ # Corrected: type="filepath"
148
+ image_input = gr.File(label="Upload Image (PNG, JPG, etc.)", type="filepath")
149
+ xml_input = gr.File(label="Upload ALTO XML File (.xml)", type="filepath")
150
  show_overlay_checkbox = gr.Checkbox(label="Show OCR Overlay on Image", value=False)
151
  submit_button = gr.Button("Process Files", variant="primary")
152
 
 
156
  with gr.Column(scale=1):
157
  output_text = gr.Textbox(label="Extracted Plain Text", lines=15, interactive=False)
158
 
159
+ output_image_overlay = gr.Image(label="Image with OCR Overlay", type="pil", interactive=False, visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ def update_interface(image_filepath, xml_filepath, show_overlay_val):
162
+ # image_filepath and xml_filepath are now strings (paths) or None
163
+
164
+ # Initial check for None inputs to provide guidance
165
+ if image_filepath is None and xml_filepath is None:
166
+ return None, "Please upload an image and an XML file.", None
167
+ if image_filepath is None:
168
+ # xml_filepath might be present, but we need the image first
169
+ return None, "Please upload an image file.", None
170
+ # xml_filepath can be None here, process_image_and_xml handles it
171
+
172
+ # Call the main processing function
173
+ img, text, overlay_img = process_image_and_xml(image_filepath, xml_filepath, show_overlay_val)
174
 
 
 
175
  return img, text, overlay_img
176
 
177
  submit_button.click(
 
180
  outputs=[output_image_orig, output_text, output_image_overlay]
181
  )
182
 
 
183
  show_overlay_checkbox.change(
184
  fn=update_interface,
185
  inputs=[image_input, xml_input, show_overlay_checkbox],
 
217
 
218
 
219
  if __name__ == "__main__":
 
 
 
220
  try:
221
+ # from PIL import Image as PImage # Already imported as Image
222
+ img = Image.new('RGB', (2394, 3612), color = 'lightgray')
 
 
 
223
  img.save("dummy_image.png")
224
  print("Created dummy_image.png for testing.")
225
 
226
+ # Make sure your example XML is named 189819724.34.xml and is in the same directory
227
+ # Or, create it if it doesn't exist with the content you provided
228
+ example_xml_filename = "189819724.34.xml"
229
+ if not os.path.exists(example_xml_filename):
230
+ print(f"WARNING: Example XML '{example_xml_filename}' not found. Please create it or upload your own.")
231
+ # You could also write the example XML content here if needed for testing
232
+ # with open(example_xml_filename, "w") as f:
233
+ # f.write("""<?xml version="1.0" encoding="UTF-8"?> ...your full XML... </alto>""")
234
+
235
 
236
  except ImportError:
237
  print("Pillow not installed, can't create dummy image.")