Gabriel committed on
Commit
bb2fbb1
·
verified ·
1 Parent(s): 2c99aea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +395 -160
app.py CHANGED
@@ -2,10 +2,13 @@ import gradio as gr
2
  import json
3
  import tempfile
4
  import os
5
- from typing import List, Optional, Literal, Tuple, Union
 
 
6
  from PIL import Image
7
  import requests
8
  from io import BytesIO
 
9
 
10
  import spaces
11
  from pathlib import Path
@@ -144,13 +147,14 @@ PIPELINE_CONFIGS = {
144
  }
145
 
146
 
147
- def handle_image_input(image_path: Union[str, None], progress: gr.Progress = None) -> str:
148
  """
149
  Handle image input from various sources (local file, URL, or uploaded file).
150
 
151
  Args:
152
  image_path: Path to image file or URL
153
  progress: Progress tracker for UI updates
 
154
 
155
  Returns:
156
  Local file path to the image
@@ -159,13 +163,13 @@ def handle_image_input(image_path: Union[str, None], progress: gr.Progress = Non
159
  raise ValueError("No image provided. Please upload an image or provide a URL.")
160
 
161
  if progress:
162
- progress(0.1, desc="Processing image input...")
163
 
164
  # If it's a URL, download the image
165
  if isinstance(image_path, str) and (image_path.startswith("http://") or image_path.startswith("https://")):
166
  try:
167
  if progress:
168
- progress(0.2, desc="Downloading image from URL...")
169
  response = requests.get(image_path, timeout=30)
170
  response.raise_for_status()
171
 
@@ -192,18 +196,49 @@ def handle_image_input(image_path: Union[str, None], progress: gr.Progress = Non
192
  return image_path
193
 
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  @spaces.GPU
196
- def _process_htr_pipeline(
197
- image_path: str,
198
  document_type: FormatChoices,
199
  custom_settings: Optional[str] = None,
200
  progress: gr.Progress = None
201
- ) -> Collection:
202
- """Process HTR pipeline and return the processed collection."""
 
 
 
 
 
203
 
204
- # Handle image input
205
- image_path = handle_image_input(image_path, progress)
206
-
207
  if custom_settings:
208
  try:
209
  config = json.loads(custom_settings)
@@ -211,79 +246,120 @@ def _process_htr_pipeline(
211
  raise ValueError("Invalid JSON in custom_settings parameter. Please check your JSON syntax.")
212
  else:
213
  config = PIPELINE_CONFIGS[document_type]
214
-
215
- if progress:
216
- progress(0.3, desc="Initializing HTR pipeline...")
217
 
218
- collection = Collection([image_path])
219
  pipeline = Pipeline.from_config(config)
220
-
221
- try:
222
- # Track pipeline steps
223
- total_steps = len(config.get("steps", []))
224
-
225
- if progress:
226
- progress(0.4, desc=f"Running HTR pipeline with {total_steps} steps...")
227
-
228
- # Run the pipeline (we could add more granular progress here if the pipeline supports it)
229
- processed_collection = pipeline.run(collection)
230
-
231
- if progress:
232
- progress(0.9, desc="Pipeline complete, preparing results...")
233
-
234
- return processed_collection
235
- except Exception as pipeline_error:
236
- raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}")
237
- finally:
238
- # Clean up temporary file if it was downloaded
239
- if image_path and image_path.startswith(tempfile.gettempdir()):
240
- try:
241
- os.unlink(image_path)
242
- except:
243
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
 
246
- def htr_text(
247
- image_path: str,
248
  document_type: FormatChoices = "letter_swedish",
249
  custom_settings: Optional[str] = None,
 
250
  progress: gr.Progress = gr.Progress()
251
  ) -> str:
252
  """
253
- Extract text from handwritten documents using HTR (Handwritten Text Recognition).
254
 
255
- This tool processes historical handwritten documents and extracts the text content.
256
- Supports various document layouts including letters and book spreads in English and Swedish.
257
 
258
  Args:
259
- image_path: Path to the document image file or URL to download from
260
- document_type: Type of document layout - choose based on your document's structure and language
261
  custom_settings: Optional JSON configuration for advanced pipeline customization
 
262
 
263
  Returns:
264
- Extracted text from the handwritten document
265
  """
266
  try:
267
- progress(0, desc="Starting HTR text extraction...")
 
 
 
 
 
 
268
 
269
- processed_collection = _process_htr_pipeline(
270
- image_path, document_type, custom_settings, progress
 
 
 
271
  )
272
 
273
- progress(0.95, desc="Extracting text from results...")
274
- extracted_text = extract_text_from_collection(processed_collection)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
- progress(1.0, desc="Text extraction complete!")
277
- return extracted_text
278
-
279
  except ValueError as e:
280
  return f"Input error: {str(e)}"
281
  except Exception as e:
282
- return f"HTR text extraction failed: {str(e)}"
283
 
284
 
285
- def htrflow_file(
286
- image_path: str,
287
  document_type: FormatChoices = "letter_swedish",
288
  output_format: FileChoices = DEFAULT_OUTPUT,
289
  custom_settings: Optional[str] = None,
@@ -291,120 +367,190 @@ def htrflow_file(
291
  progress: gr.Progress = gr.Progress()
292
  ) -> str:
293
  """
294
- Process handwritten document and generate a formatted output file.
295
 
296
- This tool performs HTR on a document and exports the results in various formats
297
- suitable for digital archiving, further processing, or integration with other systems.
298
 
299
  Args:
300
- image_path: Path to the document image file or URL to download from
301
  document_type: Type of document layout - affects segmentation and reading order
302
  output_format: Desired output format (txt for plain text, alto/page for XML with coordinates, json for structured data)
303
  custom_settings: Optional JSON configuration for advanced pipeline customization
304
  server_name: Base URL of the server (used for generating download links)
305
 
306
  Returns:
307
- Path to the generated file for download
308
  """
309
  try:
310
- progress(0, desc="Starting HTR file processing...")
311
 
312
- original_filename = Path(image_path).stem if image_path else "output"
313
-
314
- processed_collection = _process_htr_pipeline(
315
- image_path, document_type, custom_settings, progress
 
 
 
 
 
 
 
 
 
 
316
  )
317
-
318
- progress(0.92, desc=f"Generating {output_format.upper()} file...")
319
 
 
 
 
320
  temp_dir = Path(tempfile.mkdtemp())
321
- export_dir = temp_dir / output_format
322
- processed_collection.save(directory=str(export_dir), serializer=output_format)
323
-
324
- output_file_path = None
325
- for root, _, files in os.walk(export_dir):
326
- for file in files:
327
- old_path = os.path.join(root, file)
328
- file_ext = Path(file).suffix
329
- new_filename = (
330
- f"{original_filename}.{output_format}"
331
- if not file_ext
332
- else f"{original_filename}{file_ext}"
333
- )
334
- new_path = os.path.join(root, new_filename)
335
- os.rename(old_path, new_path)
336
- output_file_path = new_path
337
- break
338
-
339
- progress(1.0, desc="File generation complete!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
- if output_file_path and os.path.exists(output_file_path):
342
- return output_file_path
343
- else:
344
- return None
345
-
346
  except ValueError as e:
347
- # Create an error file with the error message
348
  error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
349
- error_file.write(f"Error: {str(e)}")
350
  error_file.close()
351
  return error_file.name
352
  except Exception as e:
353
- # Create an error file with the error message
354
  error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
355
- error_file.write(f"HTR file generation failed: {str(e)}")
356
  error_file.close()
357
  return error_file.name
358
 
359
 
360
- def htrflow_visualizer_with_progress(
361
- image_path: str,
362
- htr_document_path: str,
363
  server_name: str = "https://gabriel-htrflow-mcp.hf.space",
364
  progress: gr.Progress = gr.Progress()
365
  ) -> str:
366
  """
367
- Create a visualization of HTR results overlaid on the original document.
368
 
369
- This tool generates an annotated image showing detected text regions, reading order,
370
- and recognized text overlaid on the original document image. Useful for quality control
371
- and understanding the HTR process.
372
 
373
  Args:
374
- image_path: Path to the original document image file or URL
375
- htr_document_path: Path to the HTR output file (ALTO or PAGE XML format)
376
  server_name: Base URL of the server (used for generating download links)
377
 
378
  Returns:
379
- Path to the generated visualization image for download
380
  """
381
  try:
382
- progress(0, desc="Starting visualization generation...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
- # Handle image input
385
- image_path = handle_image_input(image_path, progress)
 
 
 
 
386
 
387
- progress(0.5, desc="Creating visualization...")
388
 
389
- # Call the original visualizer function
390
- result = htrflow_visualizer(image_path, htr_document_path, server_name)
 
 
 
391
 
392
- progress(1.0, desc="Visualization complete!")
 
 
393
 
394
- return result
395
  except Exception as e:
396
- # Create an error file
397
  error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
398
- error_file.write(f"Visualization failed: {str(e)}")
399
  error_file.close()
400
  return error_file.name
401
- finally:
402
- # Clean up temporary file if it was downloaded
403
- if image_path and image_path.startswith(tempfile.gettempdir()):
404
- try:
405
- os.unlink(image_path)
406
- except:
407
- pass
408
 
409
 
410
  def extract_text_from_collection(collection: Collection) -> str:
@@ -418,16 +564,20 @@ def extract_text_from_collection(collection: Collection) -> str:
418
 
419
 
420
  def create_htrflow_mcp_server():
421
- # HTR Text extraction interface with improved API description
422
- htr_text_interface = gr.Interface(
423
- fn=htr_text,
424
  inputs=[
425
- gr.Image(type="filepath", label="Upload Image or Enter URL"),
 
 
 
 
426
  gr.Dropdown(
427
  choices=FORMAT_CHOICES,
428
  value="letter_swedish",
429
  label="Document Type",
430
- info="Select the type that best matches your document's layout and language"
431
  ),
432
  gr.Textbox(
433
  label="Custom Settings (JSON)",
@@ -435,24 +585,34 @@ def create_htrflow_mcp_server():
435
  value="",
436
  lines=3
437
  ),
 
 
 
 
 
 
438
  ],
439
- outputs=[gr.Textbox(label="Extracted Text", lines=15)],
440
- title="Extract Text from Handwritten Documents",
441
- description="Upload a handwritten document image to extract text using AI-powered HTR",
442
- api_name="htr_text",
443
- api_description="Extract text from handwritten historical documents using advanced HTR models. Supports letters and book spreads in English and Swedish.",
444
  )
445
 
446
- # HTR File generation interface
447
- htrflow_file_interface = gr.Interface(
448
- fn=htrflow_file,
449
  inputs=[
450
- gr.Image(type="filepath", label="Upload Image or Enter URL"),
 
 
 
 
451
  gr.Dropdown(
452
  choices=FORMAT_CHOICES,
453
  value="letter_swedish",
454
  label="Document Type",
455
- info="Select the type that best matches your document's layout and language"
456
  ),
457
  gr.Dropdown(
458
  choices=FILE_CHOICES,
@@ -473,19 +633,27 @@ def create_htrflow_mcp_server():
473
  visible=False # Hide this from UI but keep for API
474
  ),
475
  ],
476
- outputs=[gr.File(label="Download HTR Output File")],
477
- title="Generate HTR Output Files",
478
- description="Process handwritten documents and export in various formats (XML, JSON, TXT)",
479
- api_name="htrflow_file",
480
- api_description="Process handwritten documents and generate formatted output files. Outputs can be in ALTO XML (with text coordinates), PAGE XML, JSON (structured data), or plain text format.",
481
  )
482
 
483
- # HTR Visualization interface
484
- htrflow_viz = gr.Interface(
485
- fn=htrflow_visualizer_with_progress,
486
  inputs=[
487
- gr.Image(type="filepath", label="Upload Original Image"),
488
- gr.File(label="Upload ALTO/PAGE XML File", file_types=[".xml"]),
 
 
 
 
 
 
 
 
489
  gr.Textbox(
490
  label="Server Name",
491
  value="https://gabriel-htrflow-mcp.hf.space",
@@ -493,30 +661,97 @@ def create_htrflow_mcp_server():
493
  visible=False # Hide this from UI but keep for API
494
  ),
495
  ],
496
- outputs=gr.File(label="Download Visualization Image"),
497
- title="Visualize HTR Results",
498
- description="Create an annotated image showing detected text regions and recognized text",
499
- api_name="htrflow_visualizer",
500
- api_description="Generate a visualization image showing HTR results overlaid on the original document. Shows detected text regions, reading order, and recognized text for quality control.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  )
502
 
503
  # Create tabbed interface with better organization
504
  demo = gr.TabbedInterface(
505
- [htr_text_interface, htrflow_file_interface, htrflow_viz],
506
- ["Extract Text", "Generate Files", "Visualize Results"],
507
- title="🖋️ HTRflow - Handwritten Text Recognition",
 
 
 
 
 
 
 
 
 
 
 
 
508
  analytics_enabled=False,
509
  )
510
 
511
  return demo
512
 
513
-
514
  if __name__ == "__main__":
515
  demo = create_htrflow_mcp_server()
516
  demo.launch(
517
  mcp_server=True,
518
  share=False,
519
  debug=False,
520
- show_api=True, # Ensure API is visible
521
  favicon_path=None,
522
  )
 
2
  import json
3
  import tempfile
4
  import os
5
+ import zipfile
6
+ import shutil
7
+ from typing import List, Optional, Literal, Tuple, Union, Dict
8
  from PIL import Image
9
  import requests
10
  from io import BytesIO
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
 
13
  import spaces
14
  from pathlib import Path
 
147
  }
148
 
149
 
150
+ def handle_image_input(image_path: Union[str, None], progress: gr.Progress = None, desc_prefix: str = "") -> str:
151
  """
152
  Handle image input from various sources (local file, URL, or uploaded file).
153
 
154
  Args:
155
  image_path: Path to image file or URL
156
  progress: Progress tracker for UI updates
157
+ desc_prefix: Prefix for progress descriptions
158
 
159
  Returns:
160
  Local file path to the image
 
163
  raise ValueError("No image provided. Please upload an image or provide a URL.")
164
 
165
  if progress:
166
+ progress(0.1, desc=f"{desc_prefix}Processing image input...")
167
 
168
  # If it's a URL, download the image
169
  if isinstance(image_path, str) and (image_path.startswith("http://") or image_path.startswith("https://")):
170
  try:
171
  if progress:
172
+ progress(0.2, desc=f"{desc_prefix}Downloading image from URL...")
173
  response = requests.get(image_path, timeout=30)
174
  response.raise_for_status()
175
 
 
196
  return image_path
197
 
198
 
199
def parse_image_input(image_input: Union[str, List[str], None]) -> List[str]:
    """
    Parse image input which can be a single path, multiple paths, or URLs separated by newlines.

    Args:
        image_input: Single image path, list of paths, or newline-separated URLs/paths

    Returns:
        List of non-empty image paths/URLs in input order, with surrounding
        whitespace removed. Empty when nothing usable was provided.
    """
    if not image_input:
        return []

    if isinstance(image_input, str):
        # One entry per line; strip() below also removes a trailing "\r"
        # left over from Windows line endings.
        candidates = image_input.split("\n")
    elif isinstance(image_input, (list, tuple)):
        candidates = image_input
    else:
        return []

    paths = []
    for entry in candidates:
        # An empty single-image upload arrives as [None]; treat it as
        # "no image" rather than passing None on as a path.
        if entry is None:
            continue
        if isinstance(entry, os.PathLike):
            entry = os.fspath(entry)
        if isinstance(entry, str):
            entry = entry.strip()
            if not entry:
                continue
        paths.append(entry)

    return paths
226
+
227
+
228
  @spaces.GPU
229
+ def _process_htr_pipeline_batch(
230
+ image_paths: List[str],
231
  document_type: FormatChoices,
232
  custom_settings: Optional[str] = None,
233
  progress: gr.Progress = None
234
+ ) -> Dict[str, Collection]:
235
+ """Process HTR pipeline for multiple images and return processed collections."""
236
+
237
+ results = {}
238
+ temp_files = []
239
+
240
+ total_images = len(image_paths)
241
 
 
 
 
242
  if custom_settings:
243
  try:
244
  config = json.loads(custom_settings)
 
246
  raise ValueError("Invalid JSON in custom_settings parameter. Please check your JSON syntax.")
247
  else:
248
  config = PIPELINE_CONFIGS[document_type]
 
 
 
249
 
250
+ # Initialize pipeline once for all images
251
  pipeline = Pipeline.from_config(config)
252
+
253
+ for idx, image_path in enumerate(image_paths):
254
+ try:
255
+ image_name = Path(image_path).stem if not image_path.startswith("http") else f"image_{idx+1}"
256
+
257
+ if progress:
258
+ progress((idx + 0.2) / total_images,
259
+ desc=f"Processing image {idx+1}/{total_images}: {image_name}")
260
+
261
+ # Handle image input
262
+ processed_path = handle_image_input(image_path, progress,
263
+ desc_prefix=f"[{idx+1}/{total_images}] ")
264
+
265
+ # Track temp files for cleanup
266
+ if processed_path.startswith(tempfile.gettempdir()):
267
+ temp_files.append(processed_path)
268
+
269
+ if progress:
270
+ progress((idx + 0.5) / total_images,
271
+ desc=f"Running HTR on image {idx+1}/{total_images}: {image_name}")
272
+
273
+ # Process with pipeline
274
+ collection = Collection([processed_path])
275
+ processed_collection = pipeline.run(collection)
276
+
277
+ results[image_name] = processed_collection
278
+
279
+ if progress:
280
+ progress((idx + 0.9) / total_images,
281
+ desc=f"Completed image {idx+1}/{total_images}: {image_name}")
282
+
283
+ except Exception as e:
284
+ results[image_name] = f"Error: {str(e)}"
285
+ print(f"Error processing {image_path}: {str(e)}")
286
+
287
+ # Cleanup temp files
288
+ for temp_file in temp_files:
289
+ try:
290
+ os.unlink(temp_file)
291
+ except:
292
+ pass
293
+
294
+ if progress:
295
+ progress(1.0, desc=f"Completed processing {total_images} images!")
296
+
297
+ return results
298
 
299
 
300
def htr_text_batch(
    image_input: Union[str, List[str]],
    document_type: FormatChoices = "letter_swedish",
    custom_settings: Optional[str] = None,
    return_format: str = "separate",  # "separate" or "combined"
    progress: gr.Progress = gr.Progress()
) -> str:
    """
    Extract text from multiple handwritten documents using HTR.

    This tool processes multiple historical handwritten documents and extracts text content from each.
    You can provide multiple image paths/URLs separated by newlines, or upload multiple files.

    Args:
        image_input: Single image path/URL, multiple paths/URLs (newline-separated), or list of uploaded files
        document_type: Type of document layout - choose based on your documents' structure and language
        custom_settings: Optional JSON configuration for advanced pipeline customization
        return_format: "separate" to show each document's text separately, "combined" to merge all text

    Returns:
        Extracted text from all handwritten documents
    """
    try:
        progress(0, desc="Starting batch HTR text extraction...")

        image_paths = parse_image_input(image_input)
        if not image_paths:
            return "No images provided. Please upload images or provide URLs."

        progress(0.1, desc=f"Processing {len(image_paths)} images...")

        results = _process_htr_pipeline_batch(
            image_paths, document_type, custom_settings, progress
        )

        separate = return_format == "separate"
        sections = []
        for image_name, outcome in results.items():
            # The batch helper stores an error string in place of a
            # collection for images that failed.
            failed = isinstance(outcome, str)
            body = outcome if failed else extract_text_from_collection(outcome)
            # Failures always carry a header so they stay attributable,
            # even when the successful texts are being merged.
            if failed or separate:
                sections.append(f"=== {image_name} ===\n{body}\n")
            else:
                sections.append(body)

        joiner = "\n" if separate else "\n\n"
        return joiner.join(sections)

    except ValueError as e:
        return f"Input error: {e!s}"
    except Exception as e:
        return f"Batch HTR text extraction failed: {e!s}"
359
 
360
 
361
+ def htrflow_file_batch(
362
+ image_input: Union[str, List[str]],
363
  document_type: FormatChoices = "letter_swedish",
364
  output_format: FileChoices = DEFAULT_OUTPUT,
365
  custom_settings: Optional[str] = None,
 
367
  progress: gr.Progress = gr.Progress()
368
  ) -> str:
369
  """
370
+ Process multiple handwritten documents and generate formatted output files.
371
 
372
+ This tool performs HTR on multiple documents and exports the results in various formats.
373
+ Returns a ZIP file containing all processed documents.
374
 
375
  Args:
376
+ image_input: Single image path/URL, multiple paths/URLs (newline-separated), or list of uploaded files
377
  document_type: Type of document layout - affects segmentation and reading order
378
  output_format: Desired output format (txt for plain text, alto/page for XML with coordinates, json for structured data)
379
  custom_settings: Optional JSON configuration for advanced pipeline customization
380
  server_name: Base URL of the server (used for generating download links)
381
 
382
  Returns:
383
+ Path to ZIP file containing all generated files
384
  """
385
  try:
386
+ progress(0, desc="Starting batch HTR file processing...")
387
 
388
+ # Parse input to get list of images
389
+ image_paths = parse_image_input(image_input)
390
+
391
+ if not image_paths:
392
+ error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
393
+ error_file.write("No images provided. Please upload images or provide URLs.")
394
+ error_file.close()
395
+ return error_file.name
396
+
397
+ progress(0.1, desc=f"Processing {len(image_paths)} images...")
398
+
399
+ # Process all images
400
+ results = _process_htr_pipeline_batch(
401
+ image_paths, document_type, custom_settings, progress
402
  )
 
 
403
 
404
+ progress(0.9, desc="Creating ZIP archive...")
405
+
406
+ # Create temporary directory for output files
407
  temp_dir = Path(tempfile.mkdtemp())
408
+ output_files = []
409
+
410
+ for image_name, collection in results.items():
411
+ if isinstance(collection, str): # Error case
412
+ # Write error to text file
413
+ error_file_path = temp_dir / f"{image_name}_error.txt"
414
+ with open(error_file_path, 'w') as f:
415
+ f.write(collection)
416
+ output_files.append(error_file_path)
417
+ else:
418
+ # Save collection in requested format
419
+ export_dir = temp_dir / image_name
420
+ collection.save(directory=str(export_dir), serializer=output_format)
421
+
422
+ # Find and rename the generated file
423
+ for root, _, files in os.walk(export_dir):
424
+ for file in files:
425
+ old_path = Path(root) / file
426
+ file_ext = Path(file).suffix
427
+ new_filename = (
428
+ f"{image_name}.{output_format}"
429
+ if not file_ext
430
+ else f"{image_name}{file_ext}"
431
+ )
432
+ new_path = temp_dir / new_filename
433
+ shutil.move(str(old_path), str(new_path))
434
+ output_files.append(new_path)
435
+ break
436
+
437
+ # Create ZIP file
438
+ zip_path = temp_dir / f"htr_batch_{output_format}.zip"
439
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
440
+ for file_path in output_files:
441
+ zipf.write(file_path, file_path.name)
442
+
443
+ progress(1.0, desc=f"Batch processing complete! Processed {len(image_paths)} images.")
444
+
445
+ return str(zip_path)
446
 
 
 
 
 
 
447
  except ValueError as e:
 
448
  error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
449
+ error_file.write(f"Input error: {str(e)}")
450
  error_file.close()
451
  return error_file.name
452
  except Exception as e:
 
453
  error_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt')
454
+ error_file.write(f"Batch HTR file generation failed: {str(e)}")
455
  error_file.close()
456
  return error_file.name
457
 
458
 
459
def htrflow_visualizer_batch(
    image_input: Union[str, List[str]],
    htr_documents: Union[str, List[str]],
    server_name: str = "https://gabriel-htrflow-mcp.hf.space",
    progress: gr.Progress = gr.Progress()
) -> str:
    """
    Create visualizations for multiple HTR results overlaid on original documents.

    This tool generates annotated images showing detected text regions and recognized text
    for multiple documents. Returns a ZIP file containing all visualization images.

    Args:
        image_input: Original document image paths/URLs (newline-separated if string)
        htr_documents: HTR output files (ALTO/PAGE XML) - must match order of images
        server_name: Base URL of the server (used for generating download links)

    Returns:
        Path to ZIP file containing all visualization images. A document that could not be
        visualized is represented in the ZIP by a "<name>_viz_error.txt" file. If the whole
        batch fails, the path to a text file describing the error is returned instead.
    """
    try:
        progress(0, desc="Starting batch visualization generation...")

        # Parse inputs
        image_paths = parse_image_input(image_input)
        htr_paths = parse_image_input(htr_documents) if isinstance(htr_documents, str) else htr_documents

        if not image_paths:
            raise ValueError("No images provided")

        if not htr_paths:
            raise ValueError("No HTR documents provided")

        if len(image_paths) != len(htr_paths):
            raise ValueError(f"Number of images ({len(image_paths)}) doesn't match number of HTR documents ({len(htr_paths)})")

        total = len(image_paths)
        progress(0.1, desc=f"Creating visualizations for {total} documents...")

        temp_dir = Path(tempfile.mkdtemp())
        output_files = []
        downloaded_files = []
        used_names = set()
        created = 0

        try:
            for idx, (image_path, htr_path) in enumerate(zip(image_paths, htr_paths)):
                # Resolve the output name before entering the try block so the
                # error handler below never sees an unbound or stale name.
                image_name = f"image_{idx+1}"
                if isinstance(image_path, str) and not image_path.startswith(("http://", "https://")):
                    image_name = Path(image_path).stem or image_name

                # Two inputs with the same stem (e.g. a/page.jpg and b/page.jpg)
                # would otherwise overwrite each other inside the ZIP.
                base_name, suffix = image_name, 2
                while image_name in used_names:
                    image_name = f"{base_name}_{suffix}"
                    suffix += 1
                used_names.add(image_name)

                try:
                    progress((idx + 0.3) / total,
                             desc=f"Visualizing document {idx+1}/{total}: {image_name}")

                    # Handle image input
                    processed_image = handle_image_input(image_path, progress,
                                                         desc_prefix=f"[{idx+1}/{total}] ")

                    # Only remove files that were created for this call. A path
                    # returned unchanged belongs to the caller (for example an
                    # upload that merely lives in the temp directory).
                    if processed_image != image_path and processed_image.startswith(tempfile.gettempdir()):
                        downloaded_files.append(processed_image)

                    # Generate visualization
                    viz_result = htrflow_visualizer(processed_image, htr_path, server_name)

                    if not viz_result or not os.path.exists(viz_result):
                        # Report the gap instead of silently omitting the document.
                        raise RuntimeError("Visualizer produced no output file")

                    # Move to temp dir with proper name
                    viz_path = temp_dir / f"{image_name}_visualization.png"
                    shutil.move(viz_result, str(viz_path))
                    output_files.append(viz_path)
                    created += 1

                except Exception as e:
                    # Create error file for this visualization; the batch continues.
                    error_path = temp_dir / f"{image_name}_viz_error.txt"
                    with open(error_path, 'w', encoding="utf-8") as f:
                        f.write(f"Visualization failed: {str(e)}")
                    output_files.append(error_path)
        finally:
            # Best-effort cleanup of downloaded images, even if the loop aborts.
            for temp_file in downloaded_files:
                try:
                    os.unlink(temp_file)
                except OSError:
                    pass

        progress(0.9, desc="Creating ZIP archive...")

        # Create ZIP file
        zip_path = temp_dir / "htr_visualizations.zip"
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file_path in output_files:
                zipf.write(file_path, file_path.name)

        progress(1.0, desc=f"Visualization complete! Created {created} visualizations.")

        return str(zip_path)

    except Exception as e:
        # Whole-batch failure: hand back a downloadable text file with the
        # reason, since this function's output is shown as a file download.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt') as error_file:
            error_file.write(f"Batch visualization failed: {str(e)}")
        return error_file.name
 
 
 
 
 
 
 
554
 
555
 
556
  def extract_text_from_collection(collection: Collection) -> str:
 
564
 
565
 
566
  def create_htrflow_mcp_server():
567
+ # Batch HTR Text extraction interface
568
+ htr_text_batch_interface = gr.Interface(
569
+ fn=htr_text_batch,
570
  inputs=[
571
+ gr.Textbox(
572
+ label="Image Paths/URLs (one per line) or Upload Files",
573
+ placeholder="https://example.com/image1.jpg\nhttps://example.com/image2.jpg\n\nOR drag and drop multiple files",
574
+ lines=5
575
+ ),
576
  gr.Dropdown(
577
  choices=FORMAT_CHOICES,
578
  value="letter_swedish",
579
  label="Document Type",
580
+ info="Select the type that best matches your documents' layout and language"
581
  ),
582
  gr.Textbox(
583
  label="Custom Settings (JSON)",
 
585
  value="",
586
  lines=3
587
  ),
588
+ gr.Radio(
589
+ choices=["separate", "combined"],
590
+ value="separate",
591
+ label="Output Format",
592
+ info="'separate' shows each document's text with headers, 'combined' merges all text"
593
+ ),
594
  ],
595
+ outputs=[gr.Textbox(label="Extracted Text", lines=20)],
596
+ title="Batch Extract Text from Handwritten Documents",
597
+ description="Process multiple handwritten document images at once. Upload files or provide URLs (one per line)",
598
+ api_name="htr_text_batch",
599
+ api_description="Extract text from multiple handwritten historical documents using advanced HTR models. Supports batch processing of letters and book spreads in English and Swedish. If a user passes a file as an input, use the upload_file_to_gradio tool, if present, to upload the file to the gradio app and create a Gradio File Input. Then use the returned path as the input to the tool",
600
  )
601
 
602
+ # Batch HTR File generation interface
603
+ htrflow_file_batch_interface = gr.Interface(
604
+ fn=htrflow_file_batch,
605
  inputs=[
606
+ gr.Textbox(
607
+ label="Image Paths/URLs (one per line) or Upload Files",
608
+ placeholder="https://example.com/image1.jpg\nhttps://example.com/image2.jpg\n\nOR drag and drop multiple files",
609
+ lines=5
610
+ ),
611
  gr.Dropdown(
612
  choices=FORMAT_CHOICES,
613
  value="letter_swedish",
614
  label="Document Type",
615
+ info="Select the type that best matches your documents' layout and language"
616
  ),
617
  gr.Dropdown(
618
  choices=FILE_CHOICES,
 
633
  visible=False # Hide this from UI but keep for API
634
  ),
635
  ],
636
+ outputs=[gr.File(label="Download ZIP with HTR Output Files")],
637
+ title="Batch Generate HTR Output Files",
638
+ description="Process multiple handwritten documents and export in various formats. Returns a ZIP file with all results.",
639
+ api_name="htrflow_file_batch",
640
+ api_description="Process multiple handwritten documents and generate formatted output files. Returns a ZIP containing outputs in ALTO XML (with text coordinates), PAGE XML, JSON (structured data), or plain text format. If a user passes a file as an input, use the upload_file_to_gradio tool, if present, to upload the file to the gradio app and create a Gradio File Input. Then use the returned path as the input to the tool",
641
  )
642
 
643
+ # Batch HTR Visualization interface
644
+ htrflow_viz_batch_interface = gr.Interface(
645
+ fn=htrflow_visualizer_batch,
646
  inputs=[
647
+ gr.Textbox(
648
+ label="Original Image Paths/URLs (one per line)",
649
+ placeholder="https://example.com/image1.jpg\nhttps://example.com/image2.jpg",
650
+ lines=5
651
+ ),
652
+ gr.File(
653
+ label="Upload HTR XML Files (ALTO/PAGE)",
654
+ file_types=[".xml"],
655
+ file_count="multiple"
656
+ ),
657
  gr.Textbox(
658
  label="Server Name",
659
  value="https://gabriel-htrflow-mcp.hf.space",
 
661
  visible=False # Hide this from UI but keep for API
662
  ),
663
  ],
664
+ outputs=gr.File(label="Download ZIP with Visualization Images"),
665
+ title="Batch Visualize HTR Results",
666
+ description="Create annotated images for multiple documents. Images and XML files must be in matching order.",
667
+ api_name="htrflow_visualizer_batch",
668
+ api_description="Generate visualization images showing HTR results overlaid on multiple original documents. Shows detected text regions, reading order, and recognized text for quality control. Returns a ZIP file with all visualizations. If a user passes a file as an input, use the upload_file_to_gradio tool, if present, to upload the file to the gradio app and create a Gradio File Input. Then use the returned path as the input to the tool",
669
+ )
670
+
671
+ # Legacy single-file interfaces (kept for backward compatibility)
672
+ htr_text_interface = gr.Interface(
673
+ fn=lambda img, doc_type, settings: htr_text_batch(img, doc_type, settings, "separate"),
674
+ inputs=[
675
+ gr.Image(type="filepath", label="Upload Image or Enter URL"),
676
+ gr.Dropdown(
677
+ choices=FORMAT_CHOICES,
678
+ value="letter_swedish",
679
+ label="Document Type"
680
+ ),
681
+ gr.Textbox(
682
+ label="Custom Settings (JSON)",
683
+ placeholder='{"steps": [...]}',
684
+ value="",
685
+ lines=3
686
+ ),
687
+ ],
688
+ outputs=[gr.Textbox(label="Extracted Text", lines=15)],
689
+ title="Extract Text (Single Document)",
690
+ description="Upload a single handwritten document image to extract text",
691
+ api_name="htr_text",
692
+ api_description="Extract text from handwritten historical documents using advanced HTR models. Supports letters and book spreads in English and Swedish. If a user passes a file as an input, use the upload_file_to_gradio tool, if present, to upload the file to the gradio app and create a Gradio File Input. Then use the returned path as the input to the tool",
693
+ )
694
+
695
+ htrflow_file_interface = gr.Interface(
696
+ fn=lambda img, doc_type, fmt, settings, srv: htrflow_file_batch([img], doc_type, fmt, settings, srv),
697
+ inputs=[
698
+ gr.Image(type="filepath", label="Upload Image or Enter URL"),
699
+ gr.Dropdown(
700
+ choices=FORMAT_CHOICES,
701
+ value="letter_swedish",
702
+ label="Document Type"
703
+ ),
704
+ gr.Dropdown(
705
+ choices=FILE_CHOICES,
706
+ value=DEFAULT_OUTPUT,
707
+ label="Output Format"
708
+ ),
709
+ gr.Textbox(
710
+ label="Custom Settings (JSON)",
711
+ value="",
712
+ lines=3
713
+ ),
714
+ gr.Textbox(
715
+ label="Server Name",
716
+ value="https://gabriel-htrflow-mcp.hf.space",
717
+ visible=False
718
+ ),
719
+ ],
720
+ outputs=[gr.File(label="Download HTR Output File")],
721
+ title="Generate File (Single Document)",
722
+ description="Process a single handwritten document and export in various formats",
723
+ api_name="htrflow_file",
724
+ api_description="Process handwritten documents and generate formatted output files. Outputs can be in ALTO XML (with text coordinates), PAGE XML, JSON (structured data), or plain text format. If a user passes a file as an input, use the upload_file_to_gradio tool, if present, to upload the file to the gradio app and create a Gradio File Input. Then use the returned path as the input to the tool",
725
  )
726
 
727
  # Create tabbed interface with better organization
728
  demo = gr.TabbedInterface(
729
+ [
730
+ htr_text_batch_interface,
731
+ htrflow_file_batch_interface,
732
+ htrflow_viz_batch_interface,
733
+ htr_text_interface,
734
+ htrflow_file_interface,
735
+ ],
736
+ [
737
+ "📚 Batch Text Extract",
738
+ "📁 Batch File Generate",
739
+ "🖼️ Batch Visualize",
740
+ "📄 Single Text",
741
+ "💾 Single File",
742
+ ],
743
+ title="🖋️ HTRflow - Handwritten Text Recognition (Batch & Single Processing)",
744
  analytics_enabled=False,
745
  )
746
 
747
  return demo
748
 
 
749
  if __name__ == "__main__":
750
  demo = create_htrflow_mcp_server()
751
  demo.launch(
752
  mcp_server=True,
753
  share=False,
754
  debug=False,
755
+ show_api=True,
756
  favicon_path=None,
757
  )