Gabriel committed on
Commit
c6b50f6
·
verified ·
1 Parent(s): 6d382b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -21
app.py CHANGED
@@ -116,9 +116,9 @@ PIPELINE_CONFIGS = {
116
  }
117
 
118
  @spaces.GPU
119
- def htrflow_htr(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
120
  """
121
- Process handwritten text recognition (HTR) on uploaded images and return extracted text in the specified format.
122
 
123
  This function uses machine learning models to automatically detect, segment, and transcribe handwritten text
124
  from historical documents. It supports different document types and languages, with specialized models
@@ -130,8 +130,8 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
130
 
131
  document_type (Literal): The type of document and language processing template to use.
132
  Available options:
133
- - "letter_english": Single-page English handwritten letters (default: "letter_swedish")
134
- - "letter_swedish": Single-page Swedish handwritten letters
135
  - "spread_english": Two-page spread English documents with marginalia
136
  - "spread_swedish": Two-page spread Swedish documents with marginalia
137
  Default: "letter_swedish"
@@ -143,20 +143,20 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
143
  - "page": PAGE XML format with structural markup and positioning data
144
  - "json": JSON format with structured text, layout information and metadata
145
  Default: "alto"
146
- Note: Both "alto" and "page" formats are XML-based with layout information.
147
 
148
  custom_settings (Optional[str]): Advanced users can provide custom pipeline configuration as a
149
- JSON string to override the default processing steps. This allows
150
- fine-tuning of model parameters, batch sizes, and processing workflow.
151
  Default: None (uses predefined configuration for document_type)
152
 
153
  Returns:
154
- str: The file path to the generated output file containing the transcribed text in the requested format,
155
- or an error message if processing fails. The output file will be named based on the original
156
- image filename with the appropriate extension (.txt, .xml, or .json).
 
 
157
  """
158
  if not image_path:
159
- return "Error: No image provided"
160
 
161
  try:
162
  original_filename = Path(image_path).stem or "output"
@@ -165,7 +165,7 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
165
  try:
166
  config = json.loads(custom_settings)
167
  except json.JSONDecodeError:
168
- return "Error: Invalid JSON in custom_settings parameter"
169
  else:
170
  config = PIPELINE_CONFIGS[document_type]
171
 
@@ -175,7 +175,7 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
175
  try:
176
  processed_collection = pipeline.run(collection)
177
  except Exception as pipeline_error:
178
- return f"Error: Pipeline execution failed: {str(pipeline_error)}"
179
 
180
  temp_dir = Path(tempfile.mkdtemp())
181
  export_dir = temp_dir / output_format
@@ -193,12 +193,40 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
193
  break
194
 
195
  if output_file_path and os.path.exists(output_file_path):
196
- return output_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  else:
198
- return "Error: Failed to generate output file"
199
 
200
  except Exception as e:
201
- return f"Error: HTR processing failed: {str(e)}"
 
 
 
 
202
 
203
  def extract_text_from_collection(collection: Collection) -> str:
204
  text_lines = []
@@ -209,19 +237,37 @@ def extract_text_from_collection(collection: Collection) -> str:
209
  return "\n".join(text_lines)
210
 
211
  def create_htrflow_mcp_server():
212
- demo = gr.Interface(
213
- fn=htrflow_htr,
214
  inputs=[
215
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
216
  gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
217
  gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
218
  gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
219
  ],
 
 
 
 
 
 
 
 
 
 
 
220
  outputs=gr.File(label="Download Output File"),
221
- title="HTRflow MCP Server",
222
- description="Process handwritten text from uploaded file or URL and get output file in specified format",
223
- api_name="htrflow_htr",
224
  )
 
 
 
 
 
 
 
 
225
  return demo
226
 
227
  if __name__ == "__main__":
 
116
  }
117
 
118
  @spaces.GPU
119
+ def htrflow_htr_url(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
120
  """
121
+ Process handwritten text recognition (HTR) on uploaded images and return both file content and download link.
122
 
123
  This function uses machine learning models to automatically detect, segment, and transcribe handwritten text
124
  from historical documents. It supports different document types and languages, with specialized models
 
130
 
131
  document_type (Literal): The type of document and language processing template to use.
132
  Available options:
133
+ - "letter_english": Single-page English handwritten letters
134
+ - "letter_swedish": Single-page Swedish handwritten letters (default)
135
  - "spread_english": Two-page spread English documents with marginalia
136
  - "spread_swedish": Two-page spread Swedish documents with marginalia
137
  Default: "letter_swedish"
 
143
  - "page": PAGE XML format with structural markup and positioning data
144
  - "json": JSON format with structured text, layout information and metadata
145
  Default: "alto"
 
146
 
147
  custom_settings (Optional[str]): Advanced users can provide custom pipeline configuration as a
148
+ JSON string to override the default processing steps.
 
149
  Default: None (uses predefined configuration for document_type)
150
 
151
  Returns:
152
+ str: JSON string containing both the file content and download link:
153
+ {
154
+ "content": "file_content_here",
155
+ "file_path": "[file_name](http://your-server:port/gradio_api//file=/tmp/gradio/{temp_folder}/{file_name}.{file_format})"
156
+ }
157
  """
158
  if not image_path:
159
+ return json.dumps({"error": "No image provided"})
160
 
161
  try:
162
  original_filename = Path(image_path).stem or "output"
 
165
  try:
166
  config = json.loads(custom_settings)
167
  except json.JSONDecodeError:
168
+ return json.dumps({"error": "Invalid JSON in custom_settings parameter"})
169
  else:
170
  config = PIPELINE_CONFIGS[document_type]
171
 
 
175
  try:
176
  processed_collection = pipeline.run(collection)
177
  except Exception as pipeline_error:
178
+ return json.dumps({"error": f"Pipeline execution failed: {str(pipeline_error)}"})
179
 
180
  temp_dir = Path(tempfile.mkdtemp())
181
  export_dir = temp_dir / output_format
 
193
  break
194
 
195
  if output_file_path and os.path.exists(output_file_path):
196
+ # Read the file content
197
+ try:
198
+ with open(output_file_path, 'r', encoding='utf-8') as f:
199
+ file_content = f.read()
200
+ except UnicodeDecodeError:
201
+ # If UTF-8 fails, try with different encoding or read as binary for certain formats
202
+ try:
203
+ with open(output_file_path, 'r', encoding='latin-1') as f:
204
+ file_content = f.read()
205
+ except:
206
+ with open(output_file_path, 'rb') as f:
207
+ file_content = f.read().decode('utf-8', errors='replace')
208
+
209
+ # Create the markdown link
210
+ file_name = Path(output_file_path).name
211
+ temp_folder = Path(output_file_path).parent.name
212
+ markdown_link = f"[{file_name}](http://your-server:port/gradio_api//file=/tmp/gradio/{temp_folder}/{file_name})"
213
+
214
+ # Return JSON with both content and file path
215
+ result = {
216
+ "content": file_content,
217
+ "file_path": markdown_link
218
+ }
219
+
220
+ return json.dumps(result, ensure_ascii=False, indent=2)
221
  else:
222
+ return json.dumps({"error": "Failed to generate output file"})
223
 
224
  except Exception as e:
225
+ return json.dumps({"error": f"HTR processing failed: {str(e)}"})
226
+
227
+
228
+ def htrflow_visualizer(image: str, htr_document: str) -> str:
229
+ pass
230
 
231
  def extract_text_from_collection(collection: Collection) -> str:
232
  text_lines = []
 
237
  return "\n".join(text_lines)
238
 
239
  def create_htrflow_mcp_server():
240
+ htrflow_url = gr.Interface(
241
+ fn=htrflow_htr_url,
242
  inputs=[
243
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
244
  gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
245
  gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
246
  gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
247
  ],
248
+ outputs=gr.Textbox(label="HTR Result (JSON)", lines=10),
249
+ description="Process handwritten text from uploaded file or URL and get both content and download link in JSON format",
250
+ api_name="htrflow_htr_url",
251
+ )
252
+
253
+ htrflow_viz = gr.Interface(
254
+ fn=htrflow_visualizer,
255
+ inputs=[
256
+ gr.Image(type="filepath", label="Upload Image or Enter URL"),
257
+ gr.Textbox(label="HTR Document content", placeholder="Path to the HTR document file", value=""),
258
+ ],
259
  outputs=gr.File(label="Download Output File"),
260
+ description="Visualize document",
261
+ api_name="htrflow_visualizer"
 
262
  )
263
+
264
+ demo = gr.TabbedInterface(
265
+ [htrflow_url, htrflow_viz],
266
+ ["HTR URL", "HTR Visualizer"],
267
+ title="HTRflow Handwritten Text Recognition",
268
+ description="Extract text and visualize handwritten historical documents using HTRflow",
269
+ )
270
+
271
  return demo
272
 
273
  if __name__ == "__main__":