htrflow_mcp

Running on Zero

App Files Files Community

Gabriel committed on Jun 8

Commit

c6b50f6

verified ·

1 Parent(s): 6d382b7

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -21

app.py CHANGED Viewed

@@ -116,9 +116,9 @@ PIPELINE_CONFIGS = {
 }
 @spaces.GPU
-def htrflow_htr(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
     """
-    Process handwritten text recognition (HTR) on uploaded images and return extracted text in the specified format.
     This function uses machine learning models to automatically detect, segment, and transcribe handwritten text
     from historical documents. It supports different document types and languages, with specialized models
@@ -130,8 +130,8 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
         document_type (Literal): The type of document and language processing template to use.
                                 Available options:
-                                - "letter_english": Single-page English handwritten letters (default: "letter_swedish")
-                                - "letter_swedish": Single-page Swedish handwritten letters
                                 - "spread_english": Two-page spread English documents with marginalia
                                 - "spread_swedish": Two-page spread Swedish documents with marginalia
                                 Default: "letter_swedish"
@@ -143,20 +143,20 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
                                 - "page": PAGE XML format with structural markup and positioning data
                                 - "json": JSON format with structured text, layout information and metadata
                                 Default: "alto"
-                                Note: Both "alto" and "page" formats are XML-based with layout information.
         custom_settings (Optional[str]): Advanced users can provide custom pipeline configuration as a
-                                        JSON string to override the default processing steps. This allows
-                                        fine-tuning of model parameters, batch sizes, and processing workflow.
                                         Default: None (uses predefined configuration for document_type)
     Returns:
-        str: The file path to the generated output file containing the transcribed text in the requested format,
-             or an error message if processing fails. The output file will be named based on the original
-             image filename with the appropriate extension (.txt, .xml, or .json).
     """
     if not image_path:
-        return "Error: No image provided"
     try:
         original_filename = Path(image_path).stem or "output"
@@ -165,7 +165,7 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
             try:
                 config = json.loads(custom_settings)
             except json.JSONDecodeError:
-                return "Error: Invalid JSON in custom_settings parameter"
         else:
             config = PIPELINE_CONFIGS[document_type]
@@ -175,7 +175,7 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
         try:
             processed_collection = pipeline.run(collection)
         except Exception as pipeline_error:
-            return f"Error: Pipeline execution failed: {str(pipeline_error)}"
         temp_dir = Path(tempfile.mkdtemp())
         export_dir = temp_dir / output_format
@@ -193,12 +193,40 @@ def htrflow_htr(image_path: str, document_type: Literal["letter_english", "lette
                 break
         if output_file_path and os.path.exists(output_file_path):
-            return output_file_path
         else:
-            return "Error: Failed to generate output file"
     except Exception as e:
-        return f"Error: HTR processing failed: {str(e)}"
 def extract_text_from_collection(collection: Collection) -> str:
     text_lines = []
@@ -209,19 +237,37 @@ def extract_text_from_collection(collection: Collection) -> str:
     return "\n".join(text_lines)
 def create_htrflow_mcp_server():
-    demo = gr.Interface(
-        fn=htrflow_htr,
         inputs=[
             gr.Image(type="filepath", label="Upload Image or Enter URL"),
             gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
             gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
             gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
         ],
         outputs=gr.File(label="Download Output File"),
-        title="HTRflow MCP Server",
-        description="Process handwritten text from uploaded file or URL and get output file in specified format",
-        api_name="htrflow_htr",
     )
     return demo
 if __name__ == "__main__":

 }
 @spaces.GPU
+def htrflow_htr_url(image_path: str, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_swedish", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None) -> str:
     """
+    Process handwritten text recognition (HTR) on uploaded images and return both file content and download link.
     This function uses machine learning models to automatically detect, segment, and transcribe handwritten text
     from historical documents. It supports different document types and languages, with specialized models
         document_type (Literal): The type of document and language processing template to use.
                                 Available options:
+                                - "letter_english": Single-page English handwritten letters
+                                - "letter_swedish": Single-page Swedish handwritten letters (default)
                                 - "spread_english": Two-page spread English documents with marginalia
                                 - "spread_swedish": Two-page spread Swedish documents with marginalia
                                 Default: "letter_swedish"
                                 - "page": PAGE XML format with structural markup and positioning data
                                 - "json": JSON format with structured text, layout information and metadata
                                 Default: "alto"
         custom_settings (Optional[str]): Advanced users can provide custom pipeline configuration as a
+                                        JSON string to override the default processing steps.
                                         Default: None (uses predefined configuration for document_type)
     Returns:
+        str: JSON string containing both the file content and download link:
+             {
+                 "content": "file_content_here",
+                 "file_path": "[file_name](http://your-server:port/gradio_api//file=/tmp/gradio/{temp_folder}/{file_name}.{file_format})"
+             }
     """
     if not image_path:
+        return json.dumps({"error": "No image provided"})
     try:
         original_filename = Path(image_path).stem or "output"
             try:
                 config = json.loads(custom_settings)
             except json.JSONDecodeError:
+                return json.dumps({"error": "Invalid JSON in custom_settings parameter"})
         else:
             config = PIPELINE_CONFIGS[document_type]
         try:
             processed_collection = pipeline.run(collection)
         except Exception as pipeline_error:
+            return json.dumps({"error": f"Pipeline execution failed: {str(pipeline_error)}"})
         temp_dir = Path(tempfile.mkdtemp())
         export_dir = temp_dir / output_format
                 break
         if output_file_path and os.path.exists(output_file_path):
+            # Read the file content
+            try:
+                with open(output_file_path, 'r', encoding='utf-8') as f:
+                    file_content = f.read()
+            except UnicodeDecodeError:
+                # If UTF-8 fails, try with different encoding or read as binary for certain formats
+                try:
+                    with open(output_file_path, 'r', encoding='latin-1') as f:
+                        file_content = f.read()
+                except:
+                    with open(output_file_path, 'rb') as f:
+                        file_content = f.read().decode('utf-8', errors='replace')
+            # Create the markdown link
+            file_name = Path(output_file_path).name
+            temp_folder = Path(output_file_path).parent.name
+            markdown_link = f"[{file_name}](http://your-server:port/gradio_api//file=/tmp/gradio/{temp_folder}/{file_name})"
+            # Return JSON with both content and file path
+            result = {
+                "content": file_content,
+                "file_path": markdown_link
+            }
+            return json.dumps(result, ensure_ascii=False, indent=2)
         else:
+            return json.dumps({"error": "Failed to generate output file"})
     except Exception as e:
+        return json.dumps({"error": f"HTR processing failed: {str(e)}"})
+def htrflow_visualizer(image: str, htr_document: str) -> str:
+    pass
 def extract_text_from_collection(collection: Collection) -> str:
     text_lines = []
     return "\n".join(text_lines)
 def create_htrflow_mcp_server():
+    htrflow_url = gr.Interface(
+        fn=htrflow_htr_url,
         inputs=[
             gr.Image(type="filepath", label="Upload Image or Enter URL"),
             gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_swedish", label="Document Type"),
             gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
             gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
         ],
+        outputs=gr.Textbox(label="HTR Result (JSON)", lines=10),
+        description="Process handwritten text from uploaded file or URL and get both content and download link in JSON format",
+        api_name="htrflow_htr_url",
+    )
+    htrflow_viz = gr.Interface(
+        fn=htrflow_visualizer,
+        inputs=[
+            gr.Image(type="filepath", label="Upload Image or Enter URL"),
+            gr.Textbox(label="HTR Document content", placeholder="Path to the HTR document file", value=""),
+        ],
         outputs=gr.File(label="Download Output File"),
+        description="Visualize document",
+        api_name="htrflow_visualizer"
     )
+    demo = gr.TabbedInterface(
+        [htrflow_url, htrflow_viz],
+        ["HTR URL", "HTR Visualizer"],
+        title="HTRflow Handwritten Text Recognition",
+        description="Extract text and visualize handwritten historical documents using HTRflow",
+    )
     return demo
 if __name__ == "__main__":