Spaces:

levalencia
/

doctorecord

Running

App Files Community

levalencia committed on Jun 4

Commit

54478a0

1 Parent(s): 966ffcd

Refactor AzureDIService to enhance document analysis logging and update table extraction logic. Temporarily disable table extraction and improve content type logging. Update TableAgent to reflect changes in context handling.

Browse files

Files changed (5) hide show

src/agents/__pycache__/field_mapper_agent.cpython-312.pyc +0 -0
src/agents/__pycache__/table_agent.cpython-312.pyc +0 -0
src/agents/table_agent.py +2 -2
src/services/__pycache__/azure_di_service.cpython-312.pyc +0 -0
src/services/azure_di_service.py +25 -26

src/agents/__pycache__/field_mapper_agent.cpython-312.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc and b/src/agents/__pycache__/field_mapper_agent.cpython-312.pyc differ

src/agents/__pycache__/table_agent.cpython-312.pyc CHANGED Viewed

Binary files a/src/agents/__pycache__/table_agent.cpython-312.pyc and b/src/agents/__pycache__/table_agent.cpython-312.pyc differ

src/agents/table_agent.py CHANGED Viewed

@@ -35,9 +35,9 @@ class TableAgent(BaseAgent):
             # Store both text and tables in context
             ctx["text"] = result["text"]
-            ctx["tables"] = result["tables"]
-            self.logger.info(f"Extracted {len(result['text'])} characters of text and {len(result['tables'])} tables")
             return result
         except Exception as e:

             # Store both text and tables in context
             ctx["text"] = result["text"]
+            #ctx["tables"] = result["tables"]
+            self.logger.info(f"Extracted {len(result['text'])} characters of text including tables")
             return result
         except Exception as e:

src/services/__pycache__/azure_di_service.cpython-312.pyc CHANGED Viewed

Binary files a/src/services/__pycache__/azure_di_service.cpython-312.pyc and b/src/services/__pycache__/azure_di_service.cpython-312.pyc differ

src/services/azure_di_service.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Real wrapper goes here – currently not used by stub agents."""
 import logging
 from azure.ai.documentintelligence import DocumentIntelligenceClient
 from azure.core.credentials import AzureKeyCredential
 from azure.core.exceptions import HttpResponseError
@@ -14,38 +16,35 @@ class AzureDIService:
             self.logger.info("Starting document analysis with Azure Document Intelligence")
             # Analyze the entire document at once
-            poller = self.client.begin_analyze_document("prebuilt-layout", body=pdf_bytes)
             result = poller.result()
-            # Extract text content
-            text_content = ""
             if hasattr(result, "content"):
-                text_content = result.content
-                self.logger.info(f"Extracted {len(text_content)} characters of text")
-            # Extract tables as HTML strings
-            tables = []
             if hasattr(result, "tables"):
-                self.logger.info(f"Found {len(result.tables)} tables in the document")
-                for table in result.tables:
-                    # Simple HTML rendering for demo; you can improve this
-                    html = "<table>"
-                    for row_idx in range(table.row_count):
-                        html += "<tr>"
-                        for col_idx in range(table.column_count):
-                            cell = next((c for c in table.cells if c.row_index == row_idx and c.column_index == col_idx), None)
-                            html += f"<td>{cell.content if cell else ''}</td>"
-                        html += "</tr>"
-                    html += "</table>"
-                    tables.append(html)
-            else:
-                self.logger.warning("No tables found in the document")
-            # Return both text and tables
-            return {
-                "text": text_content,
-                "tables": tables
-            }
         except HttpResponseError as e:
             self.logger.error(f"Azure Document Intelligence API error: {str(e)}")

 """Real wrapper goes here – currently not used by stub agents."""
 import logging
+import json
 from azure.ai.documentintelligence import DocumentIntelligenceClient
+from azure.ai.documentintelligence.models import DocumentContentFormat
 from azure.core.credentials import AzureKeyCredential
 from azure.core.exceptions import HttpResponseError
             self.logger.info("Starting document analysis with Azure Document Intelligence")
             # Analyze the entire document at once
+            #poller = self.client.begin_analyze_document("prebuilt-layout", body=pdf_bytes)
+            poller = self.client.begin_analyze_document(
+                "prebuilt-layout",
+                body=pdf_bytes,
+                content_type="application/octet-stream",
+                output_content_format=DocumentContentFormat.MARKDOWN
+            )
             result = poller.result()
+            # Log the raw result structure
+            self.logger.info("Inspecting Azure DI result structure:")
+            self.logger.info(f"Result type: {type(result)}")
+            self.logger.info(f"Result attributes: {dir(result)}")
+            # Check if content exists and log its type
             if hasattr(result, "content"):
+                self.logger.info(f"Content type: {type(result.content)}")
+                self.logger.info(f"Content preview: {result.content[:500]}")
+            # Check if tables exist and log their structure
             if hasattr(result, "tables"):
+                self.logger.info(f"Number of tables: {len(result.tables)}")
+                if result.tables:
+                    self.logger.info(f"First table structure: {dir(result.tables[0])}")
+                    self.logger.info(f"First table cells: {[cell.content for cell in result.tables[0].cells]}")
+            # For now, return only the text content; table extraction stays disabled until we understand the table structure
+            return {"text": result.content}
         except HttpResponseError as e:
             self.logger.error(f"Azure Document Intelligence API error: {str(e)}")