Spaces:

Svngoku
/

mistral-ocr-demo

Running

App Files Files Community

Svngoku commited on Mar 6

Commit

96d9245

verified ·

1 Parent(s): 7f3a813

Magic

Browse files

Files changed (1) hide show

app.py +43 -26

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ import tempfile
 from typing import Union, Dict, List
 from contextlib import contextmanager
 import requests
 # Constants
 DEFAULT_LANGUAGE = "English"
@@ -37,8 +38,12 @@ class OCRProcessor:
     @staticmethod
     def _encode_image(image_path: str) -> str:
-        with open(image_path, "rb") as image_file:
-            return base64.b64encode(image_file.read()).decode('utf-8')
     @staticmethod
     @contextmanager
@@ -68,18 +73,27 @@ class OCRProcessor:
             logger.error(f"Chat complete API call failed: {str(e)}")
             raise
-    def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
-        if isinstance(file_input, str):
-            if file_input.startswith("http"):
-                # Handle URLs
-                response = requests.get(file_input)
-                response.raise_for_status()
-                return response.content
-            else:
-                # Handle local file paths
                 with open(file_input, "rb") as f:
                     return f.read()
-        return file_input.read() if hasattr(file_input, 'read') else file_input
     def ocr_pdf_url(self, pdf_url: str) -> str:
         logger.info(f"Processing PDF URL: {pdf_url}")
@@ -89,7 +103,7 @@ class OCRProcessor:
         except Exception as e:
             return self._handle_error("PDF URL processing", e)
-    def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> str:
         file_name = getattr(pdf_file, 'name', 'unknown')
         logger.info(f"Processing uploaded PDF: {file_name}")
         try:
@@ -113,7 +127,7 @@ class OCRProcessor:
         except Exception as e:
             return self._handle_error("image URL processing", e)
-    def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> str:
         file_name = getattr(image_file, 'name', 'unknown')
         logger.info(f"Processing uploaded image: {file_name}")
         try:
@@ -138,7 +152,7 @@ class OCRProcessor:
         except Exception as e:
             return self._handle_error("document understanding", e)
-    def structured_ocr(self, image_file: Union[str, bytes]) -> str:
         file_name = getattr(image_file, 'name', 'unknown')
         logger.info(f"Processing structured OCR for: {file_name}")
         try:
@@ -165,19 +179,22 @@ class OCRProcessor:
                     temperature=0
                 )
-                # Ensure the response is a dictionary
-                response_content = chat_response.choices[0].message.content
-                if isinstance(response_content, list):
-                    response_content = response_content[0] if response_content else "{}"
-                content = json.loads(response_content)
-                return self._format_structured_response(temp_path, content)
         except Exception as e:
             return self._handle_error("structured OCR", e)
     @staticmethod
     def _extract_markdown(response: OCRResponse) -> str:
-        return response.pages[0].markdown if response.pages else "No text extracted"
     @staticmethod
     def _handle_error(context: str, error: Exception) -> str:
@@ -188,7 +205,7 @@ class OCRProcessor:
     def _format_structured_response(file_path: str, content: Dict) -> str:
         languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
         valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
         response = {
             "file_name": Path(file_path).name,
             "topics": content.get("topics", []),
@@ -217,7 +234,7 @@ def create_interface():
             except Exception as e:
                 return None, f"**Error:** Unexpected error: {str(e)}"
-        processor_state = gr.State()
         api_status = gr.Markdown("API key not set. Please enter and set your key.")
         set_api_button = gr.Button("Set API Key")
@@ -275,4 +292,4 @@ def create_interface():
     return demo
 if __name__ == "__main__":
-    create_interface().launch(share=True, debug=True)

 from typing import Union, Dict, List
 from contextlib import contextmanager
 import requests
+from enum import Enum
 # Constants
 DEFAULT_LANGUAGE = "English"
     @staticmethod
     def _encode_image(image_path: str) -> str:
+        try:
+            with open(image_path, "rb") as image_file:
+                return base64.b64encode(image_file.read()).decode('utf-8')
+        except Exception as e:
+            logger.error(f"Error encoding image {image_path}: {str(e)}")
+            raise
     @staticmethod
     @contextmanager
             logger.error(f"Chat complete API call failed: {str(e)}")
             raise
+    def _get_file_content(self, file_input: Union[str, object]) -> bytes:
+        try:
+            if isinstance(file_input, str):  # File path
                 with open(file_input, "rb") as f:
                     return f.read()
+            elif hasattr(file_input, 'read'):  # File-like object
+                return file_input.read()
+            else:
+                raise ValueError("Invalid file input: must be a path or file-like object")
+        except Exception as e:
+            logger.error(f"Error getting file content: {str(e)}")
+            raise
+    def _fetch_url_content(self, url: str) -> bytes:
+        try:
+            response = requests.get(url, timeout=10)
+            response.raise_for_status()
+            return response.content
+        except requests.RequestException as e:
+            logger.error(f"Error fetching URL {url}: {str(e)}")
+            raise
     def ocr_pdf_url(self, pdf_url: str) -> str:
         logger.info(f"Processing PDF URL: {pdf_url}")
         except Exception as e:
             return self._handle_error("PDF URL processing", e)
+    def ocr_uploaded_pdf(self, pdf_file: Union[str, object]) -> str:
         file_name = getattr(pdf_file, 'name', 'unknown')
         logger.info(f"Processing uploaded PDF: {file_name}")
         try:
         except Exception as e:
             return self._handle_error("image URL processing", e)
+    def ocr_uploaded_image(self, image_file: Union[str, object]) -> str:
         file_name = getattr(image_file, 'name', 'unknown')
         logger.info(f"Processing uploaded image: {file_name}")
         try:
         except Exception as e:
             return self._handle_error("document understanding", e)
+    def structured_ocr(self, image_file: Union[str, object]) -> str:
         file_name = getattr(image_file, 'name', 'unknown')
         logger.info(f"Processing structured OCR for: {file_name}")
         try:
                     temperature=0
                 )
+                content = chat_response.choices[0].message.content if chat_response.choices else "{}"
+                try:
+                    response_dict = json.loads(content)
+                except json.JSONDecodeError:
+                    logger.error("Invalid JSON response from chat API")
+                    response_dict = {}
+                return self._format_structured_response(temp_path, response_dict)
         except Exception as e:
             return self._handle_error("structured OCR", e)
     @staticmethod
     def _extract_markdown(response: OCRResponse) -> str:
+        try:
+            return response.pages[0].markdown if response.pages else "No text extracted"
+        except AttributeError:
+            return "Invalid OCR response format"
     @staticmethod
     def _handle_error(context: str, error: Exception) -> str:
     def _format_structured_response(file_path: str, content: Dict) -> str:
         languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
         valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
         response = {
             "file_name": Path(file_path).name,
             "topics": content.get("topics", []),
             except Exception as e:
                 return None, f"**Error:** Unexpected error: {str(e)}"
+        processor_state = gr.State(value=None)
         api_status = gr.Markdown("API key not set. Please enter and set your key.")
         set_api_button = gr.Button("Set API Key")
     return demo
 if __name__ == "__main__":
+    create_interface().launch(share=True, debug=True)