Spaces:
Running
Running
Magic
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ import tempfile
|
|
13 |
from typing import Union, Dict, List
|
14 |
from contextlib import contextmanager
|
15 |
import requests
|
|
|
16 |
|
17 |
# Constants
|
18 |
DEFAULT_LANGUAGE = "English"
|
@@ -37,8 +38,12 @@ class OCRProcessor:
|
|
37 |
|
38 |
@staticmethod
|
39 |
def _encode_image(image_path: str) -> str:
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
42 |
|
43 |
@staticmethod
|
44 |
@contextmanager
|
@@ -68,18 +73,27 @@ class OCRProcessor:
|
|
68 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
69 |
raise
|
70 |
|
71 |
-
def _get_file_content(self, file_input: Union[str,
|
72 |
-
|
73 |
-
if file_input
|
74 |
-
# Handle URLs
|
75 |
-
response = requests.get(file_input)
|
76 |
-
response.raise_for_status()
|
77 |
-
return response.content
|
78 |
-
else:
|
79 |
-
# Handle local file paths
|
80 |
with open(file_input, "rb") as f:
|
81 |
return f.read()
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
85 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
@@ -89,7 +103,7 @@ class OCRProcessor:
|
|
89 |
except Exception as e:
|
90 |
return self._handle_error("PDF URL processing", e)
|
91 |
|
92 |
-
def ocr_uploaded_pdf(self, pdf_file: Union[str,
|
93 |
file_name = getattr(pdf_file, 'name', 'unknown')
|
94 |
logger.info(f"Processing uploaded PDF: {file_name}")
|
95 |
try:
|
@@ -113,7 +127,7 @@ class OCRProcessor:
|
|
113 |
except Exception as e:
|
114 |
return self._handle_error("image URL processing", e)
|
115 |
|
116 |
-
def ocr_uploaded_image(self, image_file: Union[str,
|
117 |
file_name = getattr(image_file, 'name', 'unknown')
|
118 |
logger.info(f"Processing uploaded image: {file_name}")
|
119 |
try:
|
@@ -138,7 +152,7 @@ class OCRProcessor:
|
|
138 |
except Exception as e:
|
139 |
return self._handle_error("document understanding", e)
|
140 |
|
141 |
-
def structured_ocr(self, image_file: Union[str,
|
142 |
file_name = getattr(image_file, 'name', 'unknown')
|
143 |
logger.info(f"Processing structured OCR for: {file_name}")
|
144 |
try:
|
@@ -165,19 +179,22 @@ class OCRProcessor:
|
|
165 |
temperature=0
|
166 |
)
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
return self._format_structured_response(temp_path,
|
175 |
except Exception as e:
|
176 |
return self._handle_error("structured OCR", e)
|
177 |
|
178 |
@staticmethod
|
179 |
def _extract_markdown(response: OCRResponse) -> str:
|
180 |
-
|
|
|
|
|
|
|
181 |
|
182 |
@staticmethod
|
183 |
def _handle_error(context: str, error: Exception) -> str:
|
@@ -188,7 +205,7 @@ class OCRProcessor:
|
|
188 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
189 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
190 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
191 |
-
|
192 |
response = {
|
193 |
"file_name": Path(file_path).name,
|
194 |
"topics": content.get("topics", []),
|
@@ -217,7 +234,7 @@ def create_interface():
|
|
217 |
except Exception as e:
|
218 |
return None, f"**Error:** Unexpected error: {str(e)}"
|
219 |
|
220 |
-
processor_state = gr.State()
|
221 |
api_status = gr.Markdown("API key not set. Please enter and set your key.")
|
222 |
|
223 |
set_api_button = gr.Button("Set API Key")
|
@@ -275,4 +292,4 @@ def create_interface():
|
|
275 |
return demo
|
276 |
|
277 |
if __name__ == "__main__":
|
278 |
-
create_interface().launch(share=True, debug=True)
|
|
|
13 |
from typing import Union, Dict, List
|
14 |
from contextlib import contextmanager
|
15 |
import requests
|
16 |
+
from enum import Enum
|
17 |
|
18 |
# Constants
|
19 |
DEFAULT_LANGUAGE = "English"
|
|
|
38 |
|
39 |
@staticmethod
|
40 |
def _encode_image(image_path: str) -> str:
|
41 |
+
try:
|
42 |
+
with open(image_path, "rb") as image_file:
|
43 |
+
return base64.b64encode(image_file.read()).decode('utf-8')
|
44 |
+
except Exception as e:
|
45 |
+
logger.error(f"Error encoding image {image_path}: {str(e)}")
|
46 |
+
raise
|
47 |
|
48 |
@staticmethod
|
49 |
@contextmanager
|
|
|
73 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
74 |
raise
|
75 |
|
76 |
+
def _get_file_content(self, file_input: Union[str, object]) -> bytes:
|
77 |
+
try:
|
78 |
+
if isinstance(file_input, str): # File path
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
with open(file_input, "rb") as f:
|
80 |
return f.read()
|
81 |
+
elif hasattr(file_input, 'read'): # File-like object
|
82 |
+
return file_input.read()
|
83 |
+
else:
|
84 |
+
raise ValueError("Invalid file input: must be a path or file-like object")
|
85 |
+
except Exception as e:
|
86 |
+
logger.error(f"Error getting file content: {str(e)}")
|
87 |
+
raise
|
88 |
+
|
89 |
+
def _fetch_url_content(self, url: str) -> bytes:
|
90 |
+
try:
|
91 |
+
response = requests.get(url, timeout=10)
|
92 |
+
response.raise_for_status()
|
93 |
+
return response.content
|
94 |
+
except requests.RequestException as e:
|
95 |
+
logger.error(f"Error fetching URL {url}: {str(e)}")
|
96 |
+
raise
|
97 |
|
98 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
99 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
|
|
103 |
except Exception as e:
|
104 |
return self._handle_error("PDF URL processing", e)
|
105 |
|
106 |
+
def ocr_uploaded_pdf(self, pdf_file: Union[str, object]) -> str:
|
107 |
file_name = getattr(pdf_file, 'name', 'unknown')
|
108 |
logger.info(f"Processing uploaded PDF: {file_name}")
|
109 |
try:
|
|
|
127 |
except Exception as e:
|
128 |
return self._handle_error("image URL processing", e)
|
129 |
|
130 |
+
def ocr_uploaded_image(self, image_file: Union[str, object]) -> str:
|
131 |
file_name = getattr(image_file, 'name', 'unknown')
|
132 |
logger.info(f"Processing uploaded image: {file_name}")
|
133 |
try:
|
|
|
152 |
except Exception as e:
|
153 |
return self._handle_error("document understanding", e)
|
154 |
|
155 |
+
def structured_ocr(self, image_file: Union[str, object]) -> str:
|
156 |
file_name = getattr(image_file, 'name', 'unknown')
|
157 |
logger.info(f"Processing structured OCR for: {file_name}")
|
158 |
try:
|
|
|
179 |
temperature=0
|
180 |
)
|
181 |
|
182 |
+
content = chat_response.choices[0].message.content if chat_response.choices else "{}"
|
183 |
+
try:
|
184 |
+
response_dict = json.loads(content)
|
185 |
+
except json.JSONDecodeError:
|
186 |
+
logger.error("Invalid JSON response from chat API")
|
187 |
+
response_dict = {}
|
188 |
+
return self._format_structured_response(temp_path, response_dict)
|
189 |
except Exception as e:
|
190 |
return self._handle_error("structured OCR", e)
|
191 |
|
192 |
@staticmethod
|
193 |
def _extract_markdown(response: OCRResponse) -> str:
|
194 |
+
try:
|
195 |
+
return response.pages[0].markdown if response.pages else "No text extracted"
|
196 |
+
except AttributeError:
|
197 |
+
return "Invalid OCR response format"
|
198 |
|
199 |
@staticmethod
|
200 |
def _handle_error(context: str, error: Exception) -> str:
|
|
|
205 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
206 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
207 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
208 |
+
|
209 |
response = {
|
210 |
"file_name": Path(file_path).name,
|
211 |
"topics": content.get("topics", []),
|
|
|
234 |
except Exception as e:
|
235 |
return None, f"**Error:** Unexpected error: {str(e)}"
|
236 |
|
237 |
+
processor_state = gr.State(value=None)
|
238 |
api_status = gr.Markdown("API key not set. Please enter and set your key.")
|
239 |
|
240 |
set_api_button = gr.Button("Set API Key")
|
|
|
292 |
return demo
|
293 |
|
294 |
if __name__ == "__main__":
|
295 |
+
create_interface().launch(share=True, debug=True)
|