Svngoku committed on
Commit
96d9245
·
verified ·
1 Parent(s): 7f3a813
Files changed (1) hide show
  1. app.py +43 -26
app.py CHANGED
@@ -13,6 +13,7 @@ import tempfile
13
  from typing import Union, Dict, List
14
  from contextlib import contextmanager
15
  import requests
 
16
 
17
  # Constants
18
  DEFAULT_LANGUAGE = "English"
@@ -37,8 +38,12 @@ class OCRProcessor:
37
 
38
  @staticmethod
39
  def _encode_image(image_path: str) -> str:
40
- with open(image_path, "rb") as image_file:
41
- return base64.b64encode(image_file.read()).decode('utf-8')
 
 
 
 
42
 
43
  @staticmethod
44
  @contextmanager
@@ -68,18 +73,27 @@ class OCRProcessor:
68
  logger.error(f"Chat complete API call failed: {str(e)}")
69
  raise
70
 
71
- def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
72
- if isinstance(file_input, str):
73
- if file_input.startswith("http"):
74
- # Handle URLs
75
- response = requests.get(file_input)
76
- response.raise_for_status()
77
- return response.content
78
- else:
79
- # Handle local file paths
80
  with open(file_input, "rb") as f:
81
  return f.read()
82
- return file_input.read() if hasattr(file_input, 'read') else file_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def ocr_pdf_url(self, pdf_url: str) -> str:
85
  logger.info(f"Processing PDF URL: {pdf_url}")
@@ -89,7 +103,7 @@ class OCRProcessor:
89
  except Exception as e:
90
  return self._handle_error("PDF URL processing", e)
91
 
92
- def ocr_uploaded_pdf(self, pdf_file: Union[str, bytes]) -> str:
93
  file_name = getattr(pdf_file, 'name', 'unknown')
94
  logger.info(f"Processing uploaded PDF: {file_name}")
95
  try:
@@ -113,7 +127,7 @@ class OCRProcessor:
113
  except Exception as e:
114
  return self._handle_error("image URL processing", e)
115
 
116
- def ocr_uploaded_image(self, image_file: Union[str, bytes]) -> str:
117
  file_name = getattr(image_file, 'name', 'unknown')
118
  logger.info(f"Processing uploaded image: {file_name}")
119
  try:
@@ -138,7 +152,7 @@ class OCRProcessor:
138
  except Exception as e:
139
  return self._handle_error("document understanding", e)
140
 
141
- def structured_ocr(self, image_file: Union[str, bytes]) -> str:
142
  file_name = getattr(image_file, 'name', 'unknown')
143
  logger.info(f"Processing structured OCR for: {file_name}")
144
  try:
@@ -165,19 +179,22 @@ class OCRProcessor:
165
  temperature=0
166
  )
167
 
168
- # Ensure the response is a dictionary
169
- response_content = chat_response.choices[0].message.content
170
- if isinstance(response_content, list):
171
- response_content = response_content[0] if response_content else "{}"
172
-
173
- content = json.loads(response_content)
174
- return self._format_structured_response(temp_path, content)
175
  except Exception as e:
176
  return self._handle_error("structured OCR", e)
177
 
178
  @staticmethod
179
  def _extract_markdown(response: OCRResponse) -> str:
180
- return response.pages[0].markdown if response.pages else "No text extracted"
 
 
 
181
 
182
  @staticmethod
183
  def _handle_error(context: str, error: Exception) -> str:
@@ -188,7 +205,7 @@ class OCRProcessor:
188
  def _format_structured_response(file_path: str, content: Dict) -> str:
189
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
190
  valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
191
-
192
  response = {
193
  "file_name": Path(file_path).name,
194
  "topics": content.get("topics", []),
@@ -217,7 +234,7 @@ def create_interface():
217
  except Exception as e:
218
  return None, f"**Error:** Unexpected error: {str(e)}"
219
 
220
- processor_state = gr.State()
221
  api_status = gr.Markdown("API key not set. Please enter and set your key.")
222
 
223
  set_api_button = gr.Button("Set API Key")
@@ -275,4 +292,4 @@ def create_interface():
275
  return demo
276
 
277
  if __name__ == "__main__":
278
- create_interface().launch(share=True, debug=True)
 
13
  from typing import Union, Dict, List
14
  from contextlib import contextmanager
15
  import requests
16
+ from enum import Enum
17
 
18
  # Constants
19
  DEFAULT_LANGUAGE = "English"
 
38
 
39
  @staticmethod
40
  def _encode_image(image_path: str) -> str:
41
+ try:
42
+ with open(image_path, "rb") as image_file:
43
+ return base64.b64encode(image_file.read()).decode('utf-8')
44
+ except Exception as e:
45
+ logger.error(f"Error encoding image {image_path}: {str(e)}")
46
+ raise
47
 
48
  @staticmethod
49
  @contextmanager
 
73
  logger.error(f"Chat complete API call failed: {str(e)}")
74
  raise
75
 
76
def _get_file_content(self, file_input: Union[str, object]) -> bytes:
    """Return the raw content behind *file_input*.

    Args:
        file_input: One of
            * a ``str`` filesystem path, which is opened in binary mode;
            * a ``bytes``/``bytearray`` payload, which is returned as
              ``bytes`` without further processing;
            * any object exposing ``read()``, whose ``read()`` result is
              returned as-is.

    Returns:
        The content read from the path or file-like object, or the
        bytes payload itself.

    Raises:
        ValueError: If *file_input* is none of the supported kinds.
        Exception: Any error raised while reading is logged through
            ``logger`` and re-raised unchanged.
    """
    try:
        if isinstance(file_input, str):  # File path
            with open(file_input, "rb") as f:
                return f.read()
        if isinstance(file_input, (bytes, bytearray)):
            # Callers that already hold the payload in memory must not be
            # rejected; hand the data back instead of raising.
            return bytes(file_input)
        if hasattr(file_input, 'read'):  # File-like object
            return file_input.read()
        raise ValueError(
            "Invalid file input: must be a path, bytes, or file-like object"
        )
    except Exception as e:
        logger.error(f"Error getting file content: {str(e)}")
        raise
88
+
89
def _fetch_url_content(self, url: str) -> bytes:
    """Download *url* with an HTTP GET and return the response body.

    The request uses a 10-second timeout, and ``raise_for_status()`` is
    called on the response before its content is returned.

    Args:
        url: Address to fetch.

    Returns:
        The ``content`` attribute of the response.

    Raises:
        requests.RequestException: Logged through ``logger`` and then
            re-raised unchanged.
    """
    try:
        reply = requests.get(url, timeout=10)
        reply.raise_for_status()
        payload = reply.content
        return payload
    except requests.RequestException as exc:
        logger.error(f"Error fetching URL {url}: {str(exc)}")
        raise
97
 
98
  def ocr_pdf_url(self, pdf_url: str) -> str:
99
  logger.info(f"Processing PDF URL: {pdf_url}")
 
103
  except Exception as e:
104
  return self._handle_error("PDF URL processing", e)
105
 
106
+ def ocr_uploaded_pdf(self, pdf_file: Union[str, object]) -> str:
107
  file_name = getattr(pdf_file, 'name', 'unknown')
108
  logger.info(f"Processing uploaded PDF: {file_name}")
109
  try:
 
127
  except Exception as e:
128
  return self._handle_error("image URL processing", e)
129
 
130
+ def ocr_uploaded_image(self, image_file: Union[str, object]) -> str:
131
  file_name = getattr(image_file, 'name', 'unknown')
132
  logger.info(f"Processing uploaded image: {file_name}")
133
  try:
 
152
  except Exception as e:
153
  return self._handle_error("document understanding", e)
154
 
155
+ def structured_ocr(self, image_file: Union[str, object]) -> str:
156
  file_name = getattr(image_file, 'name', 'unknown')
157
  logger.info(f"Processing structured OCR for: {file_name}")
158
  try:
 
179
  temperature=0
180
  )
181
 
182
+ content = chat_response.choices[0].message.content if chat_response.choices else "{}"
183
+ try:
184
+ response_dict = json.loads(content)
185
+ except json.JSONDecodeError:
186
+ logger.error("Invalid JSON response from chat API")
187
+ response_dict = {}
188
+ return self._format_structured_response(temp_path, response_dict)
189
  except Exception as e:
190
  return self._handle_error("structured OCR", e)
191
 
192
  @staticmethod
193
  def _extract_markdown(response: OCRResponse) -> str:
194
+ try:
195
+ return response.pages[0].markdown if response.pages else "No text extracted"
196
+ except AttributeError:
197
+ return "Invalid OCR response format"
198
 
199
  @staticmethod
200
  def _handle_error(context: str, error: Exception) -> str:
 
205
  def _format_structured_response(file_path: str, content: Dict) -> str:
206
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
207
  valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
208
+
209
  response = {
210
  "file_name": Path(file_path).name,
211
  "topics": content.get("topics", []),
 
234
  except Exception as e:
235
  return None, f"**Error:** Unexpected error: {str(e)}"
236
 
237
+ processor_state = gr.State(value=None)
238
  api_status = gr.Markdown("API key not set. Please enter and set your key.")
239
 
240
  set_api_button = gr.Button("Set API Key")
 
292
  return demo
293
 
294
  if __name__ == "__main__":
295
+ create_interface().launch(share=True, debug=True)