Svngoku committed on
Commit
7654aea
·
verified ·
1 Parent(s): 468fb8d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -49
app.py CHANGED
@@ -11,6 +11,7 @@ import json
11
  import logging
12
  from tenacity import retry, stop_after_attempt, wait_fixed
13
  import tempfile
 
14
 
15
  # Set up logging
16
  logging.basicConfig(level=logging.INFO)
@@ -23,25 +24,25 @@ if not api_key:
23
  client = Mistral(api_key=api_key)
24
 
25
  # Helper function to encode image to base64
26
- def encode_image(image_path):
27
  try:
28
  with open(image_path, "rb") as image_file:
29
  return base64.b64encode(image_file.read()).decode('utf-8')
30
  except Exception as e:
31
  logger.error(f"Error encoding image {image_path}: {str(e)}")
32
- return f"Error encoding image: {str(e)}"
33
 
34
  # Retry-enabled API call helpers
35
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
36
- def call_ocr_api(document):
37
  return client.ocr.process(model="mistral-ocr-latest", document=document)
38
 
39
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
40
- def call_chat_complete(model, messages, **kwargs):
41
  return client.chat.complete(model=model, messages=messages, **kwargs)
42
 
43
  # Helper function to get file content (handles both string paths and file-like objects)
44
- def get_file_content(file_input):
45
  if isinstance(file_input, str): # Gradio 3.x: file path
46
  with open(file_input, "rb") as f:
47
  return f.read()
@@ -49,14 +50,11 @@ def get_file_content(file_input):
49
  return file_input.read()
50
 
51
  # OCR with PDF URL
52
- def ocr_pdf_url(pdf_url):
53
  logger.info(f"Processing PDF URL: {pdf_url}")
54
  try:
55
  ocr_response = call_ocr_api({"type": "document_url", "document_url": pdf_url})
56
- try:
57
- markdown = ocr_response.pages[0].markdown
58
- except (IndexError, AttributeError):
59
- markdown = "No text extracted or response invalid."
60
  logger.info("Successfully processed PDF URL")
61
  return markdown
62
  except Exception as e:
@@ -64,13 +62,11 @@ def ocr_pdf_url(pdf_url):
64
  return f"**Error:** {str(e)}"
65
 
66
  # OCR with Uploaded PDF
67
- def ocr_uploaded_pdf(pdf_file):
68
  logger.info(f"Processing uploaded PDF: {getattr(pdf_file, 'name', 'unknown')}")
69
  temp_path = None
70
  try:
71
- # Get file content (handles both string and file-like objects)
72
  content = get_file_content(pdf_file)
73
- # Use tempfile to handle uploaded file securely
74
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
75
  temp_file.write(content)
76
  temp_path = temp_file.name
@@ -80,10 +76,7 @@ def ocr_uploaded_pdf(pdf_file):
80
  )
81
  signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id, expiry=7200) # 2 hours
82
  ocr_response = call_ocr_api({"type": "document_url", "document_url": signed_url.url})
83
- try:
84
- markdown = ocr_response.pages[0].markdown
85
- except (IndexError, AttributeError):
86
- markdown = "No text extracted or response invalid."
87
  logger.info("Successfully processed uploaded PDF")
88
  return markdown
89
  except Exception as e:
@@ -94,14 +87,11 @@ def ocr_uploaded_pdf(pdf_file):
94
  os.remove(temp_path)
95
 
96
  # OCR with Image URL
97
- def ocr_image_url(image_url):
98
  logger.info(f"Processing image URL: {image_url}")
99
  try:
100
  ocr_response = call_ocr_api({"type": "image_url", "image_url": image_url})
101
- try:
102
- markdown = ocr_response.pages[0].markdown
103
- except (IndexError, AttributeError):
104
- markdown = "No text extracted or response invalid."
105
  logger.info("Successfully processed image URL")
106
  return markdown
107
  except Exception as e:
@@ -109,24 +99,18 @@ def ocr_image_url(image_url):
109
  return f"**Error:** {str(e)}"
110
 
111
  # OCR with Uploaded Image
112
- def ocr_uploaded_image(image_file):
113
  logger.info(f"Processing uploaded image: {getattr(image_file, 'name', 'unknown')}")
114
  temp_path = None
115
  try:
116
- # Get file content (handles both string and file-like objects)
117
  content = get_file_content(image_file)
118
  with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
119
  temp_file.write(content)
120
  temp_path = temp_file.name
121
  encoded_image = encode_image(temp_path)
122
- if "Error" in encoded_image:
123
- raise ValueError(encoded_image)
124
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
125
  ocr_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
126
- try:
127
- markdown = ocr_response.pages[0].markdown
128
- except (IndexError, AttributeError):
129
- markdown = "No text extracted or response invalid."
130
  logger.info("Successfully processed uploaded image")
131
  return markdown
132
  except Exception as e:
@@ -137,7 +121,7 @@ def ocr_uploaded_image(image_file):
137
  os.remove(temp_path)
138
 
139
  # Document Understanding
140
- def document_understanding(doc_url, question):
141
  logger.info(f"Processing document understanding - URL: {doc_url}, Question: {question}")
142
  try:
143
  messages = [
@@ -147,10 +131,7 @@ def document_understanding(doc_url, question):
147
  ]}
148
  ]
149
  chat_response = call_chat_complete(model="mistral-small-latest", messages=messages)
150
- try:
151
- content = chat_response.choices[0].message.content
152
- except (IndexError, AttributeError):
153
- content = "No response received from the API."
154
  logger.info("Successfully processed document understanding")
155
  return content
156
  except Exception as e:
@@ -175,26 +156,20 @@ class StructuredOCR(BaseModel):
175
  languages: list[Language]
176
  ocr_contents: dict
177
 
178
- def structured_ocr(image_file):
179
  logger.info(f"Processing structured OCR for image: {getattr(image_file, 'name', 'unknown')}")
180
  temp_path = None
181
  try:
182
- # Get file content (handles both string and file-like objects)
183
  content = get_file_content(image_file)
184
  with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
185
  temp_file.write(content)
186
  temp_path = temp_file.name
187
  image_path = Path(temp_path)
188
  encoded_image = encode_image(temp_path)
189
- if "Error" in encoded_image:
190
- raise ValueError(encoded_image)
191
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
192
 
193
  image_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
194
- try:
195
- image_ocr_markdown = image_response.pages[0].markdown
196
- except (IndexError, AttributeError):
197
- image_ocr_markdown = "No text extracted."
198
 
199
  chat_response = call_chat_complete(
200
  model="pixtral-12b-latest",
@@ -212,12 +187,8 @@ def structured_ocr(image_file):
212
  temperature=0
213
  )
214
 
215
- try:
216
- content = chat_response.choices[0].message.content
217
- response_dict = json.loads(content)
218
- except (json.JSONDecodeError, IndexError, AttributeError):
219
- logger.error("Failed to parse structured response")
220
- return "Failed to parse structured response. Please try again."
221
 
222
  language_members = {member.value: member for member in Language}
223
  valid_languages = [l for l in response_dict.get("languages", ["English"]) if l in language_members]
 
11
  import logging
12
  from tenacity import retry, stop_after_attempt, wait_fixed
13
  import tempfile
14
+ from typing import Union
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.INFO)
 
24
  client = Mistral(api_key=api_key)
25
 
26
  # Helper function to encode image to base64
27
+ def encode_image(image_path: str) -> str:
28
  try:
29
  with open(image_path, "rb") as image_file:
30
  return base64.b64encode(image_file.read()).decode('utf-8')
31
  except Exception as e:
32
  logger.error(f"Error encoding image {image_path}: {str(e)}")
33
+ raise
34
 
35
  # Retry-enabled API call helpers
36
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
37
+ def call_ocr_api(document: dict) -> OCRResponse:
38
  return client.ocr.process(model="mistral-ocr-latest", document=document)
39
 
40
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
41
+ def call_chat_complete(model: str, messages: list, **kwargs) -> dict:
42
  return client.chat.complete(model=model, messages=messages, **kwargs)
43
 
44
  # Helper function to get file content (handles both string paths and file-like objects)
45
+ def get_file_content(file_input: Union[str, bytes]) -> bytes:
46
  if isinstance(file_input, str): # Gradio 3.x: file path
47
  with open(file_input, "rb") as f:
48
  return f.read()
 
50
  return file_input.read()
51
 
52
  # OCR with PDF URL
53
+ def ocr_pdf_url(pdf_url: str) -> str:
54
  logger.info(f"Processing PDF URL: {pdf_url}")
55
  try:
56
  ocr_response = call_ocr_api({"type": "document_url", "document_url": pdf_url})
57
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
 
 
 
58
  logger.info("Successfully processed PDF URL")
59
  return markdown
60
  except Exception as e:
 
62
  return f"**Error:** {str(e)}"
63
 
64
  # OCR with Uploaded PDF
65
+ def ocr_uploaded_pdf(pdf_file: Union[str, bytes]) -> str:
66
  logger.info(f"Processing uploaded PDF: {getattr(pdf_file, 'name', 'unknown')}")
67
  temp_path = None
68
  try:
 
69
  content = get_file_content(pdf_file)
 
70
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
71
  temp_file.write(content)
72
  temp_path = temp_file.name
 
76
  )
77
  signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id, expiry=7200) # 2 hours
78
  ocr_response = call_ocr_api({"type": "document_url", "document_url": signed_url.url})
79
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
 
 
 
80
  logger.info("Successfully processed uploaded PDF")
81
  return markdown
82
  except Exception as e:
 
87
  os.remove(temp_path)
88
 
89
  # OCR with Image URL
90
+ def ocr_image_url(image_url: str) -> str:
91
  logger.info(f"Processing image URL: {image_url}")
92
  try:
93
  ocr_response = call_ocr_api({"type": "image_url", "image_url": image_url})
94
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
 
 
 
95
  logger.info("Successfully processed image URL")
96
  return markdown
97
  except Exception as e:
 
99
  return f"**Error:** {str(e)}"
100
 
101
  # OCR with Uploaded Image
102
+ def ocr_uploaded_image(image_file: Union[str, bytes]) -> str:
103
  logger.info(f"Processing uploaded image: {getattr(image_file, 'name', 'unknown')}")
104
  temp_path = None
105
  try:
 
106
  content = get_file_content(image_file)
107
  with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
108
  temp_file.write(content)
109
  temp_path = temp_file.name
110
  encoded_image = encode_image(temp_path)
 
 
111
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
112
  ocr_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
113
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
 
 
 
114
  logger.info("Successfully processed uploaded image")
115
  return markdown
116
  except Exception as e:
 
121
  os.remove(temp_path)
122
 
123
  # Document Understanding
124
+ def document_understanding(doc_url: str, question: str) -> str:
125
  logger.info(f"Processing document understanding - URL: {doc_url}, Question: {question}")
126
  try:
127
  messages = [
 
131
  ]}
132
  ]
133
  chat_response = call_chat_complete(model="mistral-small-latest", messages=messages)
134
+ content = chat_response.choices[0].message.content if chat_response.choices else "No response received from the API."
 
 
 
135
  logger.info("Successfully processed document understanding")
136
  return content
137
  except Exception as e:
 
156
  languages: list[Language]
157
  ocr_contents: dict
158
 
159
+ def structured_ocr(image_file: Union[str, bytes]) -> str:
160
  logger.info(f"Processing structured OCR for image: {getattr(image_file, 'name', 'unknown')}")
161
  temp_path = None
162
  try:
 
163
  content = get_file_content(image_file)
164
  with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
165
  temp_file.write(content)
166
  temp_path = temp_file.name
167
  image_path = Path(temp_path)
168
  encoded_image = encode_image(temp_path)
 
 
169
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
170
 
171
  image_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
172
+ image_ocr_markdown = image_response.pages[0].markdown if image_response.pages else "No text extracted."
 
 
 
173
 
174
  chat_response = call_chat_complete(
175
  model="pixtral-12b-latest",
 
187
  temperature=0
188
  )
189
 
190
+ content = chat_response.choices[0].message.content if chat_response.choices else "{}"
191
+ response_dict = json.loads(content)
 
 
 
 
192
 
193
  language_members = {member.value: member for member in Language}
194
  valid_languages = [l for l in response_dict.get("languages", ["English"]) if l in language_members]