Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ import json
|
|
11 |
import logging
|
12 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
13 |
import tempfile
|
|
|
14 |
|
15 |
# Set up logging
|
16 |
logging.basicConfig(level=logging.INFO)
|
@@ -23,25 +24,25 @@ if not api_key:
|
|
23 |
client = Mistral(api_key=api_key)
|
24 |
|
25 |
# Helper function to encode image to base64
|
26 |
-
def encode_image(image_path):
|
27 |
try:
|
28 |
with open(image_path, "rb") as image_file:
|
29 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
30 |
except Exception as e:
|
31 |
logger.error(f"Error encoding image {image_path}: {str(e)}")
|
32 |
-
|
33 |
|
34 |
# Retry-enabled API call helpers
|
35 |
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def call_ocr_api(document):
    """Send ``document`` to the Mistral OCR endpoint, retrying on failure.

    Args:
        document: Document descriptor forwarded unchanged to
            ``client.ocr.process``. Callers in this file pass dicts such as
            ``{"type": "document_url", "document_url": ...}`` or
            ``{"type": "image_url", "image_url": ...}``.

    Returns:
        Whatever ``client.ocr.process`` returns for the
        ``mistral-ocr-latest`` model; callers read ``.pages[0].markdown``.

    Raises:
        Exception: The error from the last of up to three attempts (made
            two seconds apart). ``reraise=True`` surfaces that original
            error instead of tenacity's ``RetryError`` wrapper, so the
            ``str(e)`` shown to users by the callers is meaningful.
    """
    return client.ocr.process(model="mistral-ocr-latest", document=document)
|
38 |
|
39 |
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def call_chat_complete(model, messages, **kwargs):
    """Call the Mistral chat-completion endpoint, retrying on failure.

    Args:
        model: Model name forwarded to ``client.chat.complete``.
        messages: Chat messages forwarded to ``client.chat.complete``.
        **kwargs: Extra keyword arguments passed through unchanged
            (callers in this file pass e.g. ``temperature=0``).

    Returns:
        Whatever ``client.chat.complete`` returns; callers read
        ``.choices[0].message.content`` from it.

    Raises:
        Exception: The error from the last of up to three attempts (made
            two seconds apart). ``reraise=True`` surfaces that original
            error instead of tenacity's ``RetryError`` wrapper.
    """
    return client.chat.complete(model=model, messages=messages, **kwargs)
|
42 |
|
43 |
# Helper function to get file content (handles both string paths and file-like objects)
|
44 |
-
def get_file_content(file_input):
|
45 |
if isinstance(file_input, str): # Gradio 3.x: file path
|
46 |
with open(file_input, "rb") as f:
|
47 |
return f.read()
|
@@ -49,14 +50,11 @@ def get_file_content(file_input):
|
|
49 |
return file_input.read()
|
50 |
|
51 |
# OCR with PDF URL
|
52 |
-
def ocr_pdf_url(pdf_url):
|
53 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
54 |
try:
|
55 |
ocr_response = call_ocr_api({"type": "document_url", "document_url": pdf_url})
|
56 |
-
|
57 |
-
markdown = ocr_response.pages[0].markdown
|
58 |
-
except (IndexError, AttributeError):
|
59 |
-
markdown = "No text extracted or response invalid."
|
60 |
logger.info("Successfully processed PDF URL")
|
61 |
return markdown
|
62 |
except Exception as e:
|
@@ -64,13 +62,11 @@ def ocr_pdf_url(pdf_url):
|
|
64 |
return f"**Error:** {str(e)}"
|
65 |
|
66 |
# OCR with Uploaded PDF
|
67 |
-
def ocr_uploaded_pdf(pdf_file):
|
68 |
logger.info(f"Processing uploaded PDF: {getattr(pdf_file, 'name', 'unknown')}")
|
69 |
temp_path = None
|
70 |
try:
|
71 |
-
# Get file content (handles both string and file-like objects)
|
72 |
content = get_file_content(pdf_file)
|
73 |
-
# Use tempfile to handle uploaded file securely
|
74 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
75 |
temp_file.write(content)
|
76 |
temp_path = temp_file.name
|
@@ -80,10 +76,7 @@ def ocr_uploaded_pdf(pdf_file):
|
|
80 |
)
|
81 |
signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id, expiry=7200) # 2 hours
|
82 |
ocr_response = call_ocr_api({"type": "document_url", "document_url": signed_url.url})
|
83 |
-
|
84 |
-
markdown = ocr_response.pages[0].markdown
|
85 |
-
except (IndexError, AttributeError):
|
86 |
-
markdown = "No text extracted or response invalid."
|
87 |
logger.info("Successfully processed uploaded PDF")
|
88 |
return markdown
|
89 |
except Exception as e:
|
@@ -94,14 +87,11 @@ def ocr_uploaded_pdf(pdf_file):
|
|
94 |
os.remove(temp_path)
|
95 |
|
96 |
# OCR with Image URL
|
97 |
-
def ocr_image_url(image_url):
|
98 |
logger.info(f"Processing image URL: {image_url}")
|
99 |
try:
|
100 |
ocr_response = call_ocr_api({"type": "image_url", "image_url": image_url})
|
101 |
-
|
102 |
-
markdown = ocr_response.pages[0].markdown
|
103 |
-
except (IndexError, AttributeError):
|
104 |
-
markdown = "No text extracted or response invalid."
|
105 |
logger.info("Successfully processed image URL")
|
106 |
return markdown
|
107 |
except Exception as e:
|
@@ -109,24 +99,18 @@ def ocr_image_url(image_url):
|
|
109 |
return f"**Error:** {str(e)}"
|
110 |
|
111 |
# OCR with Uploaded Image
|
112 |
-
def ocr_uploaded_image(image_file):
|
113 |
logger.info(f"Processing uploaded image: {getattr(image_file, 'name', 'unknown')}")
|
114 |
temp_path = None
|
115 |
try:
|
116 |
-
# Get file content (handles both string and file-like objects)
|
117 |
content = get_file_content(image_file)
|
118 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
119 |
temp_file.write(content)
|
120 |
temp_path = temp_file.name
|
121 |
encoded_image = encode_image(temp_path)
|
122 |
-
if "Error" in encoded_image:
|
123 |
-
raise ValueError(encoded_image)
|
124 |
base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
|
125 |
ocr_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
|
126 |
-
|
127 |
-
markdown = ocr_response.pages[0].markdown
|
128 |
-
except (IndexError, AttributeError):
|
129 |
-
markdown = "No text extracted or response invalid."
|
130 |
logger.info("Successfully processed uploaded image")
|
131 |
return markdown
|
132 |
except Exception as e:
|
@@ -137,7 +121,7 @@ def ocr_uploaded_image(image_file):
|
|
137 |
os.remove(temp_path)
|
138 |
|
139 |
# Document Understanding
|
140 |
-
def document_understanding(doc_url, question):
|
141 |
logger.info(f"Processing document understanding - URL: {doc_url}, Question: {question}")
|
142 |
try:
|
143 |
messages = [
|
@@ -147,10 +131,7 @@ def document_understanding(doc_url, question):
|
|
147 |
]}
|
148 |
]
|
149 |
chat_response = call_chat_complete(model="mistral-small-latest", messages=messages)
|
150 |
-
|
151 |
-
content = chat_response.choices[0].message.content
|
152 |
-
except (IndexError, AttributeError):
|
153 |
-
content = "No response received from the API."
|
154 |
logger.info("Successfully processed document understanding")
|
155 |
return content
|
156 |
except Exception as e:
|
@@ -175,26 +156,20 @@ class StructuredOCR(BaseModel):
|
|
175 |
languages: list[Language]
|
176 |
ocr_contents: dict
|
177 |
|
178 |
-
def structured_ocr(image_file):
|
179 |
logger.info(f"Processing structured OCR for image: {getattr(image_file, 'name', 'unknown')}")
|
180 |
temp_path = None
|
181 |
try:
|
182 |
-
# Get file content (handles both string and file-like objects)
|
183 |
content = get_file_content(image_file)
|
184 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
185 |
temp_file.write(content)
|
186 |
temp_path = temp_file.name
|
187 |
image_path = Path(temp_path)
|
188 |
encoded_image = encode_image(temp_path)
|
189 |
-
if "Error" in encoded_image:
|
190 |
-
raise ValueError(encoded_image)
|
191 |
base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
|
192 |
|
193 |
image_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
|
194 |
-
|
195 |
-
image_ocr_markdown = image_response.pages[0].markdown
|
196 |
-
except (IndexError, AttributeError):
|
197 |
-
image_ocr_markdown = "No text extracted."
|
198 |
|
199 |
chat_response = call_chat_complete(
|
200 |
model="pixtral-12b-latest",
|
@@ -212,12 +187,8 @@ def structured_ocr(image_file):
|
|
212 |
temperature=0
|
213 |
)
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
response_dict = json.loads(content)
|
218 |
-
except (json.JSONDecodeError, IndexError, AttributeError):
|
219 |
-
logger.error("Failed to parse structured response")
|
220 |
-
return "Failed to parse structured response. Please try again."
|
221 |
|
222 |
language_members = {member.value: member for member in Language}
|
223 |
valid_languages = [l for l in response_dict.get("languages", ["English"]) if l in language_members]
|
|
|
11 |
import logging
|
12 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
13 |
import tempfile
|
14 |
+
from typing import Union
|
15 |
|
16 |
# Set up logging
|
17 |
logging.basicConfig(level=logging.INFO)
|
|
|
24 |
client = Mistral(api_key=api_key)
|
25 |
|
26 |
# Helper function to encode image to base64
|
27 |
+
def encode_image(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    result is decoded to a UTF-8 ``str``.

    Args:
        image_path: Path of the file to read.

    Returns:
        The base64 encoding of the file's bytes.

    Raises:
        Exception: Whatever opening, reading, or encoding raised. The error
            is logged through the module-level ``logger`` and then re-raised
            unchanged so the caller decides how to report it.
    """
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    except Exception as e:
        # Lazy %-formatting: same message text, built only if the record is emitted.
        logger.error("Error encoding image %s: %s", image_path, e)
        raise
|
34 |
|
35 |
# Retry-enabled API call helpers
|
36 |
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def call_ocr_api(document: dict) -> "OCRResponse":
    """Send ``document`` to the Mistral OCR endpoint, retrying on failure.

    Args:
        document: Document descriptor forwarded unchanged to
            ``client.ocr.process``. Callers in this file pass dicts such as
            ``{"type": "document_url", "document_url": ...}`` or
            ``{"type": "image_url", "image_url": ...}``.

    Returns:
        Whatever ``client.ocr.process`` returns for the
        ``mistral-ocr-latest`` model; callers read ``.pages[0].markdown``.

    Raises:
        Exception: The error from the last of up to three attempts (made
            two seconds apart). ``reraise=True`` surfaces that original
            error instead of tenacity's ``RetryError`` wrapper, so the
            ``str(e)`` shown to users by the callers is meaningful.
    """
    # NOTE(review): the return annotation is quoted so it is not evaluated at
    # definition time; confirm OCRResponse is imported at the top of the file.
    return client.ocr.process(model="mistral-ocr-latest", document=document)
|
39 |
|
40 |
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
def call_chat_complete(model: str, messages: list, **kwargs):
    """Call the Mistral chat-completion endpoint, retrying on failure.

    Args:
        model: Model name forwarded to ``client.chat.complete``.
        messages: Chat messages forwarded to ``client.chat.complete``.
        **kwargs: Extra keyword arguments passed through unchanged
            (callers in this file pass e.g. ``temperature=0``).

    Returns:
        Whatever ``client.chat.complete`` returns. It is deliberately not
        annotated as ``dict``: callers read ``.choices[0].message.content``
        via attribute access, so the result is a response object.

    Raises:
        Exception: The error from the last of up to three attempts (made
            two seconds apart). ``reraise=True`` surfaces that original
            error instead of tenacity's ``RetryError`` wrapper.
    """
    return client.chat.complete(model=model, messages=messages, **kwargs)
|
43 |
|
44 |
# Helper function to get file content (handles both string paths and file-like objects)
|
45 |
+
def get_file_content(file_input: Union[str, bytes]) -> bytes:
|
46 |
if isinstance(file_input, str): # Gradio 3.x: file path
|
47 |
with open(file_input, "rb") as f:
|
48 |
return f.read()
|
|
|
50 |
return file_input.read()
|
51 |
|
52 |
# OCR with PDF URL
|
53 |
+
def ocr_pdf_url(pdf_url: str) -> str:
|
54 |
logger.info(f"Processing PDF URL: {pdf_url}")
|
55 |
try:
|
56 |
ocr_response = call_ocr_api({"type": "document_url", "document_url": pdf_url})
|
57 |
+
markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
|
|
|
|
|
|
|
58 |
logger.info("Successfully processed PDF URL")
|
59 |
return markdown
|
60 |
except Exception as e:
|
|
|
62 |
return f"**Error:** {str(e)}"
|
63 |
|
64 |
# OCR with Uploaded PDF
|
65 |
+
def ocr_uploaded_pdf(pdf_file: Union[str, bytes]) -> str:
|
66 |
logger.info(f"Processing uploaded PDF: {getattr(pdf_file, 'name', 'unknown')}")
|
67 |
temp_path = None
|
68 |
try:
|
|
|
69 |
content = get_file_content(pdf_file)
|
|
|
70 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
71 |
temp_file.write(content)
|
72 |
temp_path = temp_file.name
|
|
|
76 |
)
|
77 |
signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id, expiry=7200) # 2 hours
|
78 |
ocr_response = call_ocr_api({"type": "document_url", "document_url": signed_url.url})
|
79 |
+
markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
|
|
|
|
|
|
|
80 |
logger.info("Successfully processed uploaded PDF")
|
81 |
return markdown
|
82 |
except Exception as e:
|
|
|
87 |
os.remove(temp_path)
|
88 |
|
89 |
# OCR with Image URL
|
90 |
+
def ocr_image_url(image_url: str) -> str:
|
91 |
logger.info(f"Processing image URL: {image_url}")
|
92 |
try:
|
93 |
ocr_response = call_ocr_api({"type": "image_url", "image_url": image_url})
|
94 |
+
markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
|
|
|
|
|
|
|
95 |
logger.info("Successfully processed image URL")
|
96 |
return markdown
|
97 |
except Exception as e:
|
|
|
99 |
return f"**Error:** {str(e)}"
|
100 |
|
101 |
# OCR with Uploaded Image
|
102 |
+
def ocr_uploaded_image(image_file: Union[str, bytes]) -> str:
|
103 |
logger.info(f"Processing uploaded image: {getattr(image_file, 'name', 'unknown')}")
|
104 |
temp_path = None
|
105 |
try:
|
|
|
106 |
content = get_file_content(image_file)
|
107 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
108 |
temp_file.write(content)
|
109 |
temp_path = temp_file.name
|
110 |
encoded_image = encode_image(temp_path)
|
|
|
|
|
111 |
base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
|
112 |
ocr_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
|
113 |
+
markdown = ocr_response.pages[0].markdown if ocr_response.pages else "No text extracted or response invalid."
|
|
|
|
|
|
|
114 |
logger.info("Successfully processed uploaded image")
|
115 |
return markdown
|
116 |
except Exception as e:
|
|
|
121 |
os.remove(temp_path)
|
122 |
|
123 |
# Document Understanding
|
124 |
+
def document_understanding(doc_url: str, question: str) -> str:
|
125 |
logger.info(f"Processing document understanding - URL: {doc_url}, Question: {question}")
|
126 |
try:
|
127 |
messages = [
|
|
|
131 |
]}
|
132 |
]
|
133 |
chat_response = call_chat_complete(model="mistral-small-latest", messages=messages)
|
134 |
+
content = chat_response.choices[0].message.content if chat_response.choices else "No response received from the API."
|
|
|
|
|
|
|
135 |
logger.info("Successfully processed document understanding")
|
136 |
return content
|
137 |
except Exception as e:
|
|
|
156 |
languages: list[Language]
|
157 |
ocr_contents: dict
|
158 |
|
159 |
+
def structured_ocr(image_file: Union[str, bytes]) -> str:
|
160 |
logger.info(f"Processing structured OCR for image: {getattr(image_file, 'name', 'unknown')}")
|
161 |
temp_path = None
|
162 |
try:
|
|
|
163 |
content = get_file_content(image_file)
|
164 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
165 |
temp_file.write(content)
|
166 |
temp_path = temp_file.name
|
167 |
image_path = Path(temp_path)
|
168 |
encoded_image = encode_image(temp_path)
|
|
|
|
|
169 |
base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
|
170 |
|
171 |
image_response = call_ocr_api({"type": "image_url", "image_url": base64_data_url})
|
172 |
+
image_ocr_markdown = image_response.pages[0].markdown if image_response.pages else "No text extracted."
|
|
|
|
|
|
|
173 |
|
174 |
chat_response = call_chat_complete(
|
175 |
model="pixtral-12b-latest",
|
|
|
187 |
temperature=0
|
188 |
)
|
189 |
|
190 |
+
content = chat_response.choices[0].message.content if chat_response.choices else "{}"
|
191 |
+
response_dict = json.loads(content)
|
|
|
|
|
|
|
|
|
192 |
|
193 |
language_members = {member.value: member for member in Language}
|
194 |
valid_languages = [l for l in response_dict.get("languages", ["English"]) if l in language_members]
|