Spaces:
Running
Running
Push from the phone
Browse files
app.py
CHANGED
@@ -3,16 +3,15 @@ import base64
|
|
3 |
import gradio as gr
|
4 |
from mistralai import Mistral
|
5 |
from mistralai.models import OCRResponse
|
6 |
-
from mistralai.exceptions import MistralException
|
7 |
from pathlib import Path
|
8 |
-
from pydantic import BaseModel
|
9 |
import pycountry
|
10 |
import json
|
11 |
import logging
|
12 |
-
from tenacity import retry, stop_after_attempt, wait_fixed
|
13 |
import tempfile
|
14 |
from typing import Union, Dict, List
|
15 |
from contextlib import contextmanager
|
|
|
16 |
|
17 |
# Constants
|
18 |
DEFAULT_LANGUAGE = "English"
|
@@ -32,7 +31,7 @@ class OCRProcessor:
|
|
32 |
self.client = Mistral(api_key=self.api_key)
|
33 |
try:
|
34 |
self.client.models.list() # Validate API key
|
35 |
-
except
|
36 |
raise ValueError(f"Invalid API key: {str(e)}")
|
37 |
|
38 |
@staticmethod
|
@@ -52,26 +51,33 @@ class OCRProcessor:
|
|
52 |
if os.path.exists(temp_file.name):
|
53 |
os.unlink(temp_file.name)
|
54 |
|
55 |
-
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2)
|
56 |
def _call_ocr_api(self, document: Dict) -> OCRResponse:
|
57 |
try:
|
58 |
return self.client.ocr.process(model="mistral-ocr-latest", document=document)
|
59 |
-
except
|
60 |
logger.error(f"OCR API call failed: {str(e)}")
|
61 |
raise
|
62 |
|
63 |
-
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2)
|
64 |
def _call_chat_complete(self, model: str, messages: List[Dict], **kwargs) -> Dict:
|
65 |
try:
|
66 |
return self.client.chat.complete(model=model, messages=messages, **kwargs)
|
67 |
-
except
|
68 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
69 |
raise
|
70 |
|
71 |
def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
|
72 |
if isinstance(file_input, str):
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
return file_input.read() if hasattr(file_input, 'read') else file_input
|
76 |
|
77 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
@@ -158,7 +164,8 @@ class OCRProcessor:
|
|
158 |
temperature=0
|
159 |
)
|
160 |
|
161 |
-
|
|
|
162 |
return self._format_structured_response(temp_path, content)
|
163 |
except Exception as e:
|
164 |
return self._handle_error("structured OCR", e)
|
@@ -176,7 +183,7 @@ class OCRProcessor:
|
|
176 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
177 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
178 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
179 |
-
|
180 |
response = {
|
181 |
"file_name": Path(file_path).name,
|
182 |
"topics": content.get("topics", []),
|
@@ -189,13 +196,14 @@ def create_interface():
|
|
189 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
190 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
191 |
gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")
|
|
|
192 |
|
193 |
api_key_input = gr.Textbox(
|
194 |
label="Mistral API Key",
|
195 |
placeholder="Enter your Mistral API key here",
|
196 |
type="password"
|
197 |
)
|
198 |
-
|
199 |
def initialize_processor(api_key):
|
200 |
try:
|
201 |
processor = OCRProcessor(api_key)
|
@@ -263,4 +271,5 @@ def create_interface():
|
|
263 |
return demo
|
264 |
|
265 |
if __name__ == "__main__":
|
|
|
266 |
create_interface().launch(share=True, debug=True)
|
|
|
3 |
import gradio as gr
|
4 |
from mistralai import Mistral
|
5 |
from mistralai.models import OCRResponse
|
|
|
6 |
from pathlib import Path
|
|
|
7 |
import pycountry
|
8 |
import json
|
9 |
import logging
|
10 |
+
from tenacity import retry, stop_after_attempt, wait_fixed
|
11 |
import tempfile
|
12 |
from typing import Union, Dict, List
|
13 |
from contextlib import contextmanager
|
14 |
+
import requests
|
15 |
|
16 |
# Constants
|
17 |
DEFAULT_LANGUAGE = "English"
|
|
|
31 |
self.client = Mistral(api_key=self.api_key)
|
32 |
try:
|
33 |
self.client.models.list() # Validate API key
|
34 |
+
except Exception as e:
|
35 |
raise ValueError(f"Invalid API key: {str(e)}")
|
36 |
|
37 |
@staticmethod
|
|
|
51 |
if os.path.exists(temp_file.name):
|
52 |
os.unlink(temp_file.name)
|
53 |
|
54 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
|
55 |
def _call_ocr_api(self, document: Dict) -> OCRResponse:
|
56 |
try:
|
57 |
return self.client.ocr.process(model="mistral-ocr-latest", document=document)
|
58 |
+
except Exception as e:
|
59 |
logger.error(f"OCR API call failed: {str(e)}")
|
60 |
raise
|
61 |
|
62 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
|
63 |
def _call_chat_complete(self, model: str, messages: List[Dict], **kwargs) -> Dict:
|
64 |
try:
|
65 |
return self.client.chat.complete(model=model, messages=messages, **kwargs)
|
66 |
+
except Exception as e:
|
67 |
logger.error(f"Chat complete API call failed: {str(e)}")
|
68 |
raise
|
69 |
|
70 |
def _get_file_content(self, file_input: Union[str, bytes]) -> bytes:
|
71 |
if isinstance(file_input, str):
|
72 |
+
if file_input.startswith("http"):
|
73 |
+
# Handle URLs
|
74 |
+
response = requests.get(file_input)
|
75 |
+
response.raise_for_status()
|
76 |
+
return response.content
|
77 |
+
else:
|
78 |
+
# Handle local file paths
|
79 |
+
with open(file_input, "rb") as f:
|
80 |
+
return f.read()
|
81 |
return file_input.read() if hasattr(file_input, 'read') else file_input
|
82 |
|
83 |
def ocr_pdf_url(self, pdf_url: str) -> str:
|
|
|
164 |
temperature=0
|
165 |
)
|
166 |
|
167 |
+
response_content = chat_response.choices[0].message.content
|
168 |
+
content = json.loads(response_content)
|
169 |
return self._format_structured_response(temp_path, content)
|
170 |
except Exception as e:
|
171 |
return self._handle_error("structured OCR", e)
|
|
|
183 |
def _format_structured_response(file_path: str, content: Dict) -> str:
|
184 |
languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
|
185 |
valid_langs = [l for l in content.get("languages", [DEFAULT_LANGUAGE]) if l in languages.values()]
|
186 |
+
|
187 |
response = {
|
188 |
"file_name": Path(file_path).name,
|
189 |
"topics": content.get("topics", []),
|
|
|
196 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
197 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
198 |
gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")
|
199 |
+
gr.Markdown("**Note:** After entering your API key, click 'Set API Key' to validate and use it.")
|
200 |
|
201 |
api_key_input = gr.Textbox(
|
202 |
label="Mistral API Key",
|
203 |
placeholder="Enter your Mistral API key here",
|
204 |
type="password"
|
205 |
)
|
206 |
+
|
207 |
def initialize_processor(api_key):
|
208 |
try:
|
209 |
processor = OCRProcessor(api_key)
|
|
|
271 |
return demo
|
272 |
|
273 |
if __name__ == "__main__":
|
274 |
+
print(f"===== Application Startup at {os.environ.get('START_TIME', 'Unknown')} =====")
|
275 |
create_interface().launch(share=True, debug=True)
|