Spaces:
Running
Running
Fork for a better experience
Browse files
app.py
CHANGED
@@ -4,14 +4,13 @@ import gradio as gr
|
|
4 |
from mistralai import Mistral
|
5 |
from mistralai.models import OCRResponse
|
6 |
from pathlib import Path
|
7 |
-
from enum import Enum
|
8 |
from pydantic import BaseModel
|
9 |
import pycountry
|
10 |
import json
|
11 |
import logging
|
12 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
13 |
import tempfile
|
14 |
-
from typing import Union,
|
15 |
from contextlib import contextmanager
|
16 |
|
17 |
# Constants
|
@@ -25,10 +24,10 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
|
|
25 |
logger = logging.getLogger(__name__)
|
26 |
|
27 |
class OCRProcessor:
|
28 |
-
def __init__(self):
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
self.client = Mistral(api_key=self.api_key)
|
33 |
|
34 |
@staticmethod
|
@@ -174,35 +173,83 @@ class OCRProcessor:
|
|
174 |
return f"```json\n{json.dumps(response, indent=4)}\n```"
|
175 |
|
176 |
def create_interface():
|
177 |
-
processor = OCRProcessor()
|
178 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
179 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
180 |
-
gr.Markdown("Extract text from PDFs and images or get structured JSON output")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
|
182 |
tabs = [
|
183 |
-
("OCR with PDF URL", gr.Textbox,
|
184 |
-
("OCR with Uploaded PDF", gr.File,
|
185 |
-
("OCR with Image URL", gr.Textbox,
|
186 |
-
("OCR with Uploaded Image", gr.File,
|
187 |
-
("Structured OCR", gr.File,
|
188 |
]
|
189 |
|
190 |
-
for name, input_type,
|
191 |
with gr.Tab(name):
|
192 |
if input_type == gr.Textbox:
|
193 |
inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
|
194 |
else: # gr.File
|
195 |
inputs = input_type(label=label, file_types=file_types)
|
196 |
output = gr.Markdown(label="Result")
|
197 |
-
# Use a more reliable way to get the button label
|
198 |
button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")
|
199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
201 |
with gr.Tab("Document Understanding"):
|
202 |
doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
|
203 |
question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
|
204 |
output = gr.Markdown(label="Answer")
|
205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
|
207 |
return demo
|
208 |
|
|
|
4 |
from mistralai import Mistral
|
5 |
from mistralai.models import OCRResponse
|
6 |
from pathlib import Path
|
|
|
7 |
from pydantic import BaseModel
|
8 |
import pycountry
|
9 |
import json
|
10 |
import logging
|
11 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
12 |
import tempfile
|
13 |
+
from typing import Union, Dict, List
|
14 |
from contextlib import contextmanager
|
15 |
|
16 |
# Constants
|
|
|
24 |
logger = logging.getLogger(__name__)
|
25 |
|
26 |
class OCRProcessor:
|
27 |
+
def __init__(self, api_key: str):
    """Create a processor backed by a Mistral client.

    Args:
        api_key: Mistral API key. When it is a string, surrounding
            whitespace is removed before it is validated and stored.

    Raises:
        ValueError: If ``api_key`` is empty, ``None``, or whitespace only.
    """
    # A key pasted into a text field may carry stray spaces or a trailing
    # newline; strip it so a blank-looking key is rejected here instead of
    # being handed to the client.
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not api_key:
        raise ValueError("API key must be provided")
    self.api_key = api_key
    self.client = Mistral(api_key=self.api_key)
|
32 |
|
33 |
@staticmethod
|
|
|
173 |
return f"```json\n{json.dumps(response, indent=4)}\n```"
|
174 |
|
175 |
def create_interface():
    """Build the Gradio Blocks UI for the Mistral OCR app.

    The user enters a Mistral API key, which is used to construct an
    ``OCRProcessor`` stored in a ``gr.State`` component. Each OCR tab and the
    "Document Understanding" tab read that processor from state and call the
    matching processor method with the tab's input.

    Returns:
        The assembled ``gr.Blocks`` object (not launched).
    """
    missing_key_message = "**Error:** Please set a valid API key first."

    def set_api_key(api_key):
        """Build a processor from *api_key* and return ``(state, status)``.

        The processor is constructed exactly once per click. On failure the
        state is reset to ``None`` and the error text is shown as the status.
        """
        try:
            processor = OCRProcessor(api_key)
        except Exception as exc:  # UI boundary: report the failure as text
            return None, f"**Error:** {exc}"
        return processor, "**Success:** API key set!"

    def make_handler(method_name):
        """Return a click handler bound to *method_name*.

        A factory is used so each tab captures its own method name; a closure
        defined directly in the loop body would see only the loop variable's
        final value when the button is eventually clicked.
        """
        def handler(processor, input_data):
            # Tolerate a leftover error string in state as well as None.
            if not processor or isinstance(processor, str):
                return missing_key_message
            return getattr(processor, method_name)(input_data)

        return handler

    def doc_understanding_with_api(processor, url, q):
        """Answer question *q* about the document at *url* via the processor."""
        if not processor or isinstance(processor, str):
            return missing_key_message
        return processor.document_understanding(url, q)

    with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
        gr.Markdown("# Mistral OCR & Structured Output App")
        gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")

        # API key input; rendered as a password field so the key is hidden.
        api_key_input = gr.Textbox(
            label="Mistral API Key",
            placeholder="Enter your Mistral API key here",
            type="password",
        )

        # Holds the OCRProcessor built from the key (None until one is set).
        processor_state = gr.State()

        set_api_button = gr.Button("Set API Key")
        api_status = gr.Markdown("API key not set. Please enter and set your key.")

        set_api_button.click(
            fn=set_api_key,
            inputs=api_key_input,
            outputs=[processor_state, api_status],
        )

        # (tab title, input component, OCRProcessor method, input label, file types)
        tabs = [
            ("OCR with PDF URL", gr.Textbox, "ocr_pdf_url", "PDF URL", None),
            ("OCR with Uploaded PDF", gr.File, "ocr_uploaded_pdf", "Upload PDF", SUPPORTED_PDF_TYPES),
            ("OCR with Image URL", gr.Textbox, "ocr_image_url", "Image URL", None),
            ("OCR with Uploaded Image", gr.File, "ocr_uploaded_image", "Upload Image", SUPPORTED_IMAGE_TYPES),
            ("Structured OCR", gr.File, "structured_ocr", "Upload Image", SUPPORTED_IMAGE_TYPES),
        ]

        for name, input_type, fn_name, label, file_types in tabs:
            with gr.Tab(name):
                if input_type is gr.Textbox:
                    inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
                else:  # gr.File
                    inputs = input_type(label=label, file_types=file_types)
                output = gr.Markdown(label="Result")
                button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")

                gr.Button(f"Process {button_label}").click(
                    fn=make_handler(fn_name),
                    inputs=[processor_state, inputs],
                    outputs=output,
                )

        with gr.Tab("Document Understanding"):
            doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
            question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
            output = gr.Markdown(label="Answer")

            gr.Button("Ask Question").click(
                fn=doc_understanding_with_api,
                inputs=[processor_state, doc_url, question],
                outputs=output,
            )

    return demo
|
255 |
|