Spaces:
Running
Running
Fork for a better experience
Browse files
app.py
CHANGED
|
@@ -4,14 +4,13 @@ import gradio as gr
|
|
| 4 |
from mistralai import Mistral
|
| 5 |
from mistralai.models import OCRResponse
|
| 6 |
from pathlib import Path
|
| 7 |
-
from enum import Enum
|
| 8 |
from pydantic import BaseModel
|
| 9 |
import pycountry
|
| 10 |
import json
|
| 11 |
import logging
|
| 12 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 13 |
import tempfile
|
| 14 |
-
from typing import Union,
|
| 15 |
from contextlib import contextmanager
|
| 16 |
|
| 17 |
# Constants
|
|
@@ -25,10 +24,10 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
|
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
|
| 27 |
class OCRProcessor:
|
| 28 |
-
def __init__(self):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
self.client = Mistral(api_key=self.api_key)
|
| 33 |
|
| 34 |
@staticmethod
|
|
@@ -174,35 +173,83 @@ class OCRProcessor:
|
|
| 174 |
return f"```json\n{json.dumps(response, indent=4)}\n```"
|
| 175 |
|
| 176 |
def create_interface():
|
| 177 |
-
processor = OCRProcessor()
|
| 178 |
with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
|
| 179 |
gr.Markdown("# Mistral OCR & Structured Output App")
|
| 180 |
-
gr.Markdown("Extract text from PDFs and images or get structured JSON output")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
tabs = [
|
| 183 |
-
("OCR with PDF URL", gr.Textbox,
|
| 184 |
-
("OCR with Uploaded PDF", gr.File,
|
| 185 |
-
("OCR with Image URL", gr.Textbox,
|
| 186 |
-
("OCR with Uploaded Image", gr.File,
|
| 187 |
-
("Structured OCR", gr.File,
|
| 188 |
]
|
| 189 |
|
| 190 |
-
for name, input_type,
|
| 191 |
with gr.Tab(name):
|
| 192 |
if input_type == gr.Textbox:
|
| 193 |
inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
|
| 194 |
else: # gr.File
|
| 195 |
inputs = input_type(label=label, file_types=file_types)
|
| 196 |
output = gr.Markdown(label="Result")
|
| 197 |
-
# Use a more reliable way to get the button label
|
| 198 |
button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
with gr.Tab("Document Understanding"):
|
| 202 |
doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
|
| 203 |
question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
|
| 204 |
output = gr.Markdown(label="Answer")
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
| 207 |
return demo
|
| 208 |
|
|
|
|
| 4 |
from mistralai import Mistral
|
| 5 |
from mistralai.models import OCRResponse
|
| 6 |
from pathlib import Path
|
|
|
|
| 7 |
from pydantic import BaseModel
|
| 8 |
import pycountry
|
| 9 |
import json
|
| 10 |
import logging
|
| 11 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 12 |
import tempfile
|
| 13 |
+
from typing import Union, Dict, List
|
| 14 |
from contextlib import contextmanager
|
| 15 |
|
| 16 |
# Constants
|
|
|
|
| 24 |
logger = logging.getLogger(__name__)
|
| 25 |
|
| 26 |
class OCRProcessor:
|
| 27 |
+
def __init__(self, api_key: str):
    """Store the API key and build the Mistral client that uses it.

    Args:
        api_key: Key passed to ``Mistral(api_key=...)``. Must be truthy.

    Raises:
        ValueError: If ``api_key`` is empty or otherwise falsy.
    """
    if api_key:
        self.api_key = api_key
        self.client = Mistral(api_key=self.api_key)
    else:
        # Fail before any client is constructed so callers get a clear message.
        raise ValueError("API key must be provided")
|
| 32 |
|
| 33 |
@staticmethod
|
|
|
|
| 173 |
return f"```json\n{json.dumps(response, indent=4)}\n```"
|
| 174 |
|
| 175 |
def create_interface():
    """Build the Gradio Blocks UI for the Mistral OCR app.

    The user enters a Mistral API key, which is turned into an
    ``OCRProcessor`` held in ``gr.State``. Each OCR tab then looks up one
    processor method by name and calls it with that tab's input. A final
    tab calls ``processor.document_understanding(url, question)``.

    Returns:
        The ``gr.Blocks`` instance (not launched).
    """

    def initialize_processor(api_key):
        """Return an ``OCRProcessor`` for ``api_key``, or the error text on failure."""
        try:
            return OCRProcessor(api_key)
        except Exception as e:
            return str(e)

    def set_api_key(api_key):
        """Build the processor exactly once and derive the status message from it.

        Returns a ``(state_value, status_markdown)`` pair. On failure the state
        holds the error string, which the tab handlers treat as "no valid key".
        """
        result = initialize_processor(api_key)
        if isinstance(result, str):
            return result, f"**Error:** {result}"
        return result, "**Success:** API key set!"

    def make_tab_handler(method_name):
        """Return a click handler bound to ``method_name``.

        A factory is used so each handler captures its own method name; a
        closure defined directly in the loop would read the loop variable at
        call time and every tab would dispatch to the last entry.
        """

        def process_with_api(processor, input_data):
            if not processor or isinstance(processor, str):
                return "**Error:** Please set a valid API key first."
            return getattr(processor, method_name)(input_data)

        return process_with_api

    def doc_understanding_with_api(processor, url, q):
        """Answer question ``q`` about the document at ``url`` via the processor."""
        if not processor or isinstance(processor, str):
            return "**Error:** Please set a valid API key first."
        return processor.document_understanding(url, q)

    with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
        gr.Markdown("# Mistral OCR & Structured Output App")
        gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")

        # API Key input
        api_key_input = gr.Textbox(
            label="Mistral API Key",
            placeholder="Enter your Mistral API key here",
            type="password"  # Hide the API key for security
        )

        # Per-session holder for the OCRProcessor (or an error string).
        processor_state = gr.State()

        # Button to set API key
        set_api_button = gr.Button("Set API Key")
        api_status = gr.Markdown("API key not set. Please enter and set your key.")

        # Update processor and status when API key is set
        set_api_button.click(
            fn=set_api_key,
            inputs=api_key_input,
            outputs=[processor_state, api_status]
        )

        # (tab title, input component class, processor method name,
        #  input label, accepted file types or None for URL inputs)
        tabs = [
            ("OCR with PDF URL", gr.Textbox, "ocr_pdf_url", "PDF URL", None),
            ("OCR with Uploaded PDF", gr.File, "ocr_uploaded_pdf", "Upload PDF", SUPPORTED_PDF_TYPES),
            ("OCR with Image URL", gr.Textbox, "ocr_image_url", "Image URL", None),
            ("OCR with Uploaded Image", gr.File, "ocr_uploaded_image", "Upload Image", SUPPORTED_IMAGE_TYPES),
            ("Structured OCR", gr.File, "structured_ocr", "Upload Image", SUPPORTED_IMAGE_TYPES),
        ]

        for name, input_type, fn_name, label, file_types in tabs:
            with gr.Tab(name):
                if input_type == gr.Textbox:
                    inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
                else:  # gr.File
                    inputs = input_type(label=label, file_types=file_types)
                output = gr.Markdown(label="Result")
                button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")

                gr.Button(f"Process {button_label}").click(
                    fn=make_tab_handler(fn_name),
                    inputs=[processor_state, inputs],
                    outputs=output
                )

        with gr.Tab("Document Understanding"):
            doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
            question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
            output = gr.Markdown(label="Answer")

            gr.Button("Ask Question").click(
                fn=doc_understanding_with_api,
                inputs=[processor_state, doc_url, question],
                outputs=output
            )

    return demo
|
| 255 |
|