Svngoku committed on
Commit
e851339
·
verified ·
1 Parent(s): ef7763d

Fork for a better experience

Browse files
Files changed (1) hide show
  1. app.py +64 -17
app.py CHANGED
@@ -4,14 +4,13 @@ import gradio as gr
4
  from mistralai import Mistral
5
  from mistralai.models import OCRResponse
6
  from pathlib import Path
7
- from enum import Enum
8
  from pydantic import BaseModel
9
  import pycountry
10
  import json
11
  import logging
12
  from tenacity import retry, stop_after_attempt, wait_fixed
13
  import tempfile
14
- from typing import Union, Optional, Dict, List
15
  from contextlib import contextmanager
16
 
17
  # Constants
@@ -25,10 +24,10 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
25
  logger = logging.getLogger(__name__)
26
 
27
  class OCRProcessor:
28
- def __init__(self):
29
- self.api_key = os.environ.get("MISTRAL_API_KEY")
30
- if not self.api_key:
31
- raise ValueError("MISTRAL_API_KEY environment variable is not set")
32
  self.client = Mistral(api_key=self.api_key)
33
 
34
  @staticmethod
@@ -174,35 +173,83 @@ class OCRProcessor:
174
  return f"```json\n{json.dumps(response, indent=4)}\n```"
175
 
176
  def create_interface():
177
- processor = OCRProcessor()
178
  with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
179
  gr.Markdown("# Mistral OCR & Structured Output App")
180
- gr.Markdown("Extract text from PDFs and images or get structured JSON output")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  tabs = [
183
- ("OCR with PDF URL", gr.Textbox, processor.ocr_pdf_url, "PDF URL", None),
184
- ("OCR with Uploaded PDF", gr.File, processor.ocr_uploaded_pdf, "Upload PDF", SUPPORTED_PDF_TYPES),
185
- ("OCR with Image URL", gr.Textbox, processor.ocr_image_url, "Image URL", None),
186
- ("OCR with Uploaded Image", gr.File, processor.ocr_uploaded_image, "Upload Image", SUPPORTED_IMAGE_TYPES),
187
- ("Structured OCR", gr.File, processor.structured_ocr, "Upload Image", SUPPORTED_IMAGE_TYPES),
188
  ]
189
 
190
- for name, input_type, fn, label, file_types in tabs:
191
  with gr.Tab(name):
192
  if input_type == gr.Textbox:
193
  inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
194
  else: # gr.File
195
  inputs = input_type(label=label, file_types=file_types)
196
  output = gr.Markdown(label="Result")
197
- # Use a more reliable way to get the button label
198
  button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")
199
- gr.Button(f"Process {button_label}").click(fn, inputs=inputs, outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  with gr.Tab("Document Understanding"):
202
  doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
203
  question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
204
  output = gr.Markdown(label="Answer")
205
- gr.Button("Ask Question").click(processor.document_understanding, inputs=[doc_url, question], outputs=output)
 
 
 
 
 
 
 
 
 
 
206
 
207
  return demo
208
 
 
4
  from mistralai import Mistral
5
  from mistralai.models import OCRResponse
6
  from pathlib import Path
 
7
  from pydantic import BaseModel
8
  import pycountry
9
  import json
10
  import logging
11
  from tenacity import retry, stop_after_attempt, wait_fixed
12
  import tempfile
13
+ from typing import Union, Dict, List
14
  from contextlib import contextmanager
15
 
16
  # Constants
 
24
  logger = logging.getLogger(__name__)
25
 
26
  class OCRProcessor:
27
def __init__(self, api_key: str):
    """Create a processor bound to a Mistral API key.

    Args:
        api_key: Mistral API key. Leading/trailing whitespace is ignored.

    Raises:
        ValueError: If ``api_key`` is empty, whitespace-only, or not a string.
    """
    # Keys pasted into a form frequently carry stray spaces or a trailing
    # newline; a whitespace-only value must be rejected like an empty one
    # instead of failing later with an opaque authentication error.
    cleaned_key = api_key.strip() if isinstance(api_key, str) else ""
    if not cleaned_key:
        raise ValueError("API key must be provided")
    self.api_key = cleaned_key
    self.client = Mistral(api_key=self.api_key)
32
 
33
  @staticmethod
 
173
  return f"```json\n{json.dumps(response, indent=4)}\n```"
174
 
175
  def create_interface():
 
176
  with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
177
  gr.Markdown("# Mistral OCR & Structured Output App")
178
+ gr.Markdown("Enter your Mistral API key below to use the app. Extract text from PDFs and images or get structured JSON output.")
179
+
180
+ # API Key input
181
+ api_key_input = gr.Textbox(
182
+ label="Mistral API Key",
183
+ placeholder="Enter your Mistral API key here",
184
+ type="password" # Hide the API key for security
185
+ )
186
+
187
+ # Function to initialize processor with API key
188
+ def initialize_processor(api_key):
189
+ try:
190
+ return OCRProcessor(api_key)
191
+ except Exception as e:
192
+ return str(e)
193
+
194
+ # Store processor state
195
+ processor_state = gr.State()
196
+
197
+ # Button to set API key
198
+ set_api_button = gr.Button("Set API Key")
199
+ api_status = gr.Markdown("API key not set. Please enter and set your key.")
200
+
201
+ # Update processor and status when API key is set
202
+ set_api_button.click(
203
+ fn=lambda key: (initialize_processor(key), "**Success:** API key set!" if not isinstance(initialize_processor(key), str) else f"**Error:** {initialize_processor(key)}"),
204
+ inputs=api_key_input,
205
+ outputs=[processor_state, api_status]
206
+ )
207
 
208
  tabs = [
209
+ ("OCR with PDF URL", gr.Textbox, "ocr_pdf_url", "PDF URL", None),
210
+ ("OCR with Uploaded PDF", gr.File, "ocr_uploaded_pdf", "Upload PDF", SUPPORTED_PDF_TYPES),
211
+ ("OCR with Image URL", gr.Textbox, "ocr_image_url", "Image URL", None),
212
+ ("OCR with Uploaded Image", gr.File, "ocr_uploaded_image", "Upload Image", SUPPORTED_IMAGE_TYPES),
213
+ ("Structured OCR", gr.File, "structured_ocr", "Upload Image", SUPPORTED_IMAGE_TYPES),
214
  ]
215
 
216
+ for name, input_type, fn_name, label, file_types in tabs:
217
  with gr.Tab(name):
218
  if input_type == gr.Textbox:
219
  inputs = input_type(label=label, placeholder=f"e.g., https://example.com/{label.lower().replace(' ', '')}")
220
  else: # gr.File
221
  inputs = input_type(label=label, file_types=file_types)
222
  output = gr.Markdown(label="Result")
 
223
  button_label = name.replace("OCR with ", "").replace("Structured ", "Get Structured ")
224
+
225
+ # Wrapper function to use processor from state
226
+ def process_with_api(processor, input_data):
227
+ if not processor or isinstance(processor, str):
228
+ return "**Error:** Please set a valid API key first."
229
+ fn = getattr(processor, fn_name)
230
+ return fn(input_data)
231
+
232
+ gr.Button(f"Process {button_label}").click(
233
+ fn=process_with_api,
234
+ inputs=[processor_state, inputs],
235
+ outputs=output
236
+ )
237
 
238
  with gr.Tab("Document Understanding"):
239
  doc_url = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
240
  question = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
241
  output = gr.Markdown(label="Answer")
242
+
243
+ def doc_understanding_with_api(processor, url, q):
244
+ if not processor or isinstance(processor, str):
245
+ return "**Error:** Please set a valid API key first."
246
+ return processor.document_understanding(url, q)
247
+
248
+ gr.Button("Ask Question").click(
249
+ fn=doc_understanding_with_api,
250
+ inputs=[processor_state, doc_url, question],
251
+ outputs=output
252
+ )
253
 
254
  return demo
255