Svngoku committed on
Commit
fd84b98
·
verified ·
1 Parent(s): 4cce203

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -21
app.py CHANGED
@@ -31,9 +31,10 @@ def ocr_pdf_url(pdf_url):
31
  document={"type": "document_url", "document_url": pdf_url},
32
  include_image_base64=True
33
  )
34
- return ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
 
35
  except Exception as e:
36
- return f"Error: {str(e)}"
37
 
38
  # OCR with Uploaded PDF
39
  def ocr_uploaded_pdf(pdf_file):
@@ -48,9 +49,10 @@ def ocr_uploaded_pdf(pdf_file):
48
  document={"type": "document_url", "document_url": signed_url.url},
49
  include_image_base64=True
50
  )
51
- return ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
 
52
  except Exception as e:
53
- return f"Error: {str(e)}"
54
 
55
  # OCR with Image URL
56
  def ocr_image_url(image_url):
@@ -59,23 +61,25 @@ def ocr_image_url(image_url):
59
  model="mistral-ocr-latest",
60
  document={"type": "image_url", "image_url": image_url}
61
  )
62
- return ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
 
63
  except Exception as e:
64
- return f"Error: {str(e)}"
65
 
66
  # OCR with Uploaded Image
67
  def ocr_uploaded_image(image_file):
68
  try:
69
  base64_image = encode_image(image_file.name)
70
  if "Error" in base64_image:
71
- return base64_image
72
  ocr_response = client.ocr.process(
73
  model="mistral-ocr-latest",
74
  document={"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_image}"}
75
  )
76
- return ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
 
77
  except Exception as e:
78
- return f"Error: {str(e)}"
79
 
80
  # Document Understanding
81
  def document_understanding(doc_url, question):
@@ -90,9 +94,9 @@ def document_understanding(doc_url, question):
90
  model="mistral-small-latest",
91
  messages=messages
92
  )
93
- return chat_response.choices[0].message.content
94
  except Exception as e:
95
- return f"Error: {str(e)}"
96
 
97
  # Structured OCR Setup
98
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
@@ -117,7 +121,7 @@ def structured_ocr(image_file):
117
  image_path = Path(image_file.name)
118
  encoded_image = encode_image(image_path)
119
  if "Error" in encoded_image:
120
- return encoded_image
121
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
122
 
123
  # OCR processing
@@ -151,49 +155,50 @@ def structured_ocr(image_file):
151
  "languages": [Language[l] for l in response_dict.get("languages", ["English"]) if l in languages.values()],
152
  "ocr_contents": response_dict.get("ocr_contents", {})
153
  })
154
- return json.dumps(structured_response.dict(), indent=4)
 
155
  except Exception as e:
156
- return f"Error: {str(e)}"
157
 
158
  # Gradio Interface
159
  with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
160
  gr.Markdown("# Mistral OCR & Structured Output App")
161
- gr.Markdown("Extract text from PDFs and images, ask questions about documents, or get structured JSON output!")
162
 
163
  with gr.Tab("OCR with PDF URL"):
164
  pdf_url_input = gr.Textbox(label="PDF URL", placeholder="e.g., https://arxiv.org/pdf/2201.04234")
165
- pdf_url_output = gr.Textbox(label="OCR Result (Markdown)")
166
  pdf_url_button = gr.Button("Process PDF")
167
  pdf_url_button.click(ocr_pdf_url, inputs=pdf_url_input, outputs=pdf_url_output)
168
 
169
  with gr.Tab("OCR with Uploaded PDF"):
170
  pdf_file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
171
- pdf_file_output = gr.Textbox(label="OCR Result (Markdown)")
172
  pdf_file_button = gr.Button("Process Uploaded PDF")
173
  pdf_file_button.click(ocr_uploaded_pdf, inputs=pdf_file_input, outputs=pdf_file_output)
174
 
175
  with gr.Tab("OCR with Image URL"):
176
  image_url_input = gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg")
177
- image_url_output = gr.Textbox(label="OCR Result (Markdown)")
178
  image_url_button = gr.Button("Process Image")
179
  image_url_button.click(ocr_image_url, inputs=image_url_input, outputs=image_url_output)
180
 
181
  with gr.Tab("OCR with Uploaded Image"):
182
  image_file_input = gr.File(label="Upload Image", file_types=[".jpg", ".png"])
183
- image_file_output = gr.Textbox(label="OCR Result (Markdown)")
184
  image_file_button = gr.Button("Process Uploaded Image")
185
  image_file_button.click(ocr_uploaded_image, inputs=image_file_input, outputs=image_file_output)
186
 
187
  with gr.Tab("Document Understanding"):
188
  doc_url_input = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
189
  question_input = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
190
- doc_output = gr.Textbox(label="Answer")
191
  doc_button = gr.Button("Ask Question")
192
  doc_button.click(document_understanding, inputs=[doc_url_input, question_input], outputs=doc_output)
193
 
194
  with gr.Tab("Structured OCR"):
195
  struct_image_input = gr.File(label="Upload Image", file_types=[".jpg", ".png"])
196
- struct_output = gr.Textbox(label="Structured JSON Output")
197
  struct_button = gr.Button("Get Structured Output")
198
  struct_button.click(structured_ocr, inputs=struct_image_input, outputs=struct_output)
199
 
 
31
  document={"type": "document_url", "document_url": pdf_url},
32
  include_image_base64=True
33
  )
34
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
35
+ return markdown # Return raw markdown for gr.Markdown to render
36
  except Exception as e:
37
+ return f"**Error:** {str(e)}"
38
 
39
  # OCR with Uploaded PDF
40
  def ocr_uploaded_pdf(pdf_file):
 
49
  document={"type": "document_url", "document_url": signed_url.url},
50
  include_image_base64=True
51
  )
52
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
53
+ return markdown
54
  except Exception as e:
55
+ return f"**Error:** {str(e)}"
56
 
57
  # OCR with Image URL
58
  def ocr_image_url(image_url):
 
61
  model="mistral-ocr-latest",
62
  document={"type": "image_url", "image_url": image_url}
63
  )
64
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
65
+ return markdown
66
  except Exception as e:
67
+ return f"**Error:** {str(e)}"
68
 
69
  # OCR with Uploaded Image
70
  def ocr_uploaded_image(image_file):
71
  try:
72
  base64_image = encode_image(image_file.name)
73
  if "Error" in base64_image:
74
+ return f"**Error:** {base64_image}"
75
  ocr_response = client.ocr.process(
76
  model="mistral-ocr-latest",
77
  document={"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_image}"}
78
  )
79
+ markdown = ocr_response.pages[0].markdown if ocr_response.pages else str(ocr_response)
80
+ return markdown
81
  except Exception as e:
82
+ return f"**Error:** {str(e)}"
83
 
84
  # Document Understanding
85
  def document_understanding(doc_url, question):
 
94
  model="mistral-small-latest",
95
  messages=messages
96
  )
97
+ return chat_response.choices[0].message.content # Plain text output
98
  except Exception as e:
99
+ return f"**Error:** {str(e)}"
100
 
101
  # Structured OCR Setup
102
  languages = {lang.alpha_2: lang.name for lang in pycountry.languages if hasattr(lang, 'alpha_2')}
 
121
  image_path = Path(image_file.name)
122
  encoded_image = encode_image(image_path)
123
  if "Error" in encoded_image:
124
+ return f"**Error:** {encoded_image}"
125
  base64_data_url = f"data:image/jpeg;base64,{encoded_image}"
126
 
127
  # OCR processing
 
155
  "languages": [Language[l] for l in response_dict.get("languages", ["English"]) if l in languages.values()],
156
  "ocr_contents": response_dict.get("ocr_contents", {})
157
  })
158
+ # Return as Markdown code block
159
+ return f"```json\n{json.dumps(structured_response.dict(), indent=4)}\n```"
160
  except Exception as e:
161
+ return f"**Error:** {str(e)}"
162
 
163
  # Gradio Interface
164
  with gr.Blocks(title="Mistral OCR & Structured Output App") as demo:
165
  gr.Markdown("# Mistral OCR & Structured Output App")
166
+ gr.Markdown("Extract text from PDFs and images, ask questions about documents, or get structured JSON output in Markdown format!")
167
 
168
  with gr.Tab("OCR with PDF URL"):
169
  pdf_url_input = gr.Textbox(label="PDF URL", placeholder="e.g., https://arxiv.org/pdf/2201.04234")
170
+ pdf_url_output = gr.Markdown(label="OCR Result (Markdown)")
171
  pdf_url_button = gr.Button("Process PDF")
172
  pdf_url_button.click(ocr_pdf_url, inputs=pdf_url_input, outputs=pdf_url_output)
173
 
174
  with gr.Tab("OCR with Uploaded PDF"):
175
  pdf_file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
176
+ pdf_file_output = gr.Markdown(label="OCR Result (Markdown)")
177
  pdf_file_button = gr.Button("Process Uploaded PDF")
178
  pdf_file_button.click(ocr_uploaded_pdf, inputs=pdf_file_input, outputs=pdf_file_output)
179
 
180
  with gr.Tab("OCR with Image URL"):
181
  image_url_input = gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg")
182
+ image_url_output = gr.Markdown(label="OCR Result (Markdown)")
183
  image_url_button = gr.Button("Process Image")
184
  image_url_button.click(ocr_image_url, inputs=image_url_input, outputs=image_url_output)
185
 
186
  with gr.Tab("OCR with Uploaded Image"):
187
  image_file_input = gr.File(label="Upload Image", file_types=[".jpg", ".png"])
188
+ image_file_output = gr.Markdown(label="OCR Result (Markdown)")
189
  image_file_button = gr.Button("Process Uploaded Image")
190
  image_file_button.click(ocr_uploaded_image, inputs=image_file_input, outputs=image_file_output)
191
 
192
  with gr.Tab("Document Understanding"):
193
  doc_url_input = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
194
  question_input = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
195
+ doc_output = gr.Textbox(label="Answer") # Keep as Textbox for plain text
196
  doc_button = gr.Button("Ask Question")
197
  doc_button.click(document_understanding, inputs=[doc_url_input, question_input], outputs=doc_output)
198
 
199
  with gr.Tab("Structured OCR"):
200
  struct_image_input = gr.File(label="Upload Image", file_types=[".jpg", ".png"])
201
+ struct_output = gr.Markdown(label="Structured JSON Output (Markdown)")
202
  struct_button = gr.Button("Get Structured Output")
203
  struct_button.click(structured_ocr, inputs=struct_image_input, outputs=struct_output)
204