Svngoku committed on
Commit
f1996dd
·
verified ·
1 Parent(s): c74579f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import gradio as gr
4
+ from mistralai import Mistral
5
+
6
+ # Initialize Mistral client with API key
7
# The API key is read from the environment so it never lives in the source;
# startup is aborted right away when it is missing or empty.
api_key = os.getenv("MISTRAL_API_KEY")
if api_key in (None, ""):
    raise ValueError("Please set the MISTRAL_API_KEY environment variable.")

# Single client instance shared by every handler below.
client = Mistral(api_key=api_key)
11
+
12
+ # Helper function to encode image to base64
13
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    Errors are reported in-band rather than raised: if anything goes wrong
    while opening, reading or encoding the file, the function returns a
    string of the form ``"Error encoding image: <message>"`` instead of the
    encoded payload, so callers must inspect the result.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode("utf-8")
    except Exception as exc:
        return "Error encoding image: " + str(exc)
19
+
20
+ # OCR with PDF URL
21
def ocr_pdf_url(pdf_url):
    """Run Mistral OCR on a PDF that is reachable by URL.

    Args:
        pdf_url: URL of the PDF document. Surrounding whitespace is ignored.

    Returns:
        The OCR response converted with ``str()`` for display, or a string
        starting with ``"Error: "`` when the URL is blank or the request
        fails.
    """
    # Reject blank input (None, "" or whitespace only) up front instead of
    # spending an API round trip on a request that cannot succeed.
    url = pdf_url.strip() if isinstance(pdf_url, str) else ""
    if not url:
        return "Error: please provide a PDF URL."

    try:
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "document_url",
                "document_url": url,
            },
        )
        return str(ocr_response)  # Convert response to string for display
    except Exception as e:
        # UI boundary: show the failure in the output box instead of crashing.
        return f"Error: {e}"
33
+
34
+ # OCR with Uploaded PDF
35
def ocr_uploaded_pdf(pdf_file):
    """Upload a local PDF to Mistral and run OCR on it.

    The file is uploaded with purpose ``"ocr"``, a signed URL is requested
    for the uploaded file, and that URL is passed to the OCR endpoint.

    Args:
        pdf_file: Either an object exposing the local path as ``.name`` or a
            plain path string. ``None`` means nothing was uploaded.

    Returns:
        The OCR response converted with ``str()`` for display, or a string
        starting with ``"Error: "`` when no file was given or any step fails.
    """
    if pdf_file is None:
        return "Error: please upload a PDF file."

    try:
        # Accept both file-like wrappers (path in `.name`) and bare paths.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        # Upload the PDF. The context manager guarantees the handle is closed
        # even if the upload raises; previously it was never closed.
        with open(pdf_path, "rb") as pdf_handle:
            uploaded_pdf = client.files.upload(
                file={
                    # Send only the base name, not the local directory layout.
                    "file_name": os.path.basename(pdf_path),
                    "content": pdf_handle,
                },
                purpose="ocr",
            )

        # Get signed URL
        signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

        # Process OCR
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "document_url",
                "document_url": signed_url.url,
            },
        )
        return str(ocr_response)
    except Exception as e:
        # UI boundary: show the failure in the output box instead of crashing.
        return f"Error: {e}"
58
+
59
+ # OCR with Image URL
60
def ocr_image_url(image_url):
    """Run Mistral OCR on an image that is reachable by URL.

    Args:
        image_url: URL of the image. Surrounding whitespace is ignored.

    Returns:
        The OCR response converted with ``str()`` for display, or a string
        starting with ``"Error: "`` when the URL is blank or the request
        fails.
    """
    # Reject blank input (None, "" or whitespace only) up front instead of
    # spending an API round trip on a request that cannot succeed.
    url = image_url.strip() if isinstance(image_url, str) else ""
    if not url:
        return "Error: please provide an image URL."

    try:
        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": url,
            },
        )
        return str(ocr_response)
    except Exception as e:
        # UI boundary: show the failure in the output box instead of crashing.
        return f"Error: {e}"
72
+
73
+ # OCR with Uploaded Image
74
def ocr_uploaded_image(image_file):
    """Run Mistral OCR on a locally uploaded image.

    The image is base64-encoded with ``encode_image`` and sent to the OCR
    endpoint as a ``data:`` URL.

    Args:
        image_file: Either an object exposing the local path as ``.name`` or
            a plain path string. ``None`` means nothing was uploaded.

    Returns:
        The OCR response converted with ``str()`` for display, or a string
        starting with ``"Error"`` when no file was given, the file could not
        be encoded, or the request fails.
    """
    import mimetypes  # local import: only this handler needs it

    if image_file is None:
        return "Error: please upload an image file."

    try:
        # Accept both file-like wrappers (path in `.name`) and bare paths.
        image_path = getattr(image_file, "name", image_file)

        base64_image = encode_image(image_path)
        # encode_image reports failures in-band. Match its full prefix at the
        # start of the string: the base64 alphabet has no space or colon, so a
        # real payload can never begin with it, whereas the bare substring
        # "Error" can legitimately occur inside valid base64 data.
        if base64_image.startswith("Error encoding image:"):
            return base64_image

        # Label the payload with the file's actual type (e.g. image/png)
        # instead of always claiming JPEG; fall back to JPEG when unknown.
        mime_type = mimetypes.guess_type(image_path)[0]
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"

        ocr_response = client.ocr.process(
            model="mistral-ocr-latest",
            document={
                "type": "image_url",
                "image_url": f"data:{mime_type};base64,{base64_image}",
            },
        )
        return str(ocr_response)
    except Exception as e:
        # UI boundary: show the failure in the output box instead of crashing.
        return f"Error: {e}"
89
+
90
+ # Document Understanding
91
def document_understanding(doc_url, question):
    """Ask a question about a document reachable by URL.

    Sends one user message that contains the question text and the document
    URL to the ``mistral-small-latest`` chat model.

    Args:
        doc_url: URL of the document. Surrounding whitespace is ignored.
        question: The question to ask about the document.

    Returns:
        The content of the first choice's message, or a string starting with
        ``"Error: "`` when an input is blank or the request fails.
    """
    # Reject blank inputs (None, "" or whitespace only) before calling the
    # API, so the user gets a clear message instead of a remote error.
    url = doc_url.strip() if isinstance(doc_url, str) else ""
    prompt = question.strip() if isinstance(question, str) else ""
    if not url:
        return "Error: please provide a document URL."
    if not prompt:
        return "Error: please provide a question."

    try:
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "document_url", "document_url": url},
                ],
            }
        ]
        chat_response = client.chat.complete(
            model="mistral-small-latest",
            messages=messages,
        )
        return chat_response.choices[0].message.content
    except Exception as e:
        # UI boundary: show the failure in the output box instead of crashing.
        return f"Error: {e}"
109
+
110
+ # Gradio Interface
111
# One tab per workflow; each tab wires its inputs to a single handler and
# shows the handler's returned string in a text box.
with gr.Blocks(title="Mistral OCR & Document Understanding App") as demo:
    gr.Markdown("# Mistral OCR & Document Understanding App")
    gr.Markdown("Use this app to extract text from PDFs and images or ask questions about documents!")

    with gr.Tab("OCR with PDF URL"):
        pdf_url_input = gr.Textbox(label="PDF URL", placeholder="e.g., https://arxiv.org/pdf/2201.04234")
        pdf_url_output = gr.Textbox(label="OCR Result")
        pdf_url_button = gr.Button("Process PDF")
        pdf_url_button.click(ocr_pdf_url, inputs=pdf_url_input, outputs=pdf_url_output)

    with gr.Tab("OCR with Uploaded PDF"):
        pdf_file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        pdf_file_output = gr.Textbox(label="OCR Result")
        pdf_file_button = gr.Button("Process Uploaded PDF")
        pdf_file_button.click(ocr_uploaded_pdf, inputs=pdf_file_input, outputs=pdf_file_output)

    with gr.Tab("OCR with Image URL"):
        image_url_input = gr.Textbox(label="Image URL", placeholder="e.g., https://example.com/image.jpg")
        image_url_output = gr.Textbox(label="OCR Result")
        image_url_button = gr.Button("Process Image")
        image_url_button.click(ocr_image_url, inputs=image_url_input, outputs=image_url_output)

    with gr.Tab("OCR with Uploaded Image"):
        # ".jpeg" is accepted alongside ".jpg": both are the same format, and
        # rejecting one of the two spellings only blocks valid uploads.
        image_file_input = gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"])
        image_file_output = gr.Textbox(label="OCR Result")
        image_file_button = gr.Button("Process Uploaded Image")
        image_file_button.click(ocr_uploaded_image, inputs=image_file_input, outputs=image_file_output)

    with gr.Tab("Document Understanding"):
        doc_url_input = gr.Textbox(label="Document URL", placeholder="e.g., https://arxiv.org/pdf/1805.04770")
        question_input = gr.Textbox(label="Question", placeholder="e.g., What is the last sentence?")
        doc_output = gr.Textbox(label="Answer")
        doc_button = gr.Button("Ask Question")
        doc_button.click(document_understanding, inputs=[doc_url_input, question_input], outputs=doc_output)

# Launch the app
demo.launch()