Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,24 @@
|
|
1 |
import re
|
2 |
import pandas as pd
|
|
|
3 |
import gradio as gr
|
4 |
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
def extract_po_data(text):
|
7 |
"""
|
8 |
Extracts purchase order data from the text into structured rows with ITEM, DESCRIPTION, QTY, UNIT, UNIT PRICE, TOTAL PRICE.
|
@@ -92,35 +108,41 @@ def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
|
92 |
return output_path
|
93 |
|
94 |
|
95 |
-
def
|
96 |
"""
|
97 |
-
Processes the
|
98 |
Args:
|
99 |
-
|
100 |
Returns:
|
101 |
-
|
102 |
"""
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
|
110 |
# Gradio Interface
|
111 |
def create_interface():
|
112 |
"""
|
113 |
-
Creates a Gradio interface for processing PO data.
|
114 |
"""
|
115 |
interface = gr.Interface(
|
116 |
-
fn=
|
117 |
-
inputs=gr.
|
118 |
outputs=[
|
119 |
gr.File(label="Download Extracted Excel"),
|
120 |
gr.Textbox(label="Status"),
|
121 |
],
|
122 |
title="PO Data Extraction",
|
123 |
-
description="
|
124 |
)
|
125 |
return interface
|
126 |
|
@@ -128,4 +150,4 @@ def create_interface():
|
|
128 |
if __name__ == "__main__":
|
129 |
# Run the Gradio app
|
130 |
app = create_interface()
|
131 |
-
app.launch()
|
|
|
1 |
import re
|
2 |
import pandas as pd
|
3 |
+
import pdfplumber
|
4 |
import gradio as gr
|
5 |
|
6 |
|
7 |
+
def extract_text_from_pdf(pdf_file):
    """
    Extracts text from an uploaded PDF file.

    Args:
        pdf_file: The uploaded PDF file. Either a path string, or an object
            that exposes the file's path through a ``.name`` attribute.

    Returns:
        str: The text of every page, each followed by a newline. A page for
            which no text is extracted contributes only the newline.
    """
    # A plain string is used as the path directly; anything else is expected
    # to carry the path in `.name`.
    # NOTE(review): which form the upload widget passes depends on its
    # version/configuration -- confirm against the deployed setup.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page with no extractable text;
        # fall back to "" so the concatenation cannot raise TypeError.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf.pages
        )
|
20 |
+
|
21 |
+
|
22 |
def extract_po_data(text):
|
23 |
"""
|
24 |
Extracts purchase order data from the text into structured rows with ITEM, DESCRIPTION, QTY, UNIT, UNIT PRICE, TOTAL PRICE.
|
|
|
108 |
return output_path
|
109 |
|
110 |
|
111 |
+
def process_pdf(file):
    """
    Processes the uploaded PDF file, extracts data, and saves it to an Excel file.

    Args:
        file: The uploaded PDF file, or None when nothing was uploaded.

    Returns:
        tuple: Path to the saved Excel file (None when no file was produced)
            and a status message.
    """
    # With no upload there is nothing to read; report that plainly instead of
    # letting an AttributeError message from deeper in the pipeline reach the user.
    if file is None:
        return None, "Error: No PDF file was uploaded."

    try:
        # Extract text from the uploaded PDF
        text = extract_text_from_pdf(file)
        # Extract structured data
        df, status = extract_po_data(text)
        # No frame means there is nothing to save; pass the status through.
        if df is None:
            return None, status
        output_path = save_to_excel(df)
        return output_path, status
    except Exception as e:
        # UI boundary: convert any failure into a status message rather than
        # letting the exception propagate out of the handler.
        return None, f"Error: {str(e)}"
|
130 |
|
131 |
|
132 |
# Gradio Interface
|
133 |
def create_interface():
    """
    Builds the Gradio interface that turns an uploaded purchase order PDF
    into a downloadable Excel file.

    Returns:
        The configured ``gr.Interface``: one PDF upload input wired to
        ``process_pdf``, with a file output and a status text output.
    """
    # Single input: a file picker restricted to .pdf uploads.
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])

    # Two outputs, in the order process_pdf returns them: (file path, status).
    result_components = [
        gr.File(label="Download Extracted Excel"),
        gr.Textbox(label="Status"),
    ]

    return gr.Interface(
        fn=process_pdf,
        inputs=pdf_upload,
        outputs=result_components,
        title="PO Data Extraction",
        description="Upload a purchase order PDF file to extract data into an Excel file.",
    )
|
148 |
|
|
|
150 |
if __name__ == "__main__":
    # Script entry point: build the interface, then start serving it.
    app = create_interface()
    app.launch()
|