Update app.py
Browse files
app.py
CHANGED
@@ -36,8 +36,7 @@ def clean_description(description, item_number=None):
|
|
36 |
# Function: Parse PO Items
|
37 |
def parse_po_items_with_filters(text):
|
38 |
"""
|
39 |
-
Parses purchase order items from the extracted text
|
40 |
-
Ensures items are not merged and handles split descriptions across lines.
|
41 |
"""
|
42 |
lines = text.splitlines()
|
43 |
data = []
|
@@ -92,10 +91,10 @@ def parse_po_items_with_filters(text):
|
|
92 |
)
|
93 |
data.append(current_item)
|
94 |
|
95 |
-
#
|
96 |
for i, row in enumerate(data):
|
97 |
if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
|
98 |
-
# Dynamically split merged descriptions
|
99 |
item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
|
100 |
if item_3_match:
|
101 |
data.insert(
|
@@ -111,7 +110,7 @@ def parse_po_items_with_filters(text):
|
|
111 |
)
|
112 |
row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
|
113 |
|
114 |
-
#
|
115 |
data = [row for row in data if row["Description"]]
|
116 |
|
117 |
# Return data as a DataFrame
|
@@ -121,6 +120,7 @@ def parse_po_items_with_filters(text):
|
|
121 |
return df, "Data extracted successfully."
|
122 |
|
123 |
|
|
|
124 |
# Function: Save to Excel
|
125 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
126 |
df.to_excel(output_path, index=False)
|
|
|
36 |
# Function: Parse PO Items
|
37 |
def parse_po_items_with_filters(text):
|
38 |
"""
|
39 |
+
Parses purchase order items from the extracted text systematically, avoiding merging issues.
|
|
|
40 |
"""
|
41 |
lines = text.splitlines()
|
42 |
data = []
|
|
|
91 |
)
|
92 |
data.append(current_item)
|
93 |
|
94 |
+
# Final cleanup to handle special cases (e.g., splitting merged items)
|
95 |
for i, row in enumerate(data):
|
96 |
if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
|
97 |
+
# Dynamically split merged descriptions
|
98 |
item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
|
99 |
if item_3_match:
|
100 |
data.insert(
|
|
|
110 |
)
|
111 |
row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
|
112 |
|
113 |
+
# Filter out invalid rows
|
114 |
data = [row for row in data if row["Description"]]
|
115 |
|
116 |
# Return data as a DataFrame
|
|
|
120 |
return df, "Data extracted successfully."
|
121 |
|
122 |
|
123 |
+
|
124 |
# Function: Save to Excel
|
125 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
126 |
df.to_excel(output_path, index=False)
|