Update app.py
Browse files
app.py
CHANGED
@@ -35,6 +35,13 @@ def clean_description(description, item_number=None):
|
|
35 |
|
36 |
return description.strip()
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Function to extract PO items with better error handling and checks
|
39 |
def parse_po_items_with_filters(text):
|
40 |
"""
|
@@ -60,7 +67,7 @@ def parse_po_items_with_filters(text):
|
|
60 |
# Save the previous item if current_item is not None
|
61 |
if current_item is not None:
|
62 |
# Clean and add the description to the current item
|
63 |
-
current_item["Description"] = clean_description(" ".join(description_accumulator).strip()
|
64 |
data.append(current_item)
|
65 |
description_accumulator = [] # Reset description accumulator
|
66 |
print(f"Item {current_item['Item']} added to data.") # Debugging
|
@@ -100,7 +107,7 @@ def parse_po_items_with_filters(text):
|
|
100 |
# Finalize the last item
|
101 |
if current_item is not None:
|
102 |
# Clean and add the description to the current item
|
103 |
-
current_item["Description"] = clean_description(" ".join(description_accumulator).strip()
|
104 |
data.append(current_item)
|
105 |
print(f"Finalized Item {current_item['Item']}") # Debugging
|
106 |
|
@@ -112,6 +119,7 @@ def parse_po_items_with_filters(text):
|
|
112 |
row["Description"]
|
113 |
)
|
114 |
if item_3_match:
|
|
|
115 |
data.insert(
|
116 |
i + 1,
|
117 |
{
|
@@ -137,6 +145,20 @@ def parse_po_items_with_filters(text):
|
|
137 |
df = pd.DataFrame(data)
|
138 |
return df, "Data extracted successfully."
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
# Function: Save to Excel
|
141 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
142 |
"""
|
|
|
35 |
|
36 |
return description.strip()
|
37 |
|
38 |
+
# Function to clean item description
|
39 |
+
def clean_description(description):
|
40 |
+
"""
|
41 |
+
Cleans up the description for an item to ensure it's correctly formatted.
|
42 |
+
"""
|
43 |
+
return description.strip()
|
44 |
+
|
45 |
# Function to extract PO items with better error handling and checks
|
46 |
def parse_po_items_with_filters(text):
|
47 |
"""
|
|
|
67 |
# Save the previous item if current_item is not None
|
68 |
if current_item is not None:
|
69 |
# Clean and add the description to the current item
|
70 |
+
current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
|
71 |
data.append(current_item)
|
72 |
description_accumulator = [] # Reset description accumulator
|
73 |
print(f"Item {current_item['Item']} added to data.") # Debugging
|
|
|
107 |
# Finalize the last item
|
108 |
if current_item is not None:
|
109 |
# Clean and add the description to the current item
|
110 |
+
current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
|
111 |
data.append(current_item)
|
112 |
print(f"Finalized Item {current_item['Item']}") # Debugging
|
113 |
|
|
|
119 |
row["Description"]
|
120 |
)
|
121 |
if item_3_match:
|
122 |
+
# Split item 2 description and assign item 3
|
123 |
data.insert(
|
124 |
i + 1,
|
125 |
{
|
|
|
145 |
df = pd.DataFrame(data)
|
146 |
return df, "Data extracted successfully."
|
147 |
|
148 |
+
# Example text (as provided)
|
149 |
+
text = """
|
150 |
+
ITEM 1 Stainless Steel RATING AND DIAGRAM PLATE 24 Nos. 3.00 72.00
|
151 |
+
As per Drg.No. G 000822 RI RDP 50KVA NT001 51 SIZE : 150mm X 160mm X 1.00mm Thick With Serial No:NT00151 97 to 121 Mfd:-2022
|
152 |
+
ITEM 2 Stainless Steel RATING AND DIAGRAM PLATE 12 Nos. 3.80 45.60
|
153 |
+
As per Drg.to.G 000816 R2 RDP 600KVA NT00152 SIZE : 150mm X 260mm X 1.00mm Thick With Serial No:NT00I53 38 to 50 Mfd:-2022
|
154 |
+
"""
|
155 |
+
|
156 |
+
# Running the function
|
157 |
+
df, status = parse_po_items_with_filters(text)
|
158 |
+
print(status)
|
159 |
+
if df is not None:
|
160 |
+
print(df)
|
161 |
+
|
162 |
# Function: Save to Excel
|
163 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
164 |
"""
|