Spaces:

dschandra
/

ALNISFPO

Sleeping

dschandra committed on Dec 3, 2024

Commit

e31a32d

verified ·

1 Parent(s): 36448eb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,8 +36,7 @@ def clean_description(description, item_number=None):
 # Function: Parse PO Items
 def parse_po_items_with_filters(text):
     """
-    Parses purchase order items from the extracted text using regex with filters.
-    Ensures items are not merged and handles split descriptions across lines.
     """
     lines = text.splitlines()
     data = []
@@ -92,10 +91,10 @@ def parse_po_items_with_filters(text):
         )
         data.append(current_item)
-    # Split merged descriptions
     for i, row in enumerate(data):
         if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
-            # Dynamically split merged descriptions for item 3
             item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
             if item_3_match:
                 data.insert(
@@ -111,7 +110,7 @@ def parse_po_items_with_filters(text):
                 )
                 row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
-    # Remove invalid rows
     data = [row for row in data if row["Description"]]
     # Return data as a DataFrame
@@ -121,6 +120,7 @@ def parse_po_items_with_filters(text):
     return df, "Data extracted successfully."
 # Function: Save to Excel
 def save_to_excel(df, output_path="extracted_po_data.xlsx"):
     """Export ``df`` to an Excel file by delegating to ``df.to_excel``.

     Args:
         df: Object exposing a ``to_excel`` method (presumably the pandas
             DataFrame produced by the PO parser -- confirm against callers).
         output_path: Destination passed as the first argument to
             ``to_excel``; defaults to ``"extracted_po_data.xlsx"``.

     Returns:
         None. The only effect is whatever ``df.to_excel`` does.
     """
     # index=False is forwarded unchanged so the export call matches the
     # original one-liner exactly.
     export_options = {"index": False}
     df.to_excel(output_path, **export_options)

 # Function: Parse PO Items
 def parse_po_items_with_filters(text):
     """
+    Parses purchase order items from the extracted text systematically, avoiding merging issues.
     """
     lines = text.splitlines()
     data = []
         )
         data.append(current_item)
+    # Final cleanup to handle special cases (e.g., splitting merged items)
     for i, row in enumerate(data):
         if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
+            # Dynamically split merged descriptions
             item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
             if item_3_match:
                 data.insert(
                 )
                 row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
+    # Filter out invalid rows
     data = [row for row in data if row["Description"]]
     # Return data as a DataFrame
     return df, "Data extracted successfully."
 # Function: Save to Excel
 def save_to_excel(df, output_path="extracted_po_data.xlsx"):
     """Export ``df`` to an Excel file by delegating to ``df.to_excel``.

     Args:
         df: Object exposing a ``to_excel`` method (presumably the pandas
             DataFrame produced by the PO parser -- confirm against callers).
         output_path: Destination passed as the first argument to
             ``to_excel``; defaults to ``"extracted_po_data.xlsx"``.

     Returns:
         None. The only effect is whatever ``df.to_excel`` does.
     """
     # index=False is forwarded unchanged so the export call matches the
     # original one-liner exactly.
     export_options = {"index": False}
     df.to_excel(output_path, **export_options)