dschandra committed on
Commit
e31a32d
·
verified ·
1 Parent(s): 36448eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -36,8 +36,7 @@ def clean_description(description, item_number=None):
36
  # Function: Parse PO Items
37
  def parse_po_items_with_filters(text):
38
  """
39
- Parses purchase order items from the extracted text using regex with filters.
40
- Ensures items are not merged and handles split descriptions across lines.
41
  """
42
  lines = text.splitlines()
43
  data = []
@@ -92,10 +91,10 @@ def parse_po_items_with_filters(text):
92
  )
93
  data.append(current_item)
94
 
95
- # Split merged descriptions
96
  for i, row in enumerate(data):
97
  if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
98
- # Dynamically split merged descriptions for item 3
99
  item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
100
  if item_3_match:
101
  data.insert(
@@ -111,7 +110,7 @@ def parse_po_items_with_filters(text):
111
  )
112
  row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
113
 
114
- # Remove invalid rows
115
  data = [row for row in data if row["Description"]]
116
 
117
  # Return data as a DataFrame
@@ -121,6 +120,7 @@ def parse_po_items_with_filters(text):
121
  return df, "Data extracted successfully."
122
 
123
 
 
124
  # Function: Save to Excel
125
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
126
  df.to_excel(output_path, index=False)
 
36
  # Function: Parse PO Items
37
  def parse_po_items_with_filters(text):
38
  """
39
+ Parses purchase order items from the extracted text systematically, avoiding merging issues.
 
40
  """
41
  lines = text.splitlines()
42
  data = []
 
91
  )
92
  data.append(current_item)
93
 
94
+ # Final cleanup to handle special cases (e.g., splitting merged items)
95
  for i, row in enumerate(data):
96
  if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
97
+ # Dynamically split merged descriptions
98
  item_3_match = re.search(r"As per Drg. to. G000810.*Mfd:-2022", row["Description"])
99
  if item_3_match:
100
  data.insert(
 
110
  )
111
  row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
112
 
113
+ # Filter out invalid rows
114
  data = [row for row in data if row["Description"]]
115
 
116
  # Return data as a DataFrame
 
120
  return df, "Data extracted successfully."
121
 
122
 
123
+
124
  # Function: Save to Excel
125
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
126
  df.to_excel(output_path, index=False)