dschandra committed on
Commit
c0dd2b0
·
verified ·
1 Parent(s): e7e2d2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -35,14 +35,14 @@ def clean_description(description, item_number=None):
35
 
36
  return description.strip()
37
 
38
- # Function: Clean Description (Basic cleaning logic)
39
- def clean_description(description, item_number):
40
  """
41
  Cleans up the description for an item to ensure it's correctly formatted.
42
  """
43
  return description.strip()
44
 
45
- # Function to extract PO Items with filters and better error handling
46
  def parse_po_items_with_filters(text):
47
  """
48
  Parses purchase order items from the extracted text systematically.
@@ -52,6 +52,10 @@ def parse_po_items_with_filters(text):
52
  current_item = None
53
  description_accumulator = []
54
 
 
 
 
 
55
  for line in lines:
56
  print(f"Processing Line: {line}") # Debugging
57
 
@@ -62,10 +66,8 @@ def parse_po_items_with_filters(text):
62
 
63
  # Save the previous item if current_item is not None
64
  if current_item is not None:
65
- current_item["Description"] = clean_description(
66
- " ".join(description_accumulator).strip(),
67
- item_number=int(current_item["Item"]),
68
- )
69
  data.append(current_item)
70
  description_accumulator = [] # Reset description accumulator
71
  print(f"Item {current_item['Item']} added to data.") # Debugging
@@ -84,8 +86,8 @@ def parse_po_items_with_filters(text):
84
  # Accumulate additional lines for the current item's description
85
  description_accumulator.append(line.strip())
86
 
87
- # Match Qty, Unit, Unit Price, and Total Price
88
- qty_match = re.search(r"(?P<Qty>\d+)\s+(Nos\.|Set|pcs)", line)
89
  if qty_match:
90
  print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
91
  current_item["Qty"] = qty_match.group("Qty")
@@ -94,7 +96,7 @@ def parse_po_items_with_filters(text):
94
  print(f"No Qty match found in line: {line}") # Debugging
95
 
96
  # Match Unit Price and Total Price
97
- price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
98
  if price_match:
99
  print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
100
  current_item["Unit Price"] = price_match.group("UnitPrice")
@@ -104,14 +106,12 @@ def parse_po_items_with_filters(text):
104
 
105
  # Finalize the last item
106
  if current_item is not None:
107
- current_item["Description"] = clean_description(
108
- " ".join(description_accumulator).strip(),
109
- item_number=int(current_item["Item"]),
110
- )
111
  data.append(current_item)
112
  print(f"Finalized Item {current_item['Item']}") # Debugging
113
 
114
- # Split merged descriptions and assign items
115
  for i, row in enumerate(data):
116
  if row["Item"] == "2" and "Mfd:-2022" in row["Description"]: # Find the item description boundary
117
  item_3_match = re.search(
 
35
 
36
  return description.strip()
37
 
38
+ # Function to clean item description
39
+ def clean_description(description):
40
  """
41
  Cleans up the description for an item to ensure it's correctly formatted.
42
  """
43
  return description.strip()
44
 
45
+ # Function to extract PO items with better error handling and checks
46
  def parse_po_items_with_filters(text):
47
  """
48
  Parses purchase order items from the extracted text systematically.
 
52
  current_item = None
53
  description_accumulator = []
54
 
55
+ # Regex patterns
56
+ qty_pattern = r"(?P<Qty>\d+)\s+(Nos\.|Set|pcs)" # Match quantities
57
+ price_pattern = r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$" # Match prices
58
+
59
  for line in lines:
60
  print(f"Processing Line: {line}") # Debugging
61
 
 
66
 
67
  # Save the previous item if current_item is not None
68
  if current_item is not None:
69
+ # Clean and add the description to the current item
70
+ current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
 
 
71
  data.append(current_item)
72
  description_accumulator = [] # Reset description accumulator
73
  print(f"Item {current_item['Item']} added to data.") # Debugging
 
86
  # Accumulate additional lines for the current item's description
87
  description_accumulator.append(line.strip())
88
 
89
+ # Match Qty (quantity) in the line
90
+ qty_match = re.search(qty_pattern, line)
91
  if qty_match:
92
  print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
93
  current_item["Qty"] = qty_match.group("Qty")
 
96
  print(f"No Qty match found in line: {line}") # Debugging
97
 
98
  # Match Unit Price and Total Price
99
+ price_match = re.search(price_pattern, line)
100
  if price_match:
101
  print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
102
  current_item["Unit Price"] = price_match.group("UnitPrice")
 
106
 
107
  # Finalize the last item
108
  if current_item is not None:
109
+ # Clean and add the description to the current item
110
+ current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
 
 
111
  data.append(current_item)
112
  print(f"Finalized Item {current_item['Item']}") # Debugging
113
 
114
+ # Split merged descriptions and assign items (if necessary)
115
  for i, row in enumerate(data):
116
  if row["Item"] == "2" and "Mfd:-2022" in row["Description"]: # Find the item description boundary
117
  item_3_match = re.search(