dschandra committed on
Commit
f4c6e9e
·
verified ·
1 Parent(s): 2736e3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -9
app.py CHANGED
@@ -14,14 +14,6 @@ def extract_text_from_pdf(pdf_file):
14
  text += page.extract_text()
15
  return text
16
 
17
- # Function: Clean Description (Basic cleaning logic)
18
- def clean_description(description, item_number):
19
- """
20
- Cleans up the description for an item to ensure it's correctly formatted.
21
- """
22
- # Placeholder for actual cleaning process (e.g., removing unwanted characters)
23
- return description.strip()
24
-
25
  # Function: Clean Description
26
  def clean_description(description, item_number=None):
27
  """
@@ -43,7 +35,15 @@ def clean_description(description, item_number=None):
43
 
44
  return description.strip()
45
 
46
- # Function to extract PO Items with splitting
 
 
 
 
 
 
 
 
47
  def parse_po_items_with_filters(text):
48
  """
49
  Parses purchase order items from the extracted text systematically.
@@ -91,12 +91,16 @@ def parse_po_items_with_filters(text):
91
  print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
92
  current_item["Qty"] = qty_match.group("Qty")
93
  current_item["Unit"] = qty_match.group(2)
 
 
94
 
95
  price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
96
  if price_match:
97
  print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
98
  current_item["Unit Price"] = price_match.group("UnitPrice")
99
  current_item["Total Price"] = price_match.group("TotalPrice")
 
 
100
 
101
  # Finalize the last item
102
  if current_item is not None:
 
14
  text += page.extract_text()
15
  return text
16
 
 
 
 
 
 
 
 
 
17
  # Function: Clean Description
18
  def clean_description(description, item_number=None):
19
  """
 
35
 
36
  return description.strip()
37
 
38
+ # Function: Clean Description (Basic cleaning logic)
39
+ def clean_description(description, item_number):
40
+ """
41
+ Cleans up the description for an item to ensure it's correctly formatted.
42
+ """
43
+ # Placeholder for actual cleaning process (e.g., removing unwanted characters)
44
+ return description.strip()
45
+
46
+ # Function to extract PO Items with filters and better error handling
47
  def parse_po_items_with_filters(text):
48
  """
49
  Parses purchase order items from the extracted text systematically.
 
91
  print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
92
  current_item["Qty"] = qty_match.group("Qty")
93
  current_item["Unit"] = qty_match.group(2)
94
+ else:
95
+ print(f"No Qty match found in line: {line}") # Debugging
96
 
97
  price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
98
  if price_match:
99
  print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
100
  current_item["Unit Price"] = price_match.group("UnitPrice")
101
  current_item["Total Price"] = price_match.group("TotalPrice")
102
+ else:
103
+ print(f"No price match found in line: {line}") # Debugging
104
 
105
  # Finalize the last item
106
  if current_item is not None: