dschandra committed on
Commit
8074612
·
verified ·
1 Parent(s): 1f6beaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -35,6 +35,13 @@ def clean_description(description, item_number=None):
35
 
36
  return description.strip()
37
 
 
 
 
 
 
 
 
38
  # Function to extract PO items with better error handling and checks
39
  def parse_po_items_with_filters(text):
40
  """
@@ -60,7 +67,7 @@ def parse_po_items_with_filters(text):
60
  # Save the previous item if current_item is not None
61
  if current_item is not None:
62
  # Clean and add the description to the current item
63
- current_item["Description"] = clean_description(" ".join(description_accumulator).strip(), current_item["Item"])
64
  data.append(current_item)
65
  description_accumulator = [] # Reset description accumulator
66
  print(f"Item {current_item['Item']} added to data.") # Debugging
@@ -100,7 +107,7 @@ def parse_po_items_with_filters(text):
100
  # Finalize the last item
101
  if current_item is not None:
102
  # Clean and add the description to the current item
103
- current_item["Description"] = clean_description(" ".join(description_accumulator).strip(), current_item["Item"])
104
  data.append(current_item)
105
  print(f"Finalized Item {current_item['Item']}") # Debugging
106
 
@@ -112,6 +119,7 @@ def parse_po_items_with_filters(text):
112
  row["Description"]
113
  )
114
  if item_3_match:
 
115
  data.insert(
116
  i + 1,
117
  {
@@ -137,6 +145,20 @@ def parse_po_items_with_filters(text):
137
  df = pd.DataFrame(data)
138
  return df, "Data extracted successfully."
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # Function: Save to Excel
141
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
142
  """
 
35
 
36
  return description.strip()
37
 
38
# Function to clean item description
def clean_description(description, item_number=None):
    """
    Clean up an item description so it is consistently formatted.

    Args:
        description: Raw description text accumulated for an item.
        item_number: Optional item identifier. It is accepted only so that
            call sites using the two-argument form
            ``clean_description(text, item)`` keep working; it does not
            influence the result.

    Returns:
        The description with leading and trailing whitespace removed.
    """
    return description.strip()
44
+
45
  # Function to extract PO items with better error handling and checks
46
  def parse_po_items_with_filters(text):
47
  """
 
67
  # Save the previous item if current_item is not None
68
  if current_item is not None:
69
  # Clean and add the description to the current item
70
+ current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
71
  data.append(current_item)
72
  description_accumulator = [] # Reset description accumulator
73
  print(f"Item {current_item['Item']} added to data.") # Debugging
 
107
  # Finalize the last item
108
  if current_item is not None:
109
  # Clean and add the description to the current item
110
+ current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
111
  data.append(current_item)
112
  print(f"Finalized Item {current_item['Item']}") # Debugging
113
 
 
119
  row["Description"]
120
  )
121
  if item_3_match:
122
+ # Split item 2 description and assign item 3
123
  data.insert(
124
  i + 1,
125
  {
 
145
  df = pd.DataFrame(data)
146
  return df, "Data extracted successfully."
147
 
148
# Example run. Guarded so that importing this module does not trigger
# parsing or console output; it executes only when the file is run directly.
if __name__ == "__main__":
    # Example text (as provided)
    text = """
ITEM 1 Stainless Steel RATING AND DIAGRAM PLATE 24 Nos. 3.00 72.00
As per Drg.No. G 000822 RI RDP 50KVA NT001 51 SIZE : 150mm X 160mm X 1.00mm Thick With Serial No:NT00151 97 to 121 Mfd:-2022
ITEM 2 Stainless Steel RATING AND DIAGRAM PLATE 12 Nos. 3.80 45.60
As per Drg.to.G 000816 R2 RDP 600KVA NT00152 SIZE : 150mm X 260mm X 1.00mm Thick With Serial No:NT00I53 38 to 50 Mfd:-2022
"""

    # Running the function
    df, status = parse_po_items_with_filters(text)
    print(status)
    # The first returned value is checked against None before printing.
    if df is not None:
        print(df)
161
+
162
  # Function: Save to Excel
163
  def save_to_excel(df, output_path="extracted_po_data.xlsx"):
164
  """