Ayesha352 committed on
Commit
d221195
·
verified ·
1 Parent(s): 72325a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -122,7 +122,6 @@ def extract_numeric_values(pdf_file, schedule1_file=None, client_name="Unknown C
122
 
123
 
124
  def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
125
-
126
  # Define the directory path explicitly
127
  if csv_path is None:
128
  csv_path = "./Clients_Output_Data_Form_1040.csv"
@@ -147,24 +146,16 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
147
  else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
148
  ]
149
 
150
- # If file doesn't exist, create new DataFrame and write headers
151
- # if os.path.exists(csv_path):
152
- # df = pd.read_csv(csv_path)
153
- # else:
154
- # df = pd.DataFrame(columns=flat_columns)
155
-
156
  if os.path.exists(csv_path):
157
- print(f"Reading existing CSV file: {csv_path}")
158
  df = pd.read_csv(csv_path)
159
- print(f"CSV columns: {df.columns.tolist()}")
160
- print(f"CSV rows before append: {len(df)}")
161
  else:
162
- print("CSV does not exist. Creating new DataFrame.")
163
  df = pd.DataFrame(columns=flat_columns)
164
 
165
- # Create new row with None
166
  new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
167
  new_row.iloc[0] = client_name
 
168
  # Map Page 1-2 values
169
  line_mapping = {
170
  "Taxable Wages - Line 1": 0,
@@ -193,12 +184,35 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
193
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
194
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
195
 
196
- # Append and save
197
- if not ((df == new_row.iloc[0]).all(axis=1)).any():
198
- df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
199
- print(f"CSV rows after append: {len(df)}")
200
- df.to_csv(csv_path, index=False)
201
- print(f" Data saved to CSV: {csv_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  # Gradio UI
204
  iface = gr.Interface(
 
122
 
123
 
124
  def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
 
125
  # Define the directory path explicitly
126
  if csv_path is None:
127
  csv_path = "./Clients_Output_Data_Form_1040.csv"
 
146
  else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
147
  ]
148
 
149
+ # Read existing CSV or create new DataFrame
 
 
 
 
 
150
  if os.path.exists(csv_path):
 
151
  df = pd.read_csv(csv_path)
 
 
152
  else:
 
153
  df = pd.DataFrame(columns=flat_columns)
154
 
155
+ # Create new row
156
  new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
157
  new_row.iloc[0] = client_name
158
+
159
  # Map Page 1-2 values
160
  line_mapping = {
161
  "Taxable Wages - Line 1": 0,
 
184
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
185
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
186
 
187
+ # Check for duplicates - improved method
188
+ is_duplicate = False
189
+
190
+ # Convert new_row to DataFrame for comparison
191
+ new_row_df = pd.DataFrame([new_row])
192
+
193
+ # Compare all columns except possibly the client name
194
+ comparison_cols = [col for col in flat_columns if col != "Client Name"]
195
+
196
+ if not df.empty:
197
+ # Check if any existing row matches the new data
198
+ for _, existing_row in df.iterrows():
199
+ match = True
200
+ for col in comparison_cols:
201
+ if str(existing_row[col]) != str(new_row[col]):
202
+ match = False
203
+ break
204
+ if match:
205
+ is_duplicate = True
206
+ print("Duplicate found - not adding new row")
207
+ break
208
+
209
+ # Append if not duplicate
210
+ if not is_duplicate:
211
+ df = pd.concat([df, new_row_df], ignore_index=True)
212
+ df.to_csv(csv_path, index=False)
213
+ print(f"New data saved to CSV: {csv_path}")
214
+ else:
215
+ print("Duplicate data detected - no changes made to CSV")
216
 
217
  # Gradio UI
218
  iface = gr.Interface(