Ayesha352 committed on
Commit
ba33017
·
verified ·
1 Parent(s): d221195

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -21
app.py CHANGED
@@ -184,35 +184,35 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
184
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
185
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
186
 
187
- # Check for duplicates - improved method
188
- is_duplicate = False
189
-
190
- # Convert new_row to DataFrame for comparison
191
  new_row_df = pd.DataFrame([new_row])
192
-
193
- # Compare all columns except possibly the client name
194
- comparison_cols = [col for col in flat_columns if col != "Client Name"]
195
-
 
 
 
196
  if not df.empty:
197
- # Check if any existing row matches the new data
198
- for _, existing_row in df.iterrows():
199
- match = True
200
- for col in comparison_cols:
201
- if str(existing_row[col]) != str(new_row[col]):
202
- match = False
203
- break
204
- if match:
205
- is_duplicate = True
206
- print("Duplicate found - not adding new row")
207
- break
208
 
209
  # Append if not duplicate
210
  if not is_duplicate:
211
  df = pd.concat([df, new_row_df], ignore_index=True)
212
  df.to_csv(csv_path, index=False)
213
- print(f"New data saved to CSV: {csv_path}")
214
  else:
215
- print("Duplicate data detected - no changes made to CSV")
 
 
216
 
217
  # Gradio UI
218
  iface = gr.Interface(
 
184
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
185
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
186
 
187
+ # Convert new_row to DataFrame
 
 
 
188
  new_row_df = pd.DataFrame([new_row])
189
+
190
+ # Convert all values to strings for consistent comparison
191
+ df_str = df.astype(str)
192
+ new_row_str = new_row_df.astype(str)
193
+
194
+ # Check for duplicates by comparing all columns
195
+ is_duplicate = False
196
  if not df.empty:
197
+ # Merge check (efficient method)
198
+ merged = df_str.merge(new_row_str)
199
+ is_duplicate = not merged.empty
200
+
201
+ # Alternative method (row-by-row comparison)
202
+ # for _, row in df_str.iterrows():
203
+ # if row.equals(new_row_str.iloc[0]):
204
+ # is_duplicate = True
205
+ # break
 
 
206
 
207
  # Append if not duplicate
208
  if not is_duplicate:
209
  df = pd.concat([df, new_row_df], ignore_index=True)
210
  df.to_csv(csv_path, index=False)
211
+ print(f"New data saved for client: {client_name}")
212
  else:
213
+ print(f"⚠️ Duplicate data detected for client: {client_name} - no changes made")
214
+
215
+ return df
216
 
217
  # Gradio UI
218
  iface = gr.Interface(