Ayesha352 committed on
Commit
a3f6db3
·
verified ·
1 Parent(s): ba33017

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -32
app.py CHANGED
@@ -122,6 +122,7 @@ def extract_numeric_values(pdf_file, schedule1_file=None, client_name="Unknown C
122
 
123
 
124
  def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
 
125
  # Define the directory path explicitly
126
  if csv_path is None:
127
  csv_path = "./Clients_Output_Data_Form_1040.csv"
@@ -146,16 +147,24 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
146
  else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
147
  ]
148
 
149
- # Read existing CSV or create new DataFrame
 
 
 
 
 
150
  if os.path.exists(csv_path):
 
151
  df = pd.read_csv(csv_path)
 
 
152
  else:
 
153
  df = pd.DataFrame(columns=flat_columns)
154
 
155
- # Create new row
156
  new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
157
  new_row.iloc[0] = client_name
158
-
159
  # Map Page 1-2 values
160
  line_mapping = {
161
  "Taxable Wages - Line 1": 0,
@@ -184,35 +193,12 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
184
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
185
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
186
 
187
- # Convert new_row to DataFrame
188
- new_row_df = pd.DataFrame([new_row])
189
-
190
- # Convert all values to strings for consistent comparison
191
- df_str = df.astype(str)
192
- new_row_str = new_row_df.astype(str)
193
-
194
- # Check for duplicates by comparing all columns
195
- is_duplicate = False
196
- if not df.empty:
197
- # Merge check (efficient method)
198
- merged = df_str.merge(new_row_str)
199
- is_duplicate = not merged.empty
200
-
201
- # Alternative method (row-by-row comparison)
202
- # for _, row in df_str.iterrows():
203
- # if row.equals(new_row_str.iloc[0]):
204
- # is_duplicate = True
205
- # break
206
-
207
- # Append if not duplicate
208
- if not is_duplicate:
209
- df = pd.concat([df, new_row_df], ignore_index=True)
210
- df.to_csv(csv_path, index=False)
211
- print(f"✅ New data saved for client: {client_name}")
212
- else:
213
- print(f"⚠️ Duplicate data detected for client: {client_name} - no changes made")
214
-
215
- return df
216
 
217
  # Gradio UI
218
  iface = gr.Interface(
@@ -270,3 +256,4 @@ iface = gr.Interface(
270
 
271
 
272
  iface.launch(share=True)
 
 
122
 
123
 
124
  def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
125
+
126
  # Define the directory path explicitly
127
  if csv_path is None:
128
  csv_path = "./Clients_Output_Data_Form_1040.csv"
 
147
  else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
148
  ]
149
 
150
+ # Load the existing CSV if it exists; otherwise start from an empty DataFrame with the flat column headers
151
+ # if os.path.exists(csv_path):
152
+ # df = pd.read_csv(csv_path)
153
+ # else:
154
+ # df = pd.DataFrame(columns=flat_columns)
155
+
156
  if os.path.exists(csv_path):
157
+ print(f"Reading existing CSV file: {csv_path}")
158
  df = pd.read_csv(csv_path)
159
+ print(f"CSV columns: {df.columns.tolist()}")
160
+ print(f"CSV rows before append: {len(df)}")
161
  else:
162
+ print("CSV does not exist. Creating new DataFrame.")
163
  df = pd.DataFrame(columns=flat_columns)
164
 
165
+ # Create new row with None
166
  new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
167
  new_row.iloc[0] = client_name
 
168
  # Map Page 1-2 values
169
  line_mapping = {
170
  "Taxable Wages - Line 1": 0,
 
193
  new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
194
  new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
195
 
196
+ # Append and save
197
+ if not ((df == new_row.iloc[0]).all(axis=1)).any():
198
+ df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
199
+ print(f"CSV rows after append: {len(df)}")
200
+ df.to_csv(csv_path, index=False)
201
+ print(f" Data saved to CSV: {csv_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  # Gradio UI
204
  iface = gr.Interface(
 
256
 
257
 
258
  iface.launch(share=True)
259
+