Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -122,7 +122,6 @@ def extract_numeric_values(pdf_file, schedule1_file=None, client_name="Unknown C
|
|
122 |
|
123 |
|
124 |
def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
|
125 |
-
|
126 |
# Default to a CSV file in the current working directory when no path is given
|
127 |
if csv_path is None:
|
128 |
csv_path = "./Clients_Output_Data_Form_1040.csv"
|
@@ -147,24 +146,16 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
|
|
147 |
else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
|
148 |
]
|
149 |
|
150 |
-
#
|
151 |
-
# if os.path.exists(csv_path):
|
152 |
-
# df = pd.read_csv(csv_path)
|
153 |
-
# else:
|
154 |
-
# df = pd.DataFrame(columns=flat_columns)
|
155 |
-
|
156 |
if os.path.exists(csv_path):
|
157 |
-
print(f"Reading existing CSV file: {csv_path}")
|
158 |
df = pd.read_csv(csv_path)
|
159 |
-
print(f"CSV columns: {df.columns.tolist()}")
|
160 |
-
print(f"CSV rows before append: {len(df)}")
|
161 |
else:
|
162 |
-
print("CSV does not exist. Creating new DataFrame.")
|
163 |
df = pd.DataFrame(columns=flat_columns)
|
164 |
|
165 |
-
# Create new row
|
166 |
new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
|
167 |
new_row.iloc[0] = client_name
|
|
|
168 |
# Map Page 1-2 values
|
169 |
line_mapping = {
|
170 |
"Taxable Wages - Line 1": 0,
|
@@ -193,12 +184,35 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
|
|
193 |
new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
|
194 |
new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
|
195 |
|
196 |
-
#
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
# Gradio UI
|
204 |
iface = gr.Interface(
|
|
|
122 |
|
123 |
|
124 |
def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
|
|
|
125 |
# Default to a CSV file in the current working directory when no path is given
|
126 |
if csv_path is None:
|
127 |
csv_path = "./Clients_Output_Data_Form_1040.csv"
|
|
|
146 |
else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
|
147 |
]
|
148 |
|
149 |
+
# Read existing CSV or create new DataFrame
|
|
|
|
|
|
|
|
|
|
|
150 |
if os.path.exists(csv_path):
|
|
|
151 |
df = pd.read_csv(csv_path)
|
|
|
|
|
152 |
else:
|
|
|
153 |
df = pd.DataFrame(columns=flat_columns)
|
154 |
|
155 |
+
# Create new row
|
156 |
new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
|
157 |
new_row.iloc[0] = client_name
|
158 |
+
|
159 |
# Map Page 1-2 values
|
160 |
line_mapping = {
|
161 |
"Taxable Wages - Line 1": 0,
|
|
|
184 |
new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
|
185 |
new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
|
186 |
|
187 |
+
# Check whether an existing row already holds the same values (compared as strings)
|
188 |
+
is_duplicate = False
|
189 |
+
|
190 |
+
# Wrap new_row in a one-row DataFrame so it can be appended with pd.concat
|
191 |
+
new_row_df = pd.DataFrame([new_row])
|
192 |
+
|
193 |
+
# Compare every column except "Client Name"
|
194 |
+
comparison_cols = [col for col in flat_columns if col != "Client Name"]
|
195 |
+
|
196 |
+
if not df.empty:
|
197 |
+
# Check if any existing row matches the new data
|
198 |
+
for _, existing_row in df.iterrows():
|
199 |
+
match = True
|
200 |
+
for col in comparison_cols:
|
201 |
+
if str(existing_row[col]) != str(new_row[col]):
|
202 |
+
match = False
|
203 |
+
break
|
204 |
+
if match:
|
205 |
+
is_duplicate = True
|
206 |
+
print("Duplicate found - not adding new row")
|
207 |
+
break
|
208 |
+
|
209 |
+
# Append if not duplicate
|
210 |
+
if not is_duplicate:
|
211 |
+
df = pd.concat([df, new_row_df], ignore_index=True)
|
212 |
+
df.to_csv(csv_path, index=False)
|
213 |
+
print(f"New data saved to CSV: {csv_path}")
|
214 |
+
else:
|
215 |
+
print("Duplicate data detected - no changes made to CSV")
|
216 |
|
217 |
# Gradio UI
|
218 |
iface = gr.Interface(
|