Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -122,6 +122,7 @@ def extract_numeric_values(pdf_file, schedule1_file=None, client_name="Unknown C
|
|
122 |
|
123 |
|
124 |
def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
|
|
|
125 |
# Fall back to a default CSV file path in the current directory when none is given
|
126 |
if csv_path is None:
|
127 |
csv_path = "./Clients_Output_Data_Form_1040.csv"
|
@@ -146,16 +147,24 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
|
|
146 |
else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
|
147 |
]
|
148 |
|
149 |
-
#
|
|
|
|
|
|
|
|
|
|
|
150 |
if os.path.exists(csv_path):
|
|
|
151 |
df = pd.read_csv(csv_path)
|
|
|
|
|
152 |
else:
|
|
|
153 |
df = pd.DataFrame(columns=flat_columns)
|
154 |
|
155 |
-
# Create new row
|
156 |
new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
|
157 |
new_row.iloc[0] = client_name
|
158 |
-
|
159 |
# Map Page 1-2 values
|
160 |
line_mapping = {
|
161 |
"Taxable Wages - Line 1": 0,
|
@@ -184,35 +193,12 @@ def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknow
|
|
184 |
new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
|
185 |
new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
|
186 |
|
187 |
-
#
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
# Check for duplicates by comparing all columns
|
195 |
-
is_duplicate = False
|
196 |
-
if not df.empty:
|
197 |
-
# Merge check (efficient method)
|
198 |
-
merged = df_str.merge(new_row_str)
|
199 |
-
is_duplicate = not merged.empty
|
200 |
-
|
201 |
-
# Alternative method (row-by-row comparison)
|
202 |
-
# for _, row in df_str.iterrows():
|
203 |
-
# if row.equals(new_row_str.iloc[0]):
|
204 |
-
# is_duplicate = True
|
205 |
-
# break
|
206 |
-
|
207 |
-
# Append if not duplicate
|
208 |
-
if not is_duplicate:
|
209 |
-
df = pd.concat([df, new_row_df], ignore_index=True)
|
210 |
-
df.to_csv(csv_path, index=False)
|
211 |
-
print(f"✅ New data saved for client: {client_name}")
|
212 |
-
else:
|
213 |
-
print(f"⚠️ Duplicate data detected for client: {client_name} - no changes made")
|
214 |
-
|
215 |
-
return df
|
216 |
|
217 |
# Gradio UI
|
218 |
iface = gr.Interface(
|
@@ -270,3 +256,4 @@ iface = gr.Interface(
|
|
270 |
|
271 |
|
272 |
iface.launch(share=True)
|
|
|
|
122 |
|
123 |
|
124 |
def save_to_csv_flat(all_extracted_values, schedule1_values, client_name="Unknown Client", csv_path=None):
|
125 |
+
|
126 |
# Fall back to a default CSV file path in the current directory when none is given
|
127 |
if csv_path is None:
|
128 |
csv_path = "./Clients_Output_Data_Form_1040.csv"
|
|
|
147 |
else (h1.strip() + h2.strip()) for h1, h2 in zip(header_level_1, header_level_2)
|
148 |
]
|
149 |
|
150 |
+
# Load the existing CSV if present; otherwise start an empty DataFrame with the flat column headers
|
151 |
+
# if os.path.exists(csv_path):
|
152 |
+
# df = pd.read_csv(csv_path)
|
153 |
+
# else:
|
154 |
+
# df = pd.DataFrame(columns=flat_columns)
|
155 |
+
|
156 |
if os.path.exists(csv_path):
|
157 |
+
print(f"Reading existing CSV file: {csv_path}")
|
158 |
df = pd.read_csv(csv_path)
|
159 |
+
print(f"CSV columns: {df.columns.tolist()}")
|
160 |
+
print(f"CSV rows before append: {len(df)}")
|
161 |
else:
|
162 |
+
print("CSV does not exist. Creating new DataFrame.")
|
163 |
df = pd.DataFrame(columns=flat_columns)
|
164 |
|
165 |
+
# Create new row with None
|
166 |
new_row = pd.Series([None] * len(flat_columns), index=flat_columns)
|
167 |
new_row.iloc[0] = client_name
|
|
|
168 |
# Map Page 1-2 values
|
169 |
line_mapping = {
|
170 |
"Taxable Wages - Line 1": 0,
|
|
|
193 |
new_row["Rent/ Royalty (Schedule E) - Schedule 1, Line 5"] = schedule1_values[1] if schedule1_values[1] != '' else '0'
|
194 |
new_row["Other Income - Schedule 1, Line 8"] = schedule1_values[2] if schedule1_values[2] != '' else '0'
|
195 |
|
196 |
+
# Append and save
|
197 |
+
if not ((df == new_row.iloc[0]).all(axis=1)).any():
|
198 |
+
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
199 |
+
print(f"CSV rows after append: {len(df)}")
|
200 |
+
df.to_csv(csv_path, index=False)
|
201 |
+
print(f" Data saved to CSV: {csv_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
# Gradio UI
|
204 |
iface = gr.Interface(
|
|
|
256 |
|
257 |
|
258 |
iface.launch(share=True)
|
259 |
+
|