Spaces:
Runtime error
Runtime error
Commit
·
60e8b74
1
Parent(s):
fdc859a
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
from zipfile import ZipFile

import gradio as gr
import pandas as pd
|
3 |
+
|
4 |
+
def zip_two_files(data1, data2, archive_path='my_csvs.zip'):
    """Write two tables as CSV members of a single zip archive.

    Args:
        data1: Object exposing ``to_csv(index=False)`` that returns CSV text;
            stored in the archive as ``primary_data.csv``.
        data2: Same kind of object; stored as ``secondary_data.csv``.
        archive_path: Where to write the archive. Defaults to
            ``'my_csvs.zip'`` (relative to the working directory), which is
            the name this function always used before the parameter existed.
            Pass a unique path when several archives may be produced at once.

    Returns:
        ``archive_path``, unchanged, so it can be handed on as the output file.
    """
    # Mode 'w' truncates any existing archive at this path.
    with ZipFile(archive_path, 'w') as csv_zip:
        csv_zip.writestr("primary_data.csv", data1.to_csv(index=False))
        csv_zip.writestr("secondary_data.csv", data2.to_csv(index=False))
    return archive_path
|
9 |
+
|
10 |
+
def _resolve_columns(raw_names, available):
    """Map user-typed column names onto real column labels, keeping order.

    An exact match wins; otherwise the name is retried with surrounding
    whitespace removed, so ``"a, b"`` works as well as ``"a,b"``. Blank
    entries (empty box, stray comma) are skipped and duplicates collapsed.

    Raises:
        KeyError: If a non-blank name matches no label in ``available``.
    """
    resolved = []
    for raw in raw_names:
        trimmed = raw.strip()
        if raw in available:
            name = raw
        elif trimmed in available:
            name = trimmed
        elif not trimmed:
            continue
        else:
            raise KeyError(f"column {trimmed!r} not found in the CSV file")
        if name not in resolved:
            resolved.append(name)
    return resolved


def get_split(csv_file, target_columns, primary_cols, combination_of):
    """Split a CSV into a primary and a secondary table and zip both.

    Rows are grouped by the values of the ``combination_of`` columns. Each
    distinct combination receives an ``Id_Apres`` number, starting at 1 in
    order of first appearance.

    * The primary table holds one row per input row: a leading ``index``
      column with the row position, then the primary columns, the target
      column and ``Id_Apres``.
    * The secondary table holds one row per distinct combination:
      ``Id_Apres`` followed by every column that is not a primary/target
      column, in CSV order, taken from the first row of that combination.

    Args:
        csv_file: Object whose ``name`` attribute is the path of the CSV file,
            or the path itself.
        target_columns: Name of the single target column.
        primary_cols: Comma-separated names of the primary columns.
        combination_of: Comma-separated names of the columns whose combined
            values identify a secondary row.

    Returns:
        Whatever ``zip_two_files(primary_df, secondary_df)`` returns.

    Raises:
        KeyError: If a requested column does not exist in the CSV.
        ValueError: If no combination column is given.
    """
    csv_path = getattr(csv_file, 'name', csv_file)
    df = pd.read_csv(csv_path, delimiter=",")
    available = set(df.columns)

    primary_cols = _resolve_columns(
        primary_cols.split(',') + [target_columns], available
    )
    combination_of = _resolve_columns(combination_of.split(','), available)
    if not combination_of:
        raise ValueError("at least one combination column is required")

    # Iterate the frame's own columns (not a set difference) so the secondary
    # table's column order is the CSV order on every run.
    primary_lookup = set(primary_cols)
    secondary_cols = [col for col in df.columns if col not in primary_lookup]

    # One string key per row, e.g. "x,1"; built row by row so an empty CSV
    # simply yields an empty key series.
    as_text = df[combination_of].astype(str)
    comb_key = pd.Series(
        [",".join(row) for row in as_text.itertuples(index=False, name=None)],
        index=df.index,
        dtype=object,
    )

    # factorize numbers distinct keys 0..k-1 in order of first appearance.
    codes, _ = pd.factorize(comb_key)
    tagged = df.assign(Id_Apres=codes + 1)

    first_of_group = ~comb_key.duplicated()
    secondary_df = tagged.loc[
        first_of_group, ['Id_Apres'] + secondary_cols
    ].reset_index(drop=True)

    # reset_index() without drop=True is deliberate: it keeps the row position
    # as the leading 'index' column that this function has always emitted.
    primary_df = tagged[primary_cols + ['Id_Apres']].reset_index()

    return zip_two_files(primary_df, secondary_df)
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
# Input widgets, listed in the same order as get_split's positional
# parameters: csv_file, target_columns, primary_cols, combination_of.
# NOTE(review): the `gr.inputs` namespace looks like the older gradio API;
# confirm it exists in the gradio version this app is deployed with.
split_inputs = [
    gr.inputs.File(label='CSV file'),
    gr.inputs.Textbox(label='Target Column'),
    gr.inputs.Textbox(label='Primary Column'),
    gr.inputs.Textbox(label='Combination of Column'),
]

# A single file output: get_split returns the value of zip_two_files.
split_outputs = ['file']

iface = gr.Interface(
    fn=get_split,
    inputs=split_inputs,
    outputs=split_outputs,
    title='Data Splitter ',
    description="Split your data into 2 parts",
)

# Start the web UI as soon as this module is executed.
iface.launch(debug=True)
|