hellorahulk committed on
Commit
60e8b74
·
1 Parent(s): fdc859a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from zipfile import ZipFile
3
+
4
def zip_two_files(data1, data2):
    """Write two tables into ``my_csvs.zip`` and return the archive path.

    Args:
        data1: Object exposing ``to_csv(index=False)``; stored in the
            archive as ``primary_data.csv``.
        data2: Object exposing ``to_csv(index=False)``; stored in the
            archive as ``secondary_data.csv``.

    Returns:
        The relative path of the archive, ``'my_csvs.zip'``. The file is
        created in the current working directory and overwritten on every
        call.
    """
    # Imported locally because the module only imports ``ZipFile``.
    from zipfile import ZIP_DEFLATED

    archive_path = 'my_csvs.zip'
    # CSV is plain text, so deflate it instead of using the default
    # ZIP_STORED (no compression).
    with ZipFile(archive_path, 'w', compression=ZIP_DEFLATED) as csv_zip:
        csv_zip.writestr("primary_data.csv", data1.to_csv(index=False))
        csv_zip.writestr("secondary_data.csv", data2.to_csv(index=False))
    return archive_path
9
+
10
def _split_column_names(raw):
    """Split a comma-separated string into stripped, non-empty column names."""
    return [name.strip() for name in raw.split(',') if name.strip()]


def get_split(csv_file, target_columns, primary_cols, combination_of):
    """Split an uploaded CSV into a primary and a secondary table and zip them.

    Rows are keyed by the values of the ``combination_of`` columns. Each
    distinct key gets a 1-based ``Id_Apres``. The secondary table holds one
    row per key with every non-primary column; the primary table holds the
    primary and target columns plus the ``Id_Apres`` linking to it.

    Args:
        csv_file: Object whose ``name`` attribute is the path of the CSV.
        target_columns: Name of the single target column.
        primary_cols: Comma-separated names of the primary columns.
        combination_of: Comma-separated names of the columns that form the
            key. They must not be listed as primary or target columns,
            otherwise they are missing from the secondary table and the
            lookup below fails.

    Returns:
        Path of the zip archive produced by ``zip_two_files``.

    Raises:
        KeyError: If a requested column is not present in the CSV.
    """
    # pandas is not imported at module level in this file.
    import pandas as pd

    df = pd.read_csv(csv_file.name, delimiter=",")

    # Tolerate "a, b" style input and stray commas from the text boxes.
    target_columns = [target_columns.strip()]
    primary_cols = _split_column_names(primary_cols) + target_columns
    combination_of = _split_column_names(combination_of)

    # Keep the CSV's own column order; a set difference would make the
    # layout of secondary_data.csv vary from run to run.
    primary_set = set(primary_cols)
    secondary_cols = [col for col in df.columns if col not in primary_set]

    # One string key per row, built from the combination columns.
    df["Comb"] = df[combination_of].astype(str).agg(",".join, axis=1)

    unique_keys = df["Comb"].unique()
    secondary_df = pd.DataFrame(
        {'Id_Apres': range(1, len(unique_keys) + 1), 'Comb': unique_keys}
    )
    # Attach the secondary columns, keeping the first row seen for each key.
    secondary_df = (
        secondary_df
        .merge(df[['Comb'] + secondary_cols], on=['Comb'])
        .drop_duplicates(subset=['Comb'])
        .drop(columns=['Comb'])
        .reset_index(drop=True)
    )

    # Only the key columns and the id are needed to tag the primary rows.
    id_lookup = secondary_df[combination_of + ['Id_Apres']]
    primary_df = df.merge(id_lookup, on=combination_of).drop(columns=combination_of)
    primary_df = primary_df[primary_cols + ['Id_Apres']]
    # reset_index() without drop keeps the row position as an "index" column.
    primary_df = primary_df.reset_index()

    return zip_two_files(primary_df, secondary_df)
35
+
36
+
37
+
38
# Gradio front end: one CSV upload plus three free-text fields that are
# passed positionally to get_split.
iface = gr.Interface(
    fn=get_split,
    inputs=[
        gr.inputs.File(label='CSV file'),
        gr.inputs.Textbox(label='Target Column'),
        gr.inputs.Textbox(label='Primary Column'),
        gr.inputs.Textbox(label='Combination of Column'),
    ],
    # The two tables are delivered together as a single downloadable file
    # rather than as separate dataframe outputs.
    outputs=['file'],
    title='Data Splitter ',
    description="Split your data into 2 parts",
)

iface.launch(debug=True)