Spaces:
Sleeping
Sleeping
| import numpy as np | |
def standardize(df, get_columns):
    """Fill the annotation columns with a placeholder and select a column layout.

    Every annotation column named below is overwritten (or created) on ``df``
    with the literal string ``'nan'``. The assignment happens on the frame that
    was passed in, so the caller's ``df`` gains/overwrites those columns. The
    returned frame is ``df`` restricted to the columns of ``get_columns``, in
    that order.

    Args:
        df: DataFrame to fill; its annotation columns are assigned in place.
        get_columns: Object exposing ``.columns``; those labels are used to
            select (and order) the columns of the result.

    Returns:
        ``df[get_columns.columns]``.
    """
    # Original author's note: in "the other ones" these columns are 3D
    # distances; here no distance is calculated, so a placeholder is written.
    placeholder_columns = (
        'sasa', 'domaindistance3D', 'disulfide', 'intMet', 'intramembrane',
        'naturalVariant', 'dnaBinding', 'activeSite', 'nucleotideBinding',
        'lipidation', 'site', 'transmembrane', 'crosslink', 'mutagenesis',
        'strand', 'helix', 'turn', 'metalBinding', 'repeat', 'caBinding',
        'topologicalDomain', 'bindingSite', 'region', 'signalPeptide',
        'modifiedResidue', 'zincFinger', 'motif', 'coiledCoil', 'peptide',
        'transitPeptide', 'glycosylation', 'propeptide',
    )
    for name in placeholder_columns:
        df[name] = 'nan'
    return df[get_columns.columns]
def finalTouch(data):
    """Clean the ``sasa`` column, stringify the frame and add a ``trsh4`` label.

    Steps, applied to a copy of ``data`` (the caller's frame is not modified):

    1. In ``sasa``, string cells are cut at the first ``'*'``; non-string
       cells (numbers, NaN) are left as they are.
    2. ``'N/A'`` in ``sasa`` and ``' N/A'``, ``'None'``, ``''`` in any column
       are replaced by NaN.
    3. ``sasa`` is cast to float.
    4. The whole frame is cast with ``astype(str)``.
    5. ``trsh4`` is set to ``'core'`` where ``sasa < 5``, ``'surface'`` where
       ``sasa >= 5`` and ``'nan'`` where ``sasa`` is NaN.

    Args:
        data: DataFrame that contains a ``sasa`` column.

    Returns:
        A new DataFrame cast with ``astype(str)``, carrying the ``trsh4``
        column (also present, empty, when ``data`` has no rows).

    Raises:
        ValueError: If a ``sasa`` cell cannot be converted to float after
            cleaning.
    """
    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    missing = np.nan

    # Work on a copy so the caller's frame is not left half-cleaned.
    data = data.copy()

    # Only strings can carry a '*' suffix; testing `'*' in value` on a float
    # or int cell would raise TypeError.
    data['sasa'] = data['sasa'].map(
        lambda value: value.split('*')[0] if isinstance(value, str) else value
    )
    data['sasa'] = data['sasa'].replace({'N/A': missing})
    data = data.replace({' N/A': missing, 'None': missing, '': missing})
    data['sasa'] = data['sasa'].astype(float)

    # Label from the numeric values, before they are turned into strings.
    # Comparisons with NaN are False, so NaN rows fall through to the default.
    sasa_values = data['sasa'].to_numpy()
    labels = np.select(
        [sasa_values < 5, sasa_values >= 5],
        ['core', 'surface'],
        default='nan',
    )

    data = data.astype(str)
    # Positional assignment: correct even when the index has duplicates.
    data['trsh4'] = labels
    return data