nonzeroexit committed on
Commit
f3b700a
·
verified ·
1 Parent(s): fb0b33c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -17
app.py CHANGED
@@ -49,13 +49,10 @@ def extract_features(sequence):
49
 
50
  all_features_dict = {}
51
 
52
- # Calculate all dipeptide features
53
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
54
-
55
- # Add all dipeptide features
56
  all_features_dict.update(dipeptide_features)
57
 
58
-
59
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
60
  all_features_dict.update(auto_features)
61
 
@@ -65,23 +62,20 @@ def extract_features(sequence):
65
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
66
  all_features_dict.update(pseudo_features)
67
 
68
- # Convert feature dictionary to DataFrame, handling missing features
69
- feature_df = pd.DataFrame([all_features_dict])
70
-
71
- # Select features and handle missing columns
72
- feature_df_selected = feature_df[selected_features].copy() # Use .copy() to avoid SettingWithCopyWarning
73
-
74
- # Fill missing features with 0 (or another appropriate value)
75
- feature_df_selected = feature_df_selected.fillna(0)
76
-
77
 
78
- feature_array = feature_df_selected.values # Get numpy array directly
 
 
79
 
 
 
 
 
80
 
81
- # Normalize the features
82
- normalized_features = scaler.transform(feature_array)
83
 
84
- return normalized_features
85
 
86
 
87
  def predict(sequence):
 
49
 
50
  all_features_dict = {}
51
 
52
+ # Calculate all features
53
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
 
 
54
  all_features_dict.update(dipeptide_features)
55
 
 
56
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
57
  all_features_dict.update(auto_features)
58
 
 
62
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
63
  all_features_dict.update(pseudo_features)
64
 
65
+ # Convert all features to DataFrame
66
+ feature_df_all = pd.DataFrame([all_features_dict])
 
 
 
 
 
 
 
67
 
68
+ # Normalize ALL features
69
+ normalized_feature_array = scaler.transform(feature_df_all.values) # Normalize the numpy array
70
+ normalized_feature_df = pd.DataFrame(normalized_feature_array, columns=feature_df_all.columns) # Convert back to DataFrame with original column names
71
 
72
+ # Select features AFTER normalization
73
+ feature_df_selected = normalized_feature_df[selected_features].copy()
74
+ feature_df_selected = feature_df_selected.fillna(0) # Fill missing if any after selection (though unlikely now)
75
+ feature_array = feature_df_selected.values
76
 
 
 
77
 
78
+ return feature_array
79
 
80
 
81
  def predict(sequence):