nonzeroexit committed on
Commit
a52fdd6
·
verified ·
1 Parent(s): 601b6fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -19
app.py CHANGED
@@ -51,9 +51,14 @@ def extract_features(sequence):
51
  return "Error: Protein sequence must be longer than 9 amino acids to extract features (for lamda=9)."
52
 
53
  all_features_dict = {}
54
-
 
55
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
56
- all_features_dict.update(dipeptide_features)
 
 
 
 
57
 
58
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
59
  all_features_dict.update(auto_features)
@@ -64,26 +69,20 @@ def extract_features(sequence):
64
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
65
  all_features_dict.update(pseudo_features)
66
 
67
- # Create an ordered list of feature values based on selected_features
68
- ordered_feature_values = []
69
- missing_features = []
70
- for feature_name in selected_features:
71
- if feature_name in all_features_dict:
72
- ordered_feature_values.append(all_features_dict[feature_name])
73
- else:
74
- missing_features.append(feature_name)
75
- ordered_feature_values.append(0) # Pad with 0 for missing features - important for consistent input size
76
-
77
- if missing_features:
78
- print(f"Warning: The following features were missing from extraction and padded with 0: {missing_features}")
79
-
80
-
81
- feature_array = np.array(ordered_feature_values).reshape(1, -1) # Reshape to (1, n_features) for single sample
82
 
83
- normalized_features = scaler.transform(feature_array) # Normalize the ordered feature array
 
 
 
84
 
 
 
85
 
86
- return normalized_features # Return the normalized features as a 2D numpy array
87
 
88
 
89
  def predict(sequence):
 
51
  return "Error: Protein sequence must be longer than 9 amino acids to extract features (for lamda=9)."
52
 
53
  all_features_dict = {}
54
+
55
+ # Calculate all dipeptide features
56
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
57
+
58
+ first_420_keys = list(dipeptide_features.keys())[:420]
59
+ filtered_dipeptide_features = {key: dipeptide_features[key] for key in first_420_keys}
60
+
61
+ all_features_dict.update(filtered_dipeptide_features)
62
 
63
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
64
  all_features_dict.update(auto_features)
 
69
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
70
  all_features_dict.update(pseudo_features)
71
 
72
+ feature_values = list(all_features_dict.values())
73
+ feature_array = np.array(feature_values).reshape(-1, 1)
74
+ normalized_features = scaler.transform(feature_array.T)
75
+ normalized_features = normalized_features.flatten()
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ selected_feature_dict = {}
78
+ for i, feature in enumerate(selected_features):
79
+ if feature in all_features_dict:
80
+ selected_feature_dict[feature] = normalized_features[i]
81
 
82
+ selected_feature_df = pd.DataFrame([selected_feature_dict])
83
+ selected_feature_array = selected_feature_df.T.to_numpy()
84
 
85
+ return selected_feature_array
86
 
87
 
88
  def predict(sequence):