nonzeroexit committed on
Commit
fb0b33c
·
verified ·
1 Parent(s): 10a71e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -20
app.py CHANGED
@@ -46,20 +46,15 @@ selected_features = [
46
  ]
47
 
48
  def extract_features(sequence):
49
- """Extract selected features, ensure order matches trained features, and normalize them."""
50
- if len(sequence) <= 9:
51
- return "Error: Protein sequence must be longer than 9 amino acids to extract features (for lamda=9)."
52
 
53
  all_features_dict = {}
54
 
55
  # Calculate all dipeptide features
56
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
57
-
58
- # Add only the first 420 features to the dictionary
59
- first_420_keys = list(dipeptide_features.keys())[:420] # Get the first 420 keys
60
- filtered_dipeptide_features = {key: dipeptide_features[key] for key in first_420_keys}
61
-
62
- all_features_dict.update(filtered_dipeptide_features)
63
 
64
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
65
  all_features_dict.update(auto_features)
@@ -70,20 +65,23 @@ def extract_features(sequence):
70
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
71
  all_features_dict.update(pseudo_features)
72
 
73
- feature_values = list(all_features_dict.values())
74
- feature_array = np.array(feature_values).reshape(-1, 1) # Reshape to (1, n_features) - CORRECT SHAPE
75
- print(f"Shape of feature_array before normalization: {feature_array.shape}") # Debug print
 
 
 
 
 
76
 
77
- normalized_features = scaler.transform(feature_array.T) # Normalize - NO TRANSPOSE
78
- normalized_features = normalized_features.flatten() # Flatten AFTER normalization if needed
79
 
 
80
 
81
- selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features) if feature in all_features_dict}
82
 
83
- selected_feature_df = pd.DataFrame([selected_feature_dict])
84
- selected_feature_array = selected_feature_df.T.to_numpy()
85
 
86
- return selected_feature_array
87
 
88
 
89
  def predict(sequence):
@@ -92,8 +90,8 @@ def predict(sequence):
92
  if isinstance(features, str) and features.startswith("Error:"):
93
  return features
94
 
95
- prediction = model.predict(features.T)[0]
96
- probabilities = model.predict_proba(features.T)[0]
97
 
98
  if prediction == 0:
99
  return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
 
46
  ]
47
 
48
  def extract_features(sequence):
 
 
 
49
 
50
  all_features_dict = {}
51
 
52
  # Calculate all dipeptide features
53
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
54
+
55
+ # Add all dipeptide features
56
+ all_features_dict.update(dipeptide_features)
57
+
 
 
58
 
59
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
60
  all_features_dict.update(auto_features)
 
65
  pseudo_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
66
  all_features_dict.update(pseudo_features)
67
 
68
+ # Convert feature dictionary to DataFrame, handling missing features
69
+ feature_df = pd.DataFrame([all_features_dict])
70
+
71
+ # Select features and handle missing columns
72
+ feature_df_selected = feature_df[selected_features].copy() # Use .copy() to avoid SettingWithCopyWarning
73
+
74
+ # Fill missing features with 0 (or another appropriate value)
75
+ feature_df_selected = feature_df_selected.fillna(0)
76
 
 
 
77
 
78
+ feature_array = feature_df_selected.values # Get numpy array directly
79
 
 
80
 
81
+ # Normalize the features
82
+ normalized_features = scaler.transform(feature_array)
83
 
84
+ return normalized_features
85
 
86
 
87
  def predict(sequence):
 
90
  if isinstance(features, str) and features.startswith("Error:"):
91
  return features
92
 
93
+ prediction = model.predict(features)[0]
94
+ probabilities = model.predict_proba(features)[0]
95
 
96
  if prediction == 0:
97
  return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"