nonzeroexit committed on
Commit
aa6838a
·
verified ·
1 Parent(s): a359627

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -19
app.py CHANGED
@@ -46,42 +46,41 @@ selected_features = [
46
  ]
47
 
48
  def extract_features(sequence):
 
49
  if len(sequence) < 3: # Ensure sequence is long enough
50
  return None # Return None if sequence is too short
51
 
 
 
52
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
53
- auto_features = Autocorrelation.CalculateAutoTotal(sequence)
54
- ctd_features = CTD.CalculateCTD(sequence)
55
 
56
- try:
57
- pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
58
- except ZeroDivisionError:
59
- pseudo_features = {} # Ignore PseudoAAC features if they fail
60
 
61
- all_features = {**auto_features, **ctd_features, **pseudo_features, **dipeptide_features}
 
62
 
63
- # Ensure we only keep features that were used during scaler training
64
- feature_names = list(all_features.keys()) # Extracted feature names
65
- feature_values = np.array(list(all_features.values())).reshape(1, -1) # Reshape for scaler
66
 
67
- if feature_values.shape[1] != 145: # Check expected feature count
68
- print(f"Warning: Extracted {feature_values.shape[1]} features, expected 145. Skipping normalization.")
69
- return None # Skip this sequence
70
 
71
- # Normalize the feature values
72
- normalized_features = scaler.transform(feature_values)
 
73
  normalized_features = normalized_features.flatten()
74
 
75
- selected_feature_dict = {feature_names[i]: normalized_features[i] for i in range(len(feature_names))}
 
 
 
 
76
  selected_feature_df = pd.DataFrame([selected_feature_dict])
77
  selected_feature_array = selected_feature_df.T.to_numpy()
78
 
79
  return selected_feature_array
80
 
81
 
82
-
83
-
84
-
85
  def predict(sequence):
86
  """Predicts whether the input sequence is an AMP."""
87
  features = extract_features(sequence)
 
46
  ]
47
 
48
  def extract_features(sequence):
49
+ """Extract selected features and normalize them."""
50
  if len(sequence) < 3: # Ensure sequence is long enough
51
  return None # Return None if sequence is too short
52
 
53
+ all_features_dict = {}
54
+
55
  dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
56
+ all_features_dict.update(dipeptide_features) # Use update instead of reassignment
 
57
 
58
+ auto_features = Autocorrelation.CalculateAutoTotal(sequence)
59
+ all_features_dict.update(auto_features) # Use update
 
 
60
 
61
+ ctd_features = CTD.CalculateCTD(sequence)
62
+ all_features_dict.update(ctd_features) # Use update
63
 
64
+ pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
65
+ all_features_dict.update(pseudo_features) # Use update
 
66
 
 
 
 
67
 
68
+ feature_values = list(all_features_dict.values()) # Use all_features_dict
69
+ feature_array = np.array(feature_values).reshape(-1, 1)
70
+ normalized_features = scaler.transform(feature_array.T)
71
  normalized_features = normalized_features.flatten()
72
 
73
+ selected_feature_dict = {}
74
+ for i, feature in enumerate(selected_features):
75
+ if feature in all_features_dict: # Use all_features_dict
76
+ selected_feature_dict[feature] = normalized_features[i]
77
+
78
  selected_feature_df = pd.DataFrame([selected_feature_dict])
79
  selected_feature_array = selected_feature_df.T.to_numpy()
80
 
81
  return selected_feature_array
82
 
83
 
 
 
 
84
  def predict(sequence):
85
  """Predicts whether the input sequence is an AMP."""
86
  features = extract_features(sequence)