nonzeroexit committed on
Commit
47bb3e1
·
verified ·
1 Parent(s): e199881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -4
app.py CHANGED
@@ -45,11 +45,8 @@ selected_features = [
45
 
46
  def extract_features(sequence):
47
  aa_features = AAComposition.CalculateAADipeptideComposition(sequence)
48
-
49
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
50
-
51
  ctd_features = CTD.CalculateCTD(sequence)
52
-
53
  pseaac_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
54
 
55
  all_features = {**aa_features, **auto_features, **ctd_features, **pseaac_features}
@@ -57,7 +54,23 @@ def extract_features(sequence):
57
  # Convert to DataFrame
58
  feature_df = pd.DataFrame([all_features])
59
 
60
- # Select features that match training data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  feature_df = feature_df[selected_features]
62
 
63
  # Normalize
@@ -65,6 +78,7 @@ def extract_features(sequence):
65
 
66
  return normalized_features
67
 
 
68
  def predict(sequence):
69
  """Predict if the sequence is an AMP or not."""
70
  features = extract_features(sequence)
 
45
 
46
  def extract_features(sequence):
47
  aa_features = AAComposition.CalculateAADipeptideComposition(sequence)
 
48
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
 
49
  ctd_features = CTD.CalculateCTD(sequence)
 
50
  pseaac_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
51
 
52
  all_features = {**aa_features, **auto_features, **ctd_features, **pseaac_features}
 
54
  # Convert to DataFrame
55
  feature_df = pd.DataFrame([all_features])
56
 
57
+ print("Extracted Features:", feature_df.columns.tolist()) # Debugging line
58
+
59
+ # Ensure all selected features are present
60
+ missing_features = [f for f in selected_features if f not in feature_df.columns]
61
+ extra_features = [f for f in feature_df.columns if f not in selected_features]
62
+
63
+ if missing_features:
64
+ print(f"Missing Features ({len(missing_features)}):", missing_features)
65
+
66
+ if extra_features:
67
+ print(f"Extra Features ({len(extra_features)}):", extra_features)
68
+
69
+ # Fix missing columns by adding them with default values (0)
70
+ for feature in missing_features:
71
+ feature_df[feature] = 0
72
+
73
+ # Select only the required features
74
  feature_df = feature_df[selected_features]
75
 
76
  # Normalize
 
78
 
79
  return normalized_features
80
 
81
+
82
  def predict(sequence):
83
  """Predict if the sequence is an AMP or not."""
84
  features = extract_features(sequence)