nonzeroexit committed on
Commit
81bcfb3
·
verified ·
1 Parent(s): 98a1e1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -28
app.py CHANGED
@@ -5,9 +5,11 @@ import pandas as pd
5
  from propy import AAComposition, Autocorrelation, CTD, PseudoAAC
6
  from sklearn.preprocessing import MinMaxScaler
7
 
 
8
  model = joblib.load("RF.joblib")
9
  scaler = joblib.load("norm (1).joblib")
10
 
 
11
  selected_features = [
12
  "_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_PolarityC1",
13
  "_NormalizedVDWVC1", "_HydrophobicityC3", "_SecondaryStrT23", "_PolarizabilityD1001",
@@ -46,60 +48,66 @@ selected_features = [
46
 
47
 
48
  def extract_features(sequence):
 
49
  try:
 
50
  comp_features = AAComposition.CalculateAAComposition(sequence)
51
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
52
  ctd_features = CTD.CalculateCTD(sequence)
53
- pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
54
-
55
- # Combine all features into a dictionary
56
- all_features = {**comp_features, **auto_features, **ctd_features, **pseudo_features}
57
-
58
- # Convert to DataFrame
59
- all_features_df = pd.DataFrame([all_features])
60
 
61
- # Ensure all required features are present
62
- missing_features = [feat for feat in selected_features if feat not in all_features_df.columns]
63
- if missing_features:
64
- print(f"Warning: Missing features - {missing_features}")
65
- for feat in missing_features:
66
- all_features_df[feat] = 0 # Fill missing features with 0
67
 
68
- # Select only required features
 
69
  all_features_df = all_features_df[selected_features]
70
 
71
- # Normalize the features
 
72
  normalized_features = scaler.transform(all_features_df)
73
 
74
  return normalized_features
 
75
  except ZeroDivisionError:
76
- print("Error: Division by zero encountered in Moran autocorrelation calculation.")
77
- return None
 
 
 
78
  except Exception as e:
79
- print(f"Feature extraction error: {e}")
80
- return None
81
-
82
 
83
 
84
 
85
  def predict(sequence):
86
- """Predict if the sequence is an AMP or not."""
87
  features = extract_features(sequence)
88
- features = np.array(features).reshape(1, -1) # Reshape for a single sample
 
 
 
 
 
89
  prediction = model.predict(features)[0]
90
  probabilities = model.predict_proba(features)[0]
91
-
92
- prob_amp = probabilities[0]
93
- prob_non_amp = probabilities[1]
94
 
95
- return f"{prob_amp * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)" if prediction == 0 else f"{prob_non_amp * 100:.2f}% chance of being Non-AMP"
 
 
 
 
 
96
 
 
97
  iface = gr.Interface(
98
  fn=predict,
99
  inputs=gr.Textbox(label="Enter Protein Sequence"),
100
  outputs=gr.Label(label="Prediction"),
101
  title="AMP Classifier",
102
- description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
103
  )
104
 
105
- iface.launch(share=True)
 
5
  from propy import AAComposition, Autocorrelation, CTD, PseudoAAC
6
  from sklearn.preprocessing import MinMaxScaler
7
 
8
+ # Load the pre-trained model and scaler
9
  model = joblib.load("RF.joblib")
10
  scaler = joblib.load("norm (1).joblib")
11
 
12
+ # Define the list of selected features (IMPORTANT: Keep this consistent with training)
13
  selected_features = [
14
  "_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_PolarityC1",
15
  "_NormalizedVDWVC1", "_HydrophobicityC3", "_SecondaryStrT23", "_PolarizabilityD1001",
 
48
 
49
 
50
  def extract_features(sequence):
51
+ """Extracts features from a protein sequence and returns them as a NumPy array."""
52
  try:
53
+ # Calculate features from different ProPy modules
54
  comp_features = AAComposition.CalculateAAComposition(sequence)
55
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
56
  ctd_features = CTD.CalculateCTD(sequence)
57
+ pseudo_features = PseudoAAC.GetAPseudoAAC(sequence) # Use default parameters
 
 
 
 
 
 
58
 
59
+ # Combine all features into a single dictionary
60
+ all_features = {**comp_features, **auto_features, **ctd_features, **pseudo_features}
61
+ #print(len(all_features)) # debugging
 
 
 
62
 
63
+ # Convert to DataFrame, selecting only the required features
64
+ all_features_df = pd.DataFrame([all_features])
65
  all_features_df = all_features_df[selected_features]
66
 
67
+
68
+ # Normalize the features using the pre-fitted scaler
69
  normalized_features = scaler.transform(all_features_df)
70
 
71
  return normalized_features
72
+
73
  except ZeroDivisionError:
74
+ print("Error: Division by zero encountered in feature calculation. Check your input sequence.")
75
+ return None # Or handle appropriately
76
+ except KeyError as e:
77
+ print(f"Error: Missing feature {e}. Check feature name consistency and ProPy version.")
78
+ return None # Or handle appropriately
79
  except Exception as e:
80
+ print(f"An unexpected error occurred during feature extraction: {e}")
81
+ return None # Or handle appropriately
 
82
 
83
 
84
 
85
  def predict(sequence):
86
+ """Predicts whether the input sequence is an AMP and returns the prediction."""
87
  features = extract_features(sequence)
88
+
89
+ # Check if feature extraction was successful
90
+ if features is None:
91
+ return "Error: Could not extract features. Please check the input sequence."
92
+
93
+ # No need to reshape here; extract_features already returns the correct shape
94
  prediction = model.predict(features)[0]
95
  probabilities = model.predict_proba(features)[0]
 
 
 
96
 
97
+ # Determine output string based on prediction
98
+ if prediction == 0:
99
+ return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
100
+ else:
101
+ return f"{probabilities[1] * 100:.2f}% chance of being Non-AMP"
102
+
103
 
104
+ # Gradio interface setup
105
  iface = gr.Interface(
106
  fn=predict,
107
  inputs=gr.Textbox(label="Enter Protein Sequence"),
108
  outputs=gr.Label(label="Prediction"),
109
  title="AMP Classifier",
110
+ description="Enter an amino acid sequence (e.g., FLPVLAGGL) to predict whether it's an antimicrobial peptide (AMP) or not."
111
  )
112
 
113
+ iface.launch(share=True)