nonzeroexit commited on
Commit
48574c5
·
verified ·
1 Parent(s): 68ded6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -46
app.py CHANGED
@@ -75,18 +75,6 @@ def extract_features(sequence):
75
  selected_df = normalized_df[selected_features].fillna(0)
76
  return selected_df.values
77
 
78
- # AMP Classifier
79
- def predict(sequence):
80
- features = extract_features(sequence)
81
- if isinstance(features, str):
82
- return features
83
- prediction = model.predict(features)[0]
84
- probabilities = model.predict_proba(features)[0]
85
- if prediction == 0:
86
- return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
87
- else:
88
- return f"{probabilities[1] * 100:.2f}% chance of being Non-AMP"
89
-
90
  # MIC Predictor
91
  def predictmic(sequence):
92
  sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
@@ -99,37 +87,17 @@ def predictmic(sequence):
99
  outputs = protbert_model(**tokens)
100
  embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().reshape(1, -1)
101
  bacteria_config = {
102
- "E.coli": {
103
- "model": "coli_xgboost_model.pkl",
104
- "scaler": "coli_scaler.pkl",
105
- "pca": None
106
- },
107
- "S.aureus": {
108
- "model": "aur_xgboost_model.pkl",
109
- "scaler": "aur_scaler.pkl",
110
- "pca": None
111
- },
112
- "P.aeruginosa": {
113
- "model": "arg_xgboost_model.pkl",
114
- "scaler": "arg_scaler.pkl",
115
- "pca": None
116
- },
117
- "K.Pneumonia": {
118
- "model": "pne_mlp_model.pkl",
119
- "scaler": "pne_scaler.pkl",
120
- "pca": "pne_pca.pkl"
121
- }
122
  }
123
  mic_results = {}
124
  for bacterium, cfg in bacteria_config.items():
125
  try:
126
  scaler = joblib.load(cfg["scaler"])
127
  scaled = scaler.transform(embedding)
128
- if cfg["pca"]:
129
- pca = joblib.load(cfg["pca"])
130
- transformed = pca.transform(scaled)
131
- else:
132
- transformed = scaled
133
  model = joblib.load(cfg["model"])
134
  mic_log = model.predict(transformed)[0]
135
  mic = round(expm1(mic_log), 3)
@@ -138,29 +106,29 @@ def predictmic(sequence):
138
  mic_results[bacterium] = f"Error: {str(e)}"
139
  return mic_results
140
 
141
- # Combined Prediction
142
  def full_prediction(sequence):
143
  features = extract_features(sequence)
144
  if isinstance(features, str):
145
- return "Error", "0%", {}
146
  prediction = model.predict(features)[0]
147
  probabilities = model.predict_proba(features)[0]
148
  amp_result = "Antimicrobial Peptide (AMP)" if prediction == 0 else "Non-AMP"
149
  confidence = round(probabilities[0 if prediction == 0 else 1] * 100, 2)
150
  mic_values = predictmic(sequence)
151
- return amp_result, f"{confidence}%", mic_values
 
 
 
152
 
153
- # Gradio Interface
154
  iface = gr.Interface(
155
  fn=full_prediction,
156
  inputs=gr.Textbox(label="Enter Protein Sequence"),
157
- outputs=[
158
- gr.Label(label="AMP Classification"),
159
- gr.Label(label="Confidence"),
160
- gr.JSON(label="Predicted MIC (µM) for Each Bacterium")
161
- ],
162
  title="AMP & MIC Predictor",
163
  description="Enter an amino acid sequence (≥10 valid letters) to predict AMP class and MIC values."
164
  )
165
 
166
  iface.launch(share=True)
 
 
75
  selected_df = normalized_df[selected_features].fillna(0)
76
  return selected_df.values
77
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  # MIC Predictor
79
  def predictmic(sequence):
80
  sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
 
87
  outputs = protbert_model(**tokens)
88
  embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().reshape(1, -1)
89
  bacteria_config = {
90
+ "E.coli": {"model": "coli_xgboost_model.pkl", "scaler": "coli_scaler.pkl", "pca": None},
91
+ "S.aureus": {"model": "aur_xgboost_model.pkl", "scaler": "aur_scaler.pkl", "pca": None},
92
+ "P.aeruginosa": {"model": "arg_xgboost_model.pkl", "scaler": "arg_scaler.pkl", "pca": None},
93
+ "K.Pneumonia": {"model": "pne_mlp_model.pkl", "scaler": "pne_scaler.pkl", "pca": "pne_pca.pkl"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
95
  mic_results = {}
96
  for bacterium, cfg in bacteria_config.items():
97
  try:
98
  scaler = joblib.load(cfg["scaler"])
99
  scaled = scaler.transform(embedding)
100
+ transformed = joblib.load(cfg["pca"]).transform(scaled) if cfg["pca"] else scaled
 
 
 
 
101
  model = joblib.load(cfg["model"])
102
  mic_log = model.predict(transformed)[0]
103
  mic = round(expm1(mic_log), 3)
 
106
  mic_results[bacterium] = f"Error: {str(e)}"
107
  return mic_results
108
 
109
+ # Combined Output as Single String
110
  def full_prediction(sequence):
111
  features = extract_features(sequence)
112
  if isinstance(features, str):
113
+ return features
114
  prediction = model.predict(features)[0]
115
  probabilities = model.predict_proba(features)[0]
116
  amp_result = "Antimicrobial Peptide (AMP)" if prediction == 0 else "Non-AMP"
117
  confidence = round(probabilities[0 if prediction == 0 else 1] * 100, 2)
118
  mic_values = predictmic(sequence)
119
+ result = f"Prediction: {amp_result}\nConfidence: {confidence}%\n\nPredicted MIC Values (µM):\n"
120
+ for organism, mic in mic_values.items():
121
+ result += f"- {organism}: {mic}\n"
122
+ return result
123
 
124
+ # Gradio Interface (Single Label Output)
125
  iface = gr.Interface(
126
  fn=full_prediction,
127
  inputs=gr.Textbox(label="Enter Protein Sequence"),
128
+ outputs=gr.Textbox(label="AMP & MIC Prediction Summary"),
 
 
 
 
129
  title="AMP & MIC Predictor",
130
  description="Enter an amino acid sequence (≥10 valid letters) to predict AMP class and MIC values."
131
  )
132
 
133
  iface.launch(share=True)
134
+