nonzeroexit committed
Commit 0c1f1e9 · verified · 1 parent: 25d4105

Update app.py

Files changed (1): app.py (+82 -1)
app.py CHANGED
@@ -93,8 +93,89 @@ def predict(sequence):
         return f"{probabilities[0] * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)"
     else:
         return f"{probabilities[1] * 100:.2f}% chance of being Non-AMP"
+
+
 def predictmic(sequence):
-    features = 0
+    import torch
+    from transformers import BertTokenizer, BertModel
+    import numpy as np
+    import pickle
+    from math import expm1
+
+    # === Load ProtBert model ===
+    tokenizer = BertTokenizer.from_pretrained("Rostlab/prot_bert", do_lower_case=False)
+    model = BertModel.from_pretrained("Rostlab/prot_bert")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model = model.to(device).eval()
+
+    # === Preprocess input sequence ===
+    sequence = ''.join([aa for aa in sequence.upper() if aa in "ACDEFGHIKLMNPQRSTVWY"])
+    if len(sequence) < 10:
+        return {"Error": "Sequence too short or invalid. Must contain at least 10 valid amino acids."}
+
+    # === Tokenize & embed using mean pooling ===
+    seq_spaced = ' '.join(list(sequence))
+    tokens = tokenizer(seq_spaced, return_tensors="pt", padding='max_length', truncation=True, max_length=512)
+    tokens = {k: v.to(device) for k, v in tokens.items()}
+
+    with torch.no_grad():
+        outputs = model(**tokens)
+        embedding = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().reshape(1, -1)  # Shape: (1, 1024)
+
+    # === MIC models and scalers for each bacterium ===
+    bacteria_config = {
+        "E.coli": {
+            "model": "coli_xgboost_model.pkl",
+            "scaler": "coli_scaler.pkl",
+            "pca": None
+        },
+        "S.aureus": {
+            "model": "aur_xgboost_model.pkl",
+            "scaler": "aur_scaler.pkl",
+            "pca": None
+        },
+        "P.aeruginosa": {
+            "model": "arg_xgboost_model.pkl",
+            "scaler": "arg_scaler.pkl",
+            "pca": None
+        },
+        "K.Pneumonia": {
+            "model": "pne_mlp_model.pkl",
+            "scaler": "pne_scaler.pkl",
+            "pca": "pne_pca"
+        }
+    }
+
+    mic_results = {}
+
+    for bacterium, cfg in bacteria_config.items():
+        try:
+            # === Load scaler and transform ===
+            with open(cfg["scaler"], "rb") as f:
+                scaler = pickle.load(f)
+            scaled = scaler.transform(embedding)
+
+            # === Apply PCA if it exists ===
+            if cfg["pca"] is not None:
+                with open(cfg["pca"], "rb") as f:
+                    pca = pickle.load(f)
+                transformed = pca.transform(scaled)
+            else:
+                transformed = scaled
+
+            # === Load model and predict ===
+            with open(cfg["model"], "rb") as f:
+                mic_model = pickle.load(f)
+            mic_log = mic_model.predict(transformed)[0]
+            mic = round(expm1(mic_log), 3)  # Inverse of the log1p transform used in training
+
+            mic_results[bacterium] = mic
+
+        except Exception as e:
+            mic_results[bacterium] = f"Error: {str(e)}"
+
+    return mic_results
+
 # Gradio interface
 iface = gr.Interface(
     fn=predict,
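
This commit adds predictmic but leaves the Gradio wiring untouched, so the UI still calls only predict. Below is a minimal sketch of how the new function could be exposed alongside the existing classifier. It assumes the same single-textbox input style already used by iface; the mic_iface and demo names and the tab labels are illustrative, not part of this commit.

# Sketch only (not in the commit): expose predictmic next to the existing
# classifier using standard Gradio components (gr.Interface, gr.Textbox,
# gr.JSON, gr.TabbedInterface).
import gradio as gr

mic_iface = gr.Interface(
    fn=predictmic,  # returns a dict like {"E.coli": 12.5, "S.aureus": "Error: ..."}
    inputs=gr.Textbox(label="Peptide sequence"),
    outputs=gr.JSON(label="Predicted MIC per bacterium"),
)

# Group the existing AMP classifier and the new MIC predictor into tabs.
demo = gr.TabbedInterface([iface, mic_iface], ["AMP classifier", "MIC prediction"])
demo.launch()

One design note: as committed, predictmic reloads ProtBert and every pickled scaler/model on each call; hoisting those loads to module scope would make repeated predictions much cheaper.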