AMP-Classifier / app.py
nonzeroexit's picture
Update app.py
9748994 verified
raw
history blame
4.91 kB
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition, Autocorrelation, CTD, PseudoAAC
from sklearn.preprocessing import MinMaxScaler
model = joblib.load("RF.joblib")
scaler = joblib.load("norm (1).joblib")
selected_features = [
"_SolventAccessibilityC3", "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_PolarityC1",
"_NormalizedVDWVC1", "_HydrophobicityC3", "_SecondaryStrT23", "_PolarizabilityD1001",
"_PolarizabilityD2001", "_PolarizabilityD3001", "_SolventAccessibilityD1001",
"_SolventAccessibilityD2001", "_SolventAccessibilityD3001", "_SecondaryStrD1001",
"_SecondaryStrD1075", "_SecondaryStrD2001", "_SecondaryStrD3001", "_ChargeD1001",
"_ChargeD1025", "_ChargeD2001", "_ChargeD3075", "_ChargeD3100", "_PolarityD1001",
"_PolarityD1050", "_PolarityD2001", "_PolarityD3001", "_NormalizedVDWVD1001",
"_NormalizedVDWVD2001", "_NormalizedVDWVD2025", "_NormalizedVDWVD2050", "_NormalizedVDWVD3001",
"_HydrophobicityD1001", "_HydrophobicityD2001", "_HydrophobicityD3001", "_HydrophobicityD3025",
"A", "R", "D", "C", "E", "Q", "H", "I", "M", "P", "Y", "V",
"AR", "AV", "RC", "RL", "RV", "CR", "CC", "CL", "CK", "EE", "EI", "EL",
"HC", "IA", "IL", "IV", "LA", "LC", "LE", "LI", "LT", "LV", "KC", "MA",
"MS", "SC", "TC", "TV", "YC", "VC", "VE", "VL", "VK", "VV",
"MoreauBrotoAuto_FreeEnergy30", "MoranAuto_Hydrophobicity2", "MoranAuto_Hydrophobicity4",
"GearyAuto_Hydrophobicity20", "GearyAuto_Hydrophobicity24", "GearyAuto_Hydrophobicity26",
"GearyAuto_Hydrophobicity27", "GearyAuto_Hydrophobicity28", "GearyAuto_Hydrophobicity29",
"GearyAuto_Hydrophobicity30", "GearyAuto_AvFlexibility22", "GearyAuto_AvFlexibility26",
"GearyAuto_AvFlexibility27", "GearyAuto_AvFlexibility28", "GearyAuto_AvFlexibility29",
"GearyAuto_AvFlexibility30", "GearyAuto_Polarizability22", "GearyAuto_Polarizability24",
"GearyAuto_Polarizability25", "GearyAuto_Polarizability27", "GearyAuto_Polarizability28",
"GearyAuto_Polarizability29", "GearyAuto_Polarizability30", "GearyAuto_FreeEnergy24",
"GearyAuto_FreeEnergy25", "GearyAuto_FreeEnergy30", "GearyAuto_ResidueASA21",
"GearyAuto_ResidueASA22", "GearyAuto_ResidueASA23", "GearyAuto_ResidueASA24",
"GearyAuto_ResidueASA30", "GearyAuto_ResidueVol21", "GearyAuto_ResidueVol24",
"GearyAuto_ResidueVol25", "GearyAuto_ResidueVol26", "GearyAuto_ResidueVol28",
"GearyAuto_ResidueVol29", "GearyAuto_ResidueVol30", "GearyAuto_Steric18",
"GearyAuto_Steric21", "GearyAuto_Steric26", "GearyAuto_Steric27", "GearyAuto_Steric28",
"GearyAuto_Steric29", "GearyAuto_Steric30", "GearyAuto_Mutability23", "GearyAuto_Mutability25",
"GearyAuto_Mutability26", "GearyAuto_Mutability27", "GearyAuto_Mutability28",
"GearyAuto_Mutability29", "GearyAuto_Mutability30", "APAAC1", "APAAC4", "APAAC5",
"APAAC6", "APAAC8", "APAAC9", "APAAC12", "APAAC13", "APAAC15", "APAAC18", "APAAC19",
"APAAC24"
]
def extract_features(sequence):
try:
comp_features = AAComposition.CalculateAAComposition(sequence)
auto_features = Autocorrelation.CalculateAutoTotal(sequence)
ctd_features = CTD.CalculateCTD(sequence)
pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
# Combine all features into a dictionary
all_features = {**comp_features, **auto_features, **ctd_features, **pseudo_features}
# Convert to DataFrame
all_features_df = pd.DataFrame([all_features])
# Select only required features
all_features_df = all_features_df[selected_features]
# Ensure correct shape
if all_features_df.shape[0] == 0:
raise ValueError("No features extracted. Check the input sequence.")
# Normalize the features
normalized_features = scaler.transform(all_features_df)
return normalized_features
except ZeroDivisionError:
print("Error: Division by zero encountered in Moran autocorrelation calculation.")
return None
except Exception as e:
print(f"Feature extraction error: {e}")
return None
def predict(sequence):
"""Predict if the sequence is an AMP or not."""
features = extract_features(sequence)
prediction = model.predict(features)[0]
probabilities = model.predict_proba(features)[0]
prob_amp = probabilities[0]
prob_non_amp = probabilities[1]
return f"{prob_amp * 100:.2f}% chance of being an Antimicrobial Peptide (AMP)" if prediction == 0 else f"{prob_non_amp * 100:.2f}% chance of being Non-AMP"
iface = gr.Interface(
fn=predict,
inputs=gr.Textbox(label="Enter Protein Sequence"),
outputs=gr.Label(label="Prediction"),
title="AMP Classifier",
description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
)
iface.launch(share=True)