AMP-Classifier / app.py
nonzeroexit's picture
Update app.py
c9a939f verified
raw
history blame
3.95 kB
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
# Restore the two fitted artifacts that ship alongside this script: the trained
# SVM classifier and the scaler applied to its inputs. Both files must be
# present in the Space, otherwise the app fails at start-up.
model, scaler = (joblib.load(artifact) for artifact in ("SVM.joblib", "norm.joblib"))
# Names of the descriptors fed to the model: the 20 single-letter amino-acid
# names first, then a hand-picked subset of two-letter dipeptide names.
# extract_features() walks this list in order to build the model input, so both
# the membership and the ORDER of this list matter.
# NOTE(review): presumably this must match the column order the SVM was
# trained on -- confirm against the training pipeline before editing.
selected_features = [
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
"HC", "HG", "HL", "HK", "HP",
"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
"WC",
"YR", "YD", "YC", "YG", "YL", "YS", "YV",
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]
def extract_features(sequence):
    """Build the model's input vector for one peptide sequence.

    The sequence is run through propy's
    ``AAComposition.CalculateAADipeptideComposition``, the leading 420
    descriptors are normalised with the module-level ``scaler``, and the
    values named in ``selected_features`` are picked out *by name*, in the
    order ``selected_features`` lists them.

    Args:
        sequence: Amino-acid sequence in one-letter codes. Whitespace is
            removed and letters are upper-cased before descriptors are
            computed, because the descriptor names are upper-case codes.

    Returns:
        A NumPy column vector of shape ``(n_selected, 1)`` holding the
        normalised values; callers transpose it into a single-sample row.

    Raises:
        ValueError: If the sequence is empty after whitespace removal, or if
            a name in ``selected_features`` is not among the normalised
            descriptors.
    """
    # Pasted input often carries newlines/spaces or lower-case letters, which
    # would otherwise be counted as part of the sequence or not matched at all.
    cleaned = "".join((sequence or "").split()).upper()
    if not cleaned:
        raise ValueError("Please enter a non-empty amino acid sequence.")

    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Only the leading 420 descriptors go through the scaler.
    # NOTE(review): presumably the 20 amino-acid plus 400 dipeptide
    # compositions the scaler was fitted on -- confirm against the training
    # pipeline and propy's output order.
    n_scaled = 420
    descriptor_names = list(all_features)[:n_scaled]
    raw_row = np.array([all_features[name] for name in descriptor_names]).reshape(1, -1)

    # The scaler expects one row per sample; flatten back to 1-D afterwards.
    normalized = scaler.transform(raw_row).flatten()

    # Map every descriptor name to its column in the normalised vector so the
    # selection below is by NAME. Indexing with the position inside
    # ``selected_features`` would pick the wrong descriptor as soon as that
    # list skips a name.
    column_of = {name: column for column, name in enumerate(descriptor_names)}

    missing = [name for name in selected_features if name not in column_of]
    if missing:
        raise ValueError(
            "Selected features missing from computed descriptors: "
            + ", ".join(missing)
        )

    selected = {name: normalized[column_of[name]] for name in selected_features}

    # Column vector, matching what callers of this function already expect.
    return np.array(list(selected.values())).reshape(-1, 1)
def predict(sequence):
    """Classify a peptide sequence as AMP or non-AMP.

    Args:
        sequence: Amino-acid sequence passed on to ``extract_features``.

    Returns:
        A 2-tuple ``(verdict, class_probabilities)``: a human-readable verdict
        string and the first row of ``model.predict_proba``.
    """
    # extract_features yields a column vector; the model takes one row per sample.
    sample = extract_features(sequence).T

    predicted_class = model.predict(sample)[0]
    class_probabilities = model.predict_proba(sample)[0]

    # Class 0 is treated as the antimicrobial class.
    if predicted_class == 0:
        verdict = "Potential Bioactive Peptide with Antimicrobial Properties (P-AMP)"
    else:
        verdict = "Likely Non-Antimicrobial Peptide"
    return verdict, class_probabilities
# Create Gradio interface.
# predict() returns TWO values (a verdict string and an array of class
# probabilities). Gradio assigns returned values to output components
# positionally, so two components are declared; with a single Label the whole
# tuple would be handed to that one component, which cannot render it.
# NOTE(review): the probability order presumably follows model.classes_ --
# confirm before labelling the individual values in the UI.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=[
        gr.Label(label="Prediction"),
        gr.Textbox(label="Class Probabilities"),
    ],
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
)
# Launch app
# NOTE(review): share=True requests a public share link; hosted Spaces
# reportedly ignore it with a warning -- confirm whether it is still wanted.
iface.launch(share=True)