# AMP-Classifier / app.py
# nonzeroexit's picture
# Update app.py
# 11e1095 verified
# raw
# history blame
# 3.71 kB
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
from fastapi import FastAPI
from gradio.routes import mount_gradio_app
# Load trained SVM model and scaler
# Both artifacts are deserialized once, at import time, from paths relative to
# the process working directory; a missing file fails the import.
# `model` is later used through `model.predict(...)` and `scaler` through
# `scaler.transform(...)` on a single 420-column row.
# NOTE(review): joblib files are pickle-based, so only load artifacts from a
# trusted source.
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")
# FastAPI instance
# The HTTP endpoint is registered on this object and the Gradio UI is mounted
# onto it further down, so it must exist before those definitions run.
app = FastAPI()
# Selected features used in the model
# Names are keys of the descriptor dict returned by
# AAComposition.CalculateAADipeptideComposition: the first row lists the 20
# single-letter amino-acid names, the remaining rows list two-letter
# (dipeptide) names grouped by their first residue. Not every dipeptide is
# present (for example "AH" is absent), so positions in this list do NOT line
# up with positions in the full descriptor vector.
# NOTE(review): the order here presumably matches the column order the model
# was trained with — confirm against the training pipeline before reordering.
selected_features = [
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
"HC", "HG", "HL", "HK", "HP",
"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
"WC",
"YR", "YD", "YC", "YG", "YL", "YS", "YV",
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]
def extract_features(sequence):
    """Build the normalized, feature-selected model input for one sequence.

    The sequence is turned into composition descriptors with propy, the first
    420 descriptors are scaled with the module-level ``scaler``, and the
    columns named in ``selected_features`` are picked out of the scaled row.

    Args:
        sequence: Amino-acid sequence as a string of one-letter codes.

    Returns:
        A 2-D NumPy array with a single row holding the scaled values of the
        selected features, in the order of ``selected_features``.
    """
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Only the first 420 descriptors are fed to the scaler (presumably the
    # 20 amino-acid plus 400 dipeptide compositions — confirm against propy).
    descriptor_names = list(all_features)[:420]
    raw_row = np.array(
        [all_features[name] for name in descriptor_names]
    ).reshape(1, -1)
    normalized_features = scaler.transform(raw_row).flatten()

    # Select by descriptor NAME. Indexing by position in `selected_features`
    # would pair names with the wrong columns as soon as that list skips a
    # descriptor (it omits "AH", so "AI" would receive the value of "AH").
    column_of = {name: idx for idx, name in enumerate(descriptor_names)}
    selected_values = [
        normalized_features[column_of[feature]]
        for feature in selected_features
        if feature in column_of
    ]
    return np.array([selected_values])
def predict(sequence):
    """Predict AMP vs Non-AMP for a single amino-acid sequence.

    Args:
        sequence: Amino-acid sequence as a string of one-letter codes.

    Returns:
        A dict ``{"prediction": "AMP"}`` when the model outputs class 0,
        otherwise ``{"prediction": "Non-AMP"}``.
    """
    features = extract_features(sequence)
    # `features` is already one sample per row, shape (1, n_features).
    # Transposing it would present n_features samples of one feature each,
    # which does not match what the model was fitted on.
    prediction = model.predict(features)[0]
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}
# FastAPI endpoint
@app.post("/predict/")
async def predict_api(request: dict):
    """Classify the sequence supplied in a JSON request body.

    Args:
        request: Parsed JSON body; the sequence is read from its
            ``"sequence"`` key (a missing key is treated as an empty string).

    Returns:
        The dict produced by ``predict`` on success, or ``{"error": ...}``
        when the sequence is not a string or its length is outside 10-100.
    """
    sequence = request.get("sequence", "")
    # Reject non-string payloads (numbers, lists, null) up front; otherwise
    # len() or the feature extractor would fail with an unhandled exception.
    if not isinstance(sequence, str):
        return {"error": "Sequence must be a string."}
    if not 10 <= len(sequence) <= 100:
        return {"error": "Sequence length must be between 10 and 100."}
    return predict(sequence)
# Gradio Interface (optional if you want UI access)
def _predict_label(sequence):
    """Return only the predicted class name for display in the Gradio UI.

    `predict` returns ``{"prediction": <label>}`` for the JSON API. A dict
    handed to `gr.Label` is read as a label -> confidence mapping, so the
    class name would end up in the confidence slot; a plain string is shown
    as the label itself.
    """
    return predict(sequence)["prediction"]


iface = gr.Interface(
    fn=_predict_label,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict AMP or Non-AMP.",
)
# Serve the UI from the same ASGI app as the JSON endpoint, under /gradio.
app = mount_gradio_app(app, iface, path="/gradio")
# Run the server with: `uvicorn <module>:app --host 0.0.0.0 --port 7860`
# (replace <module> with this file's name without ".py", e.g. `app:app` for app.py)