Spaces:
Running
Running
File size: 3,713 Bytes
3b84715 942bf87 51a3749 ea9a1bf 3a814dc 51a3749 2efd314 51a3749 2efd314 76a754e 51159d5 942bf87 2efd314 dc9275e 2efd314 dc9275e 11e1095 dc9275e 3b84715 dc9275e 2efd314 dc9275e 2efd314 dc9275e 51159d5 2efd314 d5efa2c 3b84715 cf1d474 2efd314 3b84715 2efd314 3b84715 2efd314 3b84715 942bf87 2efd314 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
from fastapi import FastAPI
from gradio.routes import mount_gradio_app
# Load the trained SVM classifier and the fitted feature scaler once, at
# import time. Both paths are relative, so the files must be present in the
# process's working directory.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")
# FastAPI application object; the REST endpoint is registered on it below and
# it is the ASGI app handed to uvicorn.
app = FastAPI()
# Names of the composition descriptors the model consumes. Each name is used
# as a key into the dict returned by propy's CalculateAADipeptideComposition:
# one-letter entries first, followed by two-letter entries.
# The ORDER of this list defines the column order of the feature row built in
# extract_features, so it presumably has to match the column order used when
# the model was trained -- do not reorder or edit without retraining.
selected_features = [
"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
"HC", "HG", "HL", "HK", "HP",
"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
"WC",
"YR", "YD", "YC", "YG", "YL", "YS", "YV",
"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]
def extract_features(sequence):
    """Build the normalized feature row the classifier expects for one sequence.

    Steps:
      1. Compute propy's amino-acid/dipeptide composition dict for ``sequence``.
      2. Keep the first 420 descriptors (presumably the 20 single-residue and
         400 dipeptide compositions -- the width the scaler is fed here) and
         normalize them with the module-level ``scaler``.
      3. Pick, *by name*, the descriptors listed in ``selected_features``.

    Args:
        sequence: Amino-acid sequence as a string of one-letter codes.

    Returns:
        A 2-D NumPy array of shape ``(1, n_selected)`` whose columns follow
        the order of ``selected_features``.
    """
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Names and values of the leading 420 descriptors, kept in the dict's own
    # order so that the scaler sees the columns in the order it is given here.
    leading_names = list(all_features)[:420]
    raw_row = np.array(
        [all_features[name] for name in leading_names], dtype=float
    ).reshape(1, -1)
    normalized = scaler.transform(raw_row).flatten()

    # Map each descriptor name to its column in the normalized vector.
    # The previous implementation indexed the vector with the position of the
    # name inside ``selected_features``; because that list skips descriptors
    # (e.g. "AH"), every value after the first gap was taken from the wrong
    # column.
    column_of = {name: idx for idx, name in enumerate(leading_names)}

    # Names absent from the computed descriptors are skipped, as before.
    selected_values = [
        normalized[column_of[name]]
        for name in selected_features
        if name in column_of
    ]
    return np.array([selected_values])
def predict(sequence):
    """Classify a peptide sequence as AMP or Non-AMP.

    Args:
        sequence: Amino-acid sequence as a string of one-letter codes.

    Returns:
        ``{"prediction": "AMP"}`` when the model outputs class 0, otherwise
        ``{"prediction": "Non-AMP"}``.
    """
    features = extract_features(sequence)
    # scikit-learn estimators expect X as (n_samples, n_features). The feature
    # extractor yields one row for one sequence, so present it as exactly one
    # sample; transposing it (as was done before) turned every feature into a
    # separate one-feature sample.
    sample = np.asarray(features).reshape(1, -1)
    prediction = model.predict(sample)[0]
    # NOTE(review): class 0 is treated as the AMP class -- confirm against the
    # label encoding used at training time.
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}
# FastAPI endpoint
@app.post("/predict/")
async def predict_api(request: dict):
    """Handle ``POST /predict/`` with a JSON body like ``{"sequence": "..."}``.

    Args:
        request: Parsed JSON body of the request.

    Returns:
        The prediction dict produced by ``predict`` for a valid sequence, or
        ``{"error": ...}`` when the sequence is missing, is not a string, or
        its length is outside 10..100 characters.
    """
    sequence = request.get("sequence", "")
    # Reject non-string values up front: a JSON number or boolean would
    # otherwise reach len() and raise TypeError (HTTP 500) instead of
    # producing the documented error payload. An empty string is rejected by
    # the length bound.
    if not isinstance(sequence, str) or not 10 <= len(sequence) <= 100:
        return {"error": "Sequence length must be between 10 and 100."}
    return predict(sequence)
# Gradio Interface (optional if you want UI access)
def _predict_label(sequence):
    """Adapt ``predict`` for the Gradio ``Label`` output.

    ``gr.Label`` interprets a dict as a mapping of class name -> confidence,
    so handing it ``{"prediction": "AMP"}`` would present "prediction" as the
    class. Return only the class-name string, which Label displays directly.
    """
    return predict(sequence)["prediction"]


iface = gr.Interface(
    fn=_predict_label,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict AMP or Non-AMP.",
)
# Serve the UI from the same ASGI app as the REST endpoint, under /gradio.
app = mount_gradio_app(app, iface, path="/gradio")
# Run the server with: `uvicorn filename:app --host 0.0.0.0 --port 7860`
|