Spaces:
Running
Running
File size: 3,566 Bytes
85c36de 942bf87 51a3749 ea9a1bf 3a814dc 51a3749 76a754e 51159d5 942bf87 85c36de dc9275e 11e1095 dc9275e 3b84715 dc9275e 5810e43 8efdc57 85c36de 8efdc57 85c36de 5810e43 85c36de 8efdc57 85c36de 51159d5 85c36de 8efdc57 85c36de 8efdc57 85c36de 3b84715 85c36de 5810e43 c9a939f 85c36de bc0eb4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
# Pre-trained artefacts, loaded once when the module is imported. Both paths
# are relative, so they are resolved against the current working directory.
# NOTE(review): joblib.load unpickles arbitrary Python objects; only load
# files that come from a trusted source.

# Classifier used by predict() through .predict() and .predict_proba().
model = joblib.load("SVM.joblib")
# Fitted scaler used by extract_features() through .transform().
# NOTE(review): presumably the MinMaxScaler imported above — confirm.
scaler = joblib.load("norm.joblib")
# Names of the features that extract_features() emits, in emission order.
# Written as whitespace-separated tokens and split into a list of strings.
selected_features = (
    # Single-letter entries.
    "A R N D C E Q G H I L K M F P S T W Y V "
    # Two-letter entries, grouped by their first letter.
    "AA AR AN AD AC AE AQ AG AI AL AK AF AP AS AT AY AV "
    "RA RR RN RD RC RE RQ RG RH RI RL RK RM RF RS RT RY RV "
    "NA NR ND NC NE NG NI NL NK NP "
    "DA DR DN DD DC DE DQ DG DI DL DK DP DS DT DV "
    "CA CR CN CD CC CE CG CH CI CL CK CF CP CS CT CY CV "
    "EA ER EN ED EC EE EQ EG EI EL EK EP ES ET EV "
    "QA QR QC QG QL QK QP QT QV "
    "GA GR GD GC GE GQ GG GI GL GK GF GP GS GW GY GV "
    "HC HG HL HK HP "
    "IA IR ID IC IE II IL IK IF IP IS IT IV "
    "LA LR LN LD LC LE LQ LG LI LL LK LM LF LP LS LT LV "
    "KA KR KN KD KC KE KQ KG KH KI KL KK KM KF KP KS KT KV "
    "MA ME MI ML MK MF MP MS MT MV "
    "FR FC FQ FG FI FL FF FS FT FY FV "
    "PA PR PD PC PE PG PL PK PS PV "
    "SA SR SD SC SE SG SH SI SL SK SF SP SS ST SY SV "
    "TA TR TN TC TE TG TI TL TK TF TP TS TT TV "
    "WC "
    "YR YD YC YG YL YS YV "
    "VA VR VD VC VE VQ VG VI VL VK VP VS VT VY VV"
).split()
def extract_features(sequence):
    """Compute, scale and select the composition features for one sequence.

    The descriptors returned by
    ``AAComposition.CalculateAADipeptideComposition`` are truncated to the
    first 420 entries, scaled with the module-level ``scaler``, and then
    reduced to the names listed in ``selected_features``.

    Each selected feature is looked up by *name* in the scaled vector.
    Indexing the scaled vector with the position of the name inside
    ``selected_features`` would pair most names with the value of a
    different descriptor, because ``selected_features`` skips entries
    (for example there is no ``"AH"`` between ``"AG"`` and ``"AI"``).

    Args:
        sequence: Amino-acid sequence in one-letter code. Whitespace is
            removed and letters are upper-cased before descriptors are
            computed.

    Returns:
        A NumPy float array of shape ``(len(selected_features), 1)`` holding
        the scaled values in ``selected_features`` order.

    Raises:
        ValueError: If fewer than two residues remain after whitespace
            removal.
        KeyError: If a name in ``selected_features`` is not among the first
            420 descriptor names.
    """
    # Number of leading descriptors passed to the scaler.
    # NOTE(review): presumably 20 single-residue + 400 dipeptide
    # compositions, with propy's tripeptide entries after them — confirm
    # against the propy version in use.
    n_descriptors = 420

    cleaned = "".join((sequence or "").split()).upper()
    # propy normalises counts by the sequence length (and by length - 1 for
    # dipeptides), so shorter input would otherwise surface as a bare
    # ZeroDivisionError.
    if len(cleaned) < 2:
        raise ValueError("Sequence must contain at least two residues.")

    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Keep names and values paired so the selection below is by name.
    descriptor_names = list(all_features)[:n_descriptors]
    raw_row = np.array(
        [all_features[name] for name in descriptor_names], dtype=float
    ).reshape(1, -1)

    # The scaler works on one row per sample.
    scaled_row = scaler.transform(raw_row).flatten()
    scaled_by_name = dict(zip(descriptor_names, scaled_row))

    selected_values = [scaled_by_name[name] for name in selected_features]
    # Column vector, matching what predict() transposes before calling the model.
    return np.array(selected_values, dtype=float).reshape(-1, 1)
def predict(sequence):
    """Predict whether a sequence is an antimicrobial peptide (AMP).

    Args:
        sequence: Amino-acid sequence to classify; passed unchanged to
            ``extract_features``.

    Returns:
        A ``(prediction_label, probability_amp)`` tuple. ``prediction_label``
        is the human-readable label for the class returned by
        ``model.predict`` (class ``0`` is reported as AMP).
        ``probability_amp`` is the first column of ``model.predict_proba``
        for this sample as a plain ``float``; it is reported for both
        predicted classes.
    """
    # extract_features returns a column vector; the model takes one row per sample.
    sample = extract_features(sequence).T

    prediction = model.predict(sample)[0]
    # predict_proba yields one row per sample; take the row for our only sample.
    probabilities = model.predict_proba(sample)[0]

    # NOTE(review): column 0 is assumed to belong to class 0 (the AMP class),
    # i.e. model.classes_[0] == 0 — confirm against the trained model.
    probability_amp = float(probabilities[0])

    if prediction == 0:
        prediction_label = "Potential Bioactive Peptide with Antimicrobial Properties (P-AMP)"
    else:
        prediction_label = "Likely Non-Antimicrobial Peptide"

    return prediction_label, probability_amp
# Web UI definition. predict() returns two values (label text, probability),
# so the interface declares one output component per returned value; a single
# Label component cannot render a (label, probability) tuple.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=[
        gr.Label(label="Prediction"),
        gr.Number(label="AMP Probability"),
    ],
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not.",
)

# Start the server; share=True additionally requests a public share link.
iface.launch(share=True)