File size: 3,945 Bytes
85c36de
942bf87
51a3749
ea9a1bf
3a814dc
51a3749
 
85c36de
76a754e
51159d5
942bf87
85c36de
8efdc57
dc9275e
11e1095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc9275e
3b84715
dc9275e
8efdc57
85c36de
8efdc57
85c36de
8efdc57
 
85c36de
8efdc57
 
85c36de
8efdc57
85c36de
 
8efdc57
 
 
85c36de
8efdc57
51159d5
85c36de
8efdc57
 
85c36de
8efdc57
 
85c36de
 
 
3b84715
85c36de
 
c9a939f
 
 
85c36de
 
 
 
 
 
 
 
 
 
 
bc0eb4e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

# Restore the trained SVM classifier and its companion scaler. Both paths are
# relative, so the two artifacts must be present in the Space.
MODEL_PATH = "SVM.joblib"
SCALER_PATH = "norm.joblib"
model = joblib.load(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)


# Names of the descriptors passed to the model, in the column order that
# `extract_features` emits them: the 20 single-letter amino acid composition
# keys first, followed by a subset of the two-letter dipeptide composition
# keys (grouped below one row per leading residue).
# NOTE(review): presumably this is the subset retained by feature selection
# at training time -- the order must match the columns the SVM was fitted on;
# confirm against the training pipeline before editing.
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
    "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
    "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
    "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
    "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
    "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
    "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
    "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
    "HC", "HG", "HL", "HK", "HP",
    "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
    "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
    "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
    "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
    "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
    "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
    "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
    "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
    "WC",
    "YR", "YD", "YC", "YG", "YL", "YS", "YV",
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]

def extract_features(sequence):
    """Build the scaled feature column vector for one peptide sequence.

    The sequence is described with propy's combined composition descriptors,
    the leading 420 of them are scaled with the pre-fitted ``scaler``, and the
    result is reduced to the descriptors named in ``selected_features``.

    Args:
        sequence: Amino acid sequence in one-letter codes. Whitespace is
            removed and lower-case letters are upper-cased before the
            descriptors are computed.

    Returns:
        A float NumPy array of shape ``(len(selected_features), 1)`` holding
        the scaled value of each selected descriptor, in the order of
        ``selected_features``.

    Raises:
        ValueError: If ``sequence`` is empty (or only whitespace).
        KeyError: If a name in ``selected_features`` is not among the scaled
            descriptors.
    """
    # The scaler is applied to the first 420 descriptors only (20 single
    # residue + 400 dipeptide compositions per propy's documentation; the
    # remaining entries are tripeptide descriptors).
    # NOTE(review): presumably the scaler was fitted on exactly these 420
    # columns in this order -- confirm against the training pipeline.
    n_scaled = 420

    # propy counts upper-case one-letter codes; without this, lower-case or
    # whitespace-padded input would silently produce all-zero compositions.
    cleaned = "".join((sequence or "").split()).upper()
    if not cleaned:
        raise ValueError("Protein sequence must not be empty.")

    all_features = AAComposition.CalculateAADipeptideComposition(cleaned)

    # Keep names and values together so that selection below is by name.
    names = list(all_features)[:n_scaled]
    raw_row = np.array(
        [all_features[name] for name in names], dtype=float
    ).reshape(1, -1)

    # Min-max scaling expects a single-row (1, n_scaled) matrix.
    scaled_row = scaler.transform(raw_row).ravel()
    scaled_by_name = dict(zip(names, scaled_row))

    # Select by descriptor name, not by position: ``selected_features`` skips
    # some dipeptides (e.g. "AH"), so its positions do not line up with the
    # positions in propy's full output.
    selected_values = [scaled_by_name[name] for name in selected_features]

    return np.array(selected_values, dtype=float).reshape(-1, 1)



def predict(sequence):
    """Classify a peptide sequence as antimicrobial (AMP) or not.

    Args:
        sequence: Amino acid sequence forwarded to ``extract_features``.

    Returns:
        A 2-tuple ``(verdict, class_probabilities)``: the human-readable
        verdict string (class ``0`` is reported as the antimicrobial class)
        and the first row returned by ``model.predict_proba``.
    """
    # extract_features yields a column vector; the model wants one row.
    sample = extract_features(sequence).T

    predicted_class = model.predict(sample)[0]
    class_probabilities = model.predict_proba(sample)[0]

    if predicted_class == 0:
        verdict = "Potential Bioactive Peptide with Antimicrobial Properties (P-AMP)"
    else:
        verdict = "Likely Non-Antimicrobial Peptide"

    return verdict, class_probabilities

# Create Gradio interface
# `predict` returns two values (the verdict string and the class-probability
# row), so one output component is declared per value. A single `gr.Label`
# would receive the whole tuple, which it cannot render (it accepts a string,
# a number, or a {label: confidence} dict).
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=[
        gr.Label(label="Prediction"),
        # Rendered as text; NOTE(review): column order presumably follows
        # `model.classes_` -- confirm before labelling the columns.
        gr.Textbox(label="Class Probabilities"),
    ],
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
)

# Launch app
iface.launch(share=True)