File size: 3,827 Bytes
8efdc57
 
3b84715
942bf87
51a3749
ea9a1bf
3a814dc
51a3749
 
8efdc57
76a754e
51159d5
942bf87
dc9275e
8efdc57
dc9275e
11e1095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc9275e
3b84715
dc9275e
8efdc57
 
 
 
 
 
 
 
 
 
 
 
dc9275e
8efdc57
 
 
dc9275e
8efdc57
51159d5
8efdc57
 
 
 
 
 
 
d5efa2c
3b84715
 
 
cf1d474
8efdc57
2efd314
8efdc57
3b84715
 
 
 
 
8efdc57
3b84715
942bf87
8efdc57
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# TODO: link the backend to the frontend.

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

# Deserialize the two fitted artifacts this app depends on: the trained SVM
# classifier and the scaler used to normalize its inputs. Both paths are
# relative, so the files must sit in the directory the app is started from
# (i.e. they must be present in the Space).
model, scaler = (
    joblib.load("SVM.joblib"),
    joblib.load("norm.joblib"),
)


# Names of the features handed to the model, in the order they are passed:
# first the 20 single-residue names, then the chosen dipeptide names grouped
# by their leading residue. Each row below pairs a leading residue with the
# string of second residues that are kept for it.
selected_features = list("ARNDCEQGHILKMFPSTWYV") + [
    leading + trailing
    for leading, kept_partners in (
        ("A", "ARNDCEQGILKFPSTYV"),
        ("R", "ARNDCEQGHILKMFSTYV"),
        ("N", "ARDCEGILKP"),
        ("D", "ARNDCEQGILKPSTV"),
        ("C", "ARNDCEGHILKFPSTYV"),
        ("E", "ARNDCEQGILKPSTV"),
        ("Q", "ARCGLKPTV"),
        ("G", "ARDCEQGILKFPSWYV"),
        ("H", "CGLKP"),
        ("I", "ARDCEILKFPSTV"),
        ("L", "ARNDCEQGILKMFPSTV"),
        ("K", "ARNDCEQGHILKMFPSTV"),
        ("M", "AEILKFPSTV"),
        ("F", "RCQGILFSTYV"),
        ("P", "ARDCEGLKSV"),
        ("S", "ARDCEGHILKFPSTYV"),
        ("T", "ARNCEGILKFPSTV"),
        ("W", "C"),
        ("Y", "RDCGLSV"),
        ("V", "ARDCEQGILKPSTYV"),
    )
    for trailing in kept_partners
]

def extract_features(sequence):
    """Build the scaled feature column for one sequence.

    The sequence is described with propy's
    ``AAComposition.CalculateAADipeptideComposition``. The first 420
    descriptors are scaled with the module-level ``scaler`` and the entries
    named in ``selected_features`` are then picked out by *name*.

    Args:
        sequence: Amino-acid sequence as a string of one-letter residue codes.

    Returns:
        A NumPy array of shape ``(len(selected_features), 1)`` holding the
        scaled values in ``selected_features`` order.

    Raises:
        ValueError: If a name in ``selected_features`` is not among the first
            420 descriptors computed for the sequence.
    """
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Only the first 420 descriptors are scaled and used; presumably these are
    # the 20 single-residue and 400 dipeptide compositions (confirm against
    # propy). Names and values are sliced together so they stay aligned.
    descriptor_names = list(all_features)[:420]
    raw_row = np.array([all_features[name] for name in descriptor_names])

    # The scaler is handed a single row: shape (1, number_of_descriptors).
    scaled_row = scaler.transform(raw_row.reshape(1, -1)).ravel()

    # Look values up by descriptor name. Indexing the scaled row with a
    # feature's position inside selected_features would be wrong, because
    # selected_features skips some descriptors and so the two orderings
    # diverge.
    scaled_by_name = dict(zip(descriptor_names, scaled_row))

    missing = [name for name in selected_features if name not in scaled_by_name]
    if missing:
        # Dropping features silently would give the model a shorter,
        # misaligned input; fail with the offending names instead.
        raise ValueError(
            f"Selected features not found among computed descriptors: {missing}"
        )

    selected_values = [scaled_by_name[name] for name in selected_features]

    # Column vector, one row per selected feature (callers transpose it).
    return np.array(selected_values).reshape(-1, 1)



def predict(sequence):
    """Classify a peptide sequence as "AMP" or "Non-AMP".

    The raw text is cleaned first: all whitespace (spaces, line breaks from a
    pasted sequence) is removed and letters are upper-cased, matching the
    upper-case one-letter codes used for the feature names.

    Args:
        sequence: Amino-acid sequence text as entered by the user.

    Returns:
        ``"AMP"`` when the model outputs class ``0``, otherwise ``"Non-AMP"``.

    Raises:
        gr.Error: If fewer than two residues remain after cleaning; Gradio
            shows the message to the user.
    """
    cleaned = "".join((sequence or "").split()).upper()

    # A dipeptide needs two residues, so shorter input cannot be described.
    # Reject it here with a readable message rather than letting the feature
    # computation fail on it.
    if len(cleaned) < 2:
        raise gr.Error("Please enter a sequence of at least two amino acids.")

    features = extract_features(cleaned)

    # extract_features returns a column vector; the model takes one row.
    prediction = model.predict(features.T)[0]

    # NOTE(review): class 0 is reported as "AMP" -- confirm this matches the
    # label encoding used when the model was trained.
    return "AMP" if prediction == 0 else "Non-AMP"

# Web UI: one text box in, one label out, wired to predict().
iface = gr.Interface(
    title="AMP Classifier",
    description=(
        "Enter an amino acid sequence to predict whether it's an "
        "antimicrobial peptide (AMP) or not."
    ),
    fn=predict,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
)

# Start serving as soon as the script runs; share=True asks Gradio for a
# public share link.
iface.launch(share=True)