File size: 3,900 Bytes
741f7a3
 
942bf87
51a3749
ea9a1bf
3a814dc
51a3749
 
741f7a3
 
 
 
76a754e
51159d5
942bf87
8efdc57
dc9275e
11e1095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc9275e
741f7a3
 
 
 
 
3b84715
dc9275e
8efdc57
741f7a3
8efdc57
741f7a3
8efdc57
 
741f7a3
8efdc57
 
 
741f7a3
8efdc57
 
 
 
51159d5
8efdc57
 
 
 
741f7a3
 
 
3b84715
741f7a3
 
 
 
 
 
 
 
 
942bf87
741f7a3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler

# Initialize FastAPI app (the route decorators below register against it)
app = FastAPI()

# Load trained SVM model and scaler once at import time so every request
# reuses the same objects. Both paths are relative to the working directory.
# NOTE(review): joblib artifacts are pickle-based; only load files from a
# trusted source.
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")

# List of features used in your model.
# The first row holds the 20 single-letter amino-acid names; the remaining
# rows hold a subset of two-letter (dipeptide) names, grouped by first letter.
# NOTE(review): the order presumably has to match the column order the model
# was trained with -- confirm before reordering or editing entries.
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
    "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
    "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
    "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
    "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
    "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
    "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
    "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
    "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
    "HC", "HG", "HL", "HK", "HP",
    "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
    "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
    "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
    "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
    "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
    "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
    "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
    "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
    "WC",
    "YR", "YD", "YC", "YG", "YL", "YS", "YV",
    "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
]

# Define request model
class SequenceRequest(BaseModel):
    """Request body for the prediction endpoint: a single sequence string."""

    # Sequence text that is handed to the feature extractor as-is.
    sequence: str

# Feature extraction function
def extract_features(sequence):
    """Build the scaled, name-selected feature column for one sequence.

    The sequence is described with propy's amino-acid/dipeptide composition,
    the leading 420 descriptors are scaled with the module-level ``scaler``,
    and the entries named in ``selected_features`` are returned in that
    list's order.

    Args:
        sequence: Protein sequence in one-letter codes. Surrounding
            whitespace is ignored and letters are upper-cased.

    Returns:
        A float NumPy array of shape ``(len(selected_features), 1)``.

    Raises:
        ValueError: If the sequence has fewer than two residues, or if a
            name in ``selected_features`` is not among the scaled descriptors.
    """
    # propy's composition functions count upper-case one-letter codes, so
    # lower-case input would otherwise yield an all-zero composition.
    sequence = sequence.strip().upper()
    if len(sequence) < 2:
        # Dipeptide composition is undefined for fewer than two residues;
        # fail with a clear message instead of a division error.
        raise ValueError("Sequence must contain at least two residues.")

    # Compute all possible features (name -> value, in propy's output order).
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Only the leading 420 descriptors are scaled (presumably the 20
    # amino-acid and 400 dipeptide compositions the scaler was fitted on).
    n_scaled = 420
    scaled_names = list(all_features)[:n_scaled]
    raw_row = np.array(
        [all_features[name] for name in scaled_names], dtype=float
    ).reshape(1, -1)

    # Min-Max normalization; the scaler expects one row per sample.
    normalized = scaler.transform(raw_row).flatten()

    # Look each selected feature up by NAME. Indexing the scaled vector by a
    # feature's position inside selected_features is wrong because that list
    # skips names (e.g. "AH"), which shifts every later feature onto another
    # descriptor's value.
    # NOTE(review): confirm the model was trained on name-selected columns.
    column_of = {name: col for col, name in enumerate(scaled_names)}
    missing = [name for name in selected_features if name not in column_of]
    if missing:
        raise ValueError(f"Features not produced for this sequence: {missing}")

    picked = [normalized[column_of[name]] for name in selected_features]
    # Column vector, one row per selected feature (callers transpose it).
    return np.array(picked, dtype=float).reshape(-1, 1)

# Prediction endpoint
@app.post("/predict")
def predict(request: SequenceRequest):
    """Classify the submitted sequence as "AMP" or "Non-AMP".

    Any exception raised while extracting features or running the model is
    reported to the client as an HTTP 400 whose detail is the error text.
    """
    try:
        feature_column = extract_features(request.sequence)
        # Transpose before predicting, then take the first (only) result.
        predicted_class = model.predict(feature_column.T)[0]
        # Class 0 is reported as "AMP"; every other class as "Non-AMP".
        if predicted_class == 0:
            label = "AMP"
        else:
            label = "Non-AMP"
        return {"prediction": label}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

# Root endpoint for health check
@app.get("/")
def read_root():
    """Return a fixed status payload so callers can see the app is serving."""
    health = {"status": "OK"}
    return health