File size: 2,136 Bytes
3b84715
942bf87
51a3749
ea9a1bf
3a814dc
51a3749
2efd314
 
51a3749
2efd314
76a754e
51159d5
942bf87
2efd314
 
dc9275e
2efd314
dc9275e
2efd314
 
dc9275e
3b84715
dc9275e
2efd314
 
 
 
dc9275e
2efd314
 
 
 
 
dc9275e
51159d5
2efd314
d5efa2c
3b84715
 
 
cf1d474
2efd314
 
 
 
 
 
 
 
 
3b84715
2efd314
3b84715
 
 
 
 
2efd314
3b84715
942bf87
2efd314
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
import joblib
import numpy as np
import pandas as pd
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler
from fastapi import FastAPI
from gradio.routes import mount_gradio_app

# Serialized artifacts, resolved relative to the process working directory.
_MODEL_PATH = "SVM.joblib"
_SCALER_PATH = "norm.joblib"

# Deserialize the classifier first, then the feature scaler (both via joblib).
model = joblib.load(_MODEL_PATH)
scaler = joblib.load(_SCALER_PATH)

# ASGI application object; routes and the Gradio UI are attached to it below.
app = FastAPI()

# Names of the descriptors fed to the model, in the order the model expects.
# The original author marked this list as "shortened for brevity", so the
# deployed list may contain more entries than the 20 single-letter codes here.
selected_features = "A R N D C E Q G H I L K M F P S T W Y V".split()

def extract_features(sequence):
    """Compute, normalize, and select the model input features for a sequence.

    The descriptors returned by
    ``AAComposition.CalculateAADipeptideComposition`` are truncated to the
    leading 420 entries, scaled with the module-level ``scaler``, and then
    reduced to the names listed in ``selected_features``.

    Each selected name is looked up by its own column in the scaled vector.
    (Indexing by the name's position inside ``selected_features`` is only
    correct when that list is an exact, same-order prefix of the descriptor
    names.)

    Args:
        sequence: Amino-acid sequence to describe.

    Returns:
        A 2-D NumPy array of shape ``(1, k)``, where ``k`` is the number of
        names in ``selected_features`` found among the scaled descriptors.
        Names that are not found are skipped.
    """
    # Number of leading descriptors handed to the scaler.
    # NOTE(review): presumably 20 amino-acid + 400 dipeptide compositions,
    # matching what the scaler was fitted on -- confirm against training code.
    n_scaled = 420

    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Keep names and values aligned so columns can be addressed by name.
    feature_names = list(all_features)[:n_scaled]
    raw_row = np.array(
        [all_features[name] for name in feature_names]
    ).reshape(1, -1)  # one sample, one column per descriptor
    normalized = scaler.transform(raw_row).flatten()

    column_of = {name: column for column, name in enumerate(feature_names)}
    selected = [
        normalized[column_of[name]]
        for name in selected_features
        if name in column_of
    ]
    return np.array([selected])

def predict(sequence):
    """Predict AMP vs Non-AMP for a single sequence.

    Args:
        sequence: Amino-acid sequence to classify.

    Returns:
        A dict ``{"prediction": "AMP"}`` when the model outputs class ``0``,
        otherwise ``{"prediction": "Non-AMP"}``.
    """
    features = extract_features(sequence)
    # scikit-learn estimators expect (n_samples, n_features). Force a single
    # sample row; transposing the (1, k) feature row would instead present
    # k one-feature samples to the model.
    sample = features.reshape(1, -1)
    prediction = model.predict(sample)[0]
    # NOTE(review): class 0 is reported as "AMP" -- confirm this matches the
    # label encoding used when the model was trained.
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}

# FastAPI endpoint
@app.post("/predict/")
async def predict_api(request: dict):
    """Classify the sequence supplied in a JSON request body.

    Args:
        request: Parsed JSON body; the sequence is read from its
            ``"sequence"`` key (a missing key is treated as an empty string).

    Returns:
        The dict produced by ``predict`` on success, or ``{"error": ...}``
        when the sequence is not a string or its length is outside 10-100.
    """
    sequence = request.get("sequence", "")
    # Reject non-string payloads (numbers, lists, objects, null) up front;
    # they would otherwise raise in len() or reach the descriptor code.
    if not isinstance(sequence, str):
        return {"error": "Sequence must be a string."}
    if not 10 <= len(sequence) <= 100:
        return {"error": "Sequence length must be between 10 and 100."}
    return predict(sequence)

# Gradio Interface (optional if you want UI access)
def _predict_label(sequence):
    """Return only the predicted class name, for display in the Gradio UI.

    ``predict`` returns ``{"prediction": <label>}``. ``gr.Label`` interprets a
    dict as a label -> confidence mapping, so the UI is given the plain label
    string instead.
    """
    return predict(sequence)["prediction"]


iface = gr.Interface(
    fn=_predict_label,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict AMP or Non-AMP."
)

# Serve the UI from the same ASGI app as the JSON API, under /gradio.
app = mount_gradio_app(app, iface, path="/gradio")

# Run the server with: `uvicorn filename:app --host 0.0.0.0 --port 7860`