"""AMP / Non-AMP classifier served through FastAPI, with a Gradio UI mounted on it.

The module loads a pre-trained model ("SVM.joblib") and a fitted scaler
("norm.joblib") at import time, exposes a JSON endpoint at ``/predict/`` and
mounts a Gradio interface at ``/gradio``.
"""

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from fastapi import FastAPI
from gradio.routes import mount_gradio_app
from propy import AAComposition
from sklearn.preprocessing import MinMaxScaler  # not referenced directly here; kept from the original import list

# Load trained SVM model and scaler
model = joblib.load("SVM.joblib")
scaler = joblib.load("norm.joblib")

# FastAPI instance
app = FastAPI()

# Selected features used in the model
selected_features = [
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
    "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
]  # (Shortened for brevity)

# Number of leading descriptors that are passed through the scaler.
# NOTE(review): presumably 20 amino-acid + 400 dipeptide compositions --
# confirm against the data the scaler was fitted on.
N_SCALED_FEATURES = 420

# Accepted sequence length range (inclusive) for both the API and the UI.
MIN_SEQUENCE_LENGTH = 10
MAX_SEQUENCE_LENGTH = 100
LENGTH_ERROR = "Sequence length must be between 10 and 100."


def _clean_sequence(sequence):
    """Strip surrounding whitespace and upper-case a string sequence.

    The descriptor names in ``selected_features`` are upper-case residue
    codes, so the input is normalised to the same case before composition is
    computed. Non-string values are returned untouched so that validation can
    reject them.
    """
    if isinstance(sequence, str):
        return sequence.strip().upper()
    return sequence


def _is_valid_sequence(sequence):
    """Return True when *sequence* is a string of an accepted length."""
    return (
        isinstance(sequence, str)
        and MIN_SEQUENCE_LENGTH <= len(sequence) <= MAX_SEQUENCE_LENGTH
    )


def extract_features(sequence):
    """Extract only the required features and normalize them.

    Computes the composition descriptors for *sequence*, scales the first
    ``N_SCALED_FEATURES`` of them with the loaded scaler, and returns the
    scaled values of the descriptors named in ``selected_features``.

    Each selected descriptor is looked up by *name* in the scaled vector.
    The previous implementation indexed the scaled vector by the descriptor's
    position inside ``selected_features``, which only yields the right values
    when ``selected_features`` is an exact prefix of the descriptor order.

    Names in ``selected_features`` that are not among the scaled descriptors
    are skipped, as before.

    Returns:
        A 2-D array of shape ``(1, k)`` where ``k`` is the number of selected
        descriptors that were found.
    """
    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

    # Keep the descriptor order produced by propy; the scaler is applied
    # positionally to the first N_SCALED_FEATURES values.
    scaled_names = list(all_features)[:N_SCALED_FEATURES]
    raw_row = np.array(
        [all_features[name] for name in scaled_names], dtype=float
    ).reshape(1, -1)
    normalized_features = scaler.transform(raw_row).flatten()

    column_of = {name: column for column, name in enumerate(scaled_names)}
    selected_values = [
        normalized_features[column_of[feature]]
        for feature in selected_features
        if feature in column_of
    ]
    # Wrap in an outer list so the result is always a single-row matrix,
    # including the degenerate case where nothing was selected.
    return np.array([selected_values], dtype=float)


def predict(sequence):
    """Predict AMP vs Non-AMP.

    Returns:
        ``{"prediction": "AMP"}`` when the model outputs class ``0``,
        otherwise ``{"prediction": "Non-AMP"}``.
    """
    features = extract_features(sequence)
    # One sample per row: force a (1, n_features) matrix. Transposing the row
    # returned by extract_features would present n_features single-feature
    # samples to the model instead.
    sample = np.asarray(features).reshape(1, -1)
    prediction = model.predict(sample)[0]
    # NOTE(review): class 0 is mapped to "AMP" as in the original code --
    # confirm this matches the label encoding used in training.
    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}


# FastAPI endpoint
@app.post("/predict/")
async def predict_api(request: dict):
    """Classify the sequence found under the ``"sequence"`` key of the JSON body.

    Returns the ``predict`` result, or ``{"error": ...}`` when the sequence is
    missing, not a string, or outside the accepted length range.
    """
    sequence = _clean_sequence(request.get("sequence", ""))
    if not _is_valid_sequence(sequence):
        return {"error": LENGTH_ERROR}
    return predict(sequence)


def _predict_label(sequence):
    """Gradio adapter: validate the input and return the class name as a string.

    ``gr.Label`` is given the bare class string rather than the
    ``{"prediction": ...}`` dict returned by ``predict``, because a dict
    passed to ``gr.Label`` is interpreted as a label -> confidence mapping.
    """
    sequence = _clean_sequence(sequence)
    if not _is_valid_sequence(sequence):
        raise gr.Error(LENGTH_ERROR)
    return predict(sequence)["prediction"]


# Gradio Interface (optional if you want UI access)
iface = gr.Interface(
    fn=_predict_label,
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict AMP or Non-AMP.",
)

app = mount_gradio_app(app, iface, path="/gradio")

# Run the server with: `uvicorn filename:app --host 0.0.0.0 --port 7860`