Spaces:

nonzeroexit
/

AMP-Classifier

Running

App Files Files Community

AMP-Classifier / app.py

nonzeroexit

Update app.py

11e1095 verified 5 months ago

raw

history blame

3.71 kB

	import gradio as gr
	import joblib
	import numpy as np
	import pandas as pd
	from propy import AAComposition
	from sklearn.preprocessing import MinMaxScaler
	from fastapi import FastAPI
	from gradio.routes import mount_gradio_app

	# HTTP application object; the prediction route and the Gradio UI are attached to it below
	app = FastAPI()

	# Pre-trained artifacts, read from the working directory once at import time:
	# the scaler used to normalize descriptors and the SVM classifier itself
	scaler = joblib.load("norm.joblib")
	model = joblib.load("SVM.joblib")

	# Names of the descriptors that make up the model input: the 20 single-residue
	# composition keys followed by a hand-picked subset of the two-letter
	# (dipeptide) keys, so not every residue pair appears here (e.g. "AH" is absent).
	# extract_features iterates this list in order to build the feature row, so
	# neither the entries nor their order should be changed casually.
	# NOTE(review): presumably this must match the columns the SVM was trained on - confirm.
	selected_features = [
	"A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
	"AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
	"RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
	"NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
	"DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
	"CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
	"EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
	"QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
	"GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
	"HC", "HG", "HL", "HK", "HP",
	"IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
	"LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
	"KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
	"MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
	"FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
	"PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
	"SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
	"TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
	"WC",
	"YR", "YD", "YC", "YG", "YL", "YS", "YV",
	"VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
	]

	def extract_features(sequence):
	    """Compute, normalize and select the descriptors the model consumes.

	    The composition descriptors of ``sequence`` are computed with propy, the
	    first 420 of them are normalized with the module-level ``scaler``, and
	    the entries named in ``selected_features`` are picked out *by name*.

	    The previous implementation indexed the normalized vector with the
	    position of each name inside ``selected_features``; because that list is
	    only a subset of the descriptors, every entry after the first gap
	    received the value of a different descriptor.

	    Args:
	        sequence: Amino-acid sequence string handed to propy.

	    Returns:
	        A NumPy array of shape ``(1, k)`` holding one normalized value per
	        selected feature, in ``selected_features`` order.
	    """
	    all_features = AAComposition.CalculateAADipeptideComposition(sequence)

	    # Only the first 420 descriptors are passed through the scaler (same
	    # cut-off as before). NOTE(review): presumably these are the 20
	    # single-residue + 400 dipeptide values the scaler was fitted on - confirm.
	    scaled_names = list(all_features)[:420]
	    raw_row = np.array(
	        [all_features[name] for name in scaled_names], dtype=float
	    ).reshape(1, -1)
	    normalized_features = scaler.transform(raw_row).flatten()

	    # Map each descriptor name to its column in the normalized vector so the
	    # selection below is by name rather than by position in selected_features.
	    column_of = {name: idx for idx, name in enumerate(scaled_names)}

	    # Names that are not among the scaled descriptors are skipped, mirroring
	    # the membership filter of the original comprehension.
	    selected_values = [
	        normalized_features[column_of[name]]
	        for name in selected_features
	        if name in column_of
	    ]

	    return np.array([selected_values], dtype=float)

	def predict(sequence):
	    """Classify a peptide sequence as AMP or Non-AMP.

	    Args:
	        sequence: Amino-acid sequence string; forwarded to
	            ``extract_features``.

	    Returns:
	        ``{"prediction": "AMP"}`` when the model outputs class 0, otherwise
	        ``{"prediction": "Non-AMP"}``.
	    """
	    features = extract_features(sequence)

	    # One sequence is one sample. The feature row used to be transposed
	    # before prediction, which presented the model with k single-feature
	    # samples instead of one k-feature sample; force a single row instead.
	    sample = np.asarray(features).reshape(1, -1)
	    prediction = model.predict(sample)[0]

	    # NOTE(review): class 0 is reported as "AMP" - confirm against the label
	    # encoding used when the model was trained.
	    return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}

	# FastAPI endpoint
	@app.post("/predict/")
	async def predict_api(request: dict):
	    """Handle ``POST /predict/``: validate the JSON body and classify it.

	    Args:
	        request: Parsed JSON body; the sequence is read from its
	            ``"sequence"`` key.

	    Returns:
	        The dictionary produced by ``predict`` on success, or
	        ``{"error": ...}`` when the sequence is missing, is not a string, or
	        its length is outside 10..100.
	    """
	    sequence = request.get("sequence", "")

	    # A JSON number, list or null under "sequence" used to reach len() or
	    # the feature extractor and fail with an unhandled exception; reject
	    # anything that is not a string with the same error payload.
	    if not isinstance(sequence, str) or not 10 <= len(sequence) <= 100:
	        return {"error": "Sequence length must be between 10 and 100."}

	    return predict(sequence)

	# Gradio Interface (optional if you want UI access)
	def _predict_label(sequence):
	    """Return only the predicted class name for display in ``gr.Label``.

	    ``predict`` returns ``{"prediction": <class>}`` for the JSON API. A Label
	    component interprets a dict as a label -> confidence mapping, so passing
	    that dict through would present "prediction" as the class. The UI
	    therefore gets the bare class string.
	    """
	    return predict(sequence)["prediction"]


	iface = gr.Interface(
	    fn=_predict_label,
	    inputs=gr.Textbox(label="Enter Protein Sequence"),
	    outputs=gr.Label(label="Prediction"),
	    title="AMP Classifier",
	    description="Enter an amino acid sequence to predict AMP or Non-AMP.",
	)

	# Serve the UI from the same FastAPI application, under /gradio
	app = mount_gradio_app(app, iface, path="/gradio")

	# Run the server with: `uvicorn app:app --host 0.0.0.0 --port 7860` (this module is app.py)