nonzeroexit committed on
Commit
2efd314
·
verified ·
1 Parent(s): ea9a1bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -52
app.py CHANGED
@@ -4,80 +4,60 @@ import numpy as np
4
  import pandas as pd
5
  from propy import AAComposition
6
  from sklearn.preprocessing import MinMaxScaler
 
 
7
 
8
- # Load trained SVM model and scaler (Ensure both files exist in the Space)
9
  model = joblib.load("SVM.joblib")
10
  scaler = joblib.load("norm.joblib")
11
 
 
 
12
 
13
- # List of features used in your model
14
  selected_features = [
15
- "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
16
- "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
17
- "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
18
- "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
19
- "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
20
- "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
21
- "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
22
- "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
23
- "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
24
- "HC", "HG", "HL", "HK", "HP",
25
- "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
26
- "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
27
- "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
28
- "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
29
- "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
30
- "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
31
- "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
32
- "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
33
- "WC",
34
- "YR", "YD", "YC", "YG", "YL", "YS", "YV",
35
- "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
36
- ]
37
 
38
  def extract_features(sequence):
39
  """Extract only the required features and normalize them."""
40
- # Compute all possible features
41
- all_features = AAComposition.CalculateAADipeptideComposition(sequence) # Amino Acid Composition
42
- # Extract the values from the dictionary
43
- feature_values = list(all_features.values()) # Extract values only
44
- # Convert to NumPy array for normalization
45
- feature_array = np.array(feature_values).reshape(-1, 1)
46
- feature_array = feature_array[: 420]
47
- # Min-Max Normalization
48
- normalized_features = scaler.transform(feature_array.T)
49
 
50
- # Reshape normalized_features back to a single dimension
51
- normalized_features = normalized_features.flatten() # Flatten array
 
 
 
52
 
53
- # Create a dictionary with selected features
54
- selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features)
55
- if feature in all_features}
56
-
57
- # Convert dictionary to dataframe
58
  selected_feature_df = pd.DataFrame([selected_feature_dict])
59
-
60
- # Convert dataframe to numpy array
61
- selected_feature_array = selected_feature_df.T.to_numpy()
62
-
63
- return selected_feature_array
64
-
65
-
66
 
67
  def predict(sequence):
68
  """Predict AMP vs Non-AMP"""
69
  features = extract_features(sequence)
70
  prediction = model.predict(features.T)[0]
71
- return "AMP" if prediction == 0 else "Non-AMP"
 
 
 
 
 
 
 
 
72
 
73
- # Create Gradio interface
74
  iface = gr.Interface(
75
  fn=predict,
76
  inputs=gr.Textbox(label="Enter Protein Sequence"),
77
  outputs=gr.Label(label="Prediction"),
78
  title="AMP Classifier",
79
- description="Enter an amino acid sequence to predict whether it's an antimicrobial peptide (AMP) or not."
80
  )
81
 
82
- # Launch app
83
- iface.launch(share=True)
 
 
4
  import pandas as pd
5
  from propy import AAComposition
6
  from sklearn.preprocessing import MinMaxScaler
7
+ from fastapi import FastAPI
8
+ from gradio.routes import mount_gradio_app
9
 
10
+ # Load trained SVM model and scaler
11
  model = joblib.load("SVM.joblib")
12
  scaler = joblib.load("norm.joblib")
13
 
14
+ # FastAPI instance
15
+ app = FastAPI()
16
 
17
+ # Selected features used in the model
18
  selected_features = [
19
+ "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
20
+ ] # (Shortened for brevity)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def extract_features(sequence):
23
  """Extract only the required features and normalize them."""
24
+ all_features = AAComposition.CalculateAADipeptideComposition(sequence)
25
+ feature_values = list(all_features.values())
26
+ feature_array = np.array(feature_values).reshape(-1, 1)[:420] # Ensure correct shape
27
+ normalized_features = scaler.transform(feature_array.T).flatten()
 
 
 
 
 
28
 
29
+ selected_feature_dict = {
30
+ feature: normalized_features[i]
31
+ for i, feature in enumerate(selected_features)
32
+ if feature in all_features
33
+ }
34
 
 
 
 
 
 
35
  selected_feature_df = pd.DataFrame([selected_feature_dict])
36
+ return selected_feature_df.to_numpy()
 
 
 
 
 
 
37
 
38
  def predict(sequence):
39
  """Predict AMP vs Non-AMP"""
40
  features = extract_features(sequence)
41
  prediction = model.predict(features.T)[0]
42
+ return {"prediction": "AMP" if prediction == 0 else "Non-AMP"}
43
+
44
+ # FastAPI endpoint
45
@app.post("/predict/")
async def predict_api(request: dict):
    """Classify the peptide supplied in a JSON request body.

    Args:
        request: Parsed request body; the peptide is read from its
            ``"sequence"`` key.

    Returns:
        The dict produced by ``predict()``, or ``{"error": ...}`` when
        ``"sequence"`` is missing, is not a string, or is not between 10 and
        100 characters long.
    """
    sequence = request.get("sequence", "")
    # The body can carry any JSON type under "sequence". A number would crash
    # len(), and a list of acceptable length would slip through to feature
    # extraction, so reject anything that is not a string up front.
    if not isinstance(sequence, str) or not 10 <= len(sequence) <= 100:
        return {"error": "Sequence length must be between 10 and 100."}
    return predict(sequence)
51
 
52
# Gradio Interface (optional if you want UI access)
iface = gr.Interface(
    # predict() returns {"prediction": <class name>} for the JSON endpoint.
    # gr.Label interprets a dict as {class: confidence}, so pass it only the
    # class name string.
    fn=lambda sequence: predict(sequence)["prediction"],
    inputs=gr.Textbox(label="Enter Protein Sequence"),
    outputs=gr.Label(label="Prediction"),
    title="AMP Classifier",
    description="Enter an amino acid sequence to predict AMP or Non-AMP.",
)

# Serve the Gradio UI from the same FastAPI application under /gradio.
app = mount_gradio_app(app, iface, path="/gradio")
62
+
63
+ # Run the server with: `uvicorn app:app --host 0.0.0.0 --port 7860`