nonzeroexit committed on
Commit
dc9275e
·
verified ·
1 Parent(s): 3b84715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -6
app.py CHANGED
@@ -8,13 +8,50 @@ from sklearn.preprocessing import MinMaxScaler
8
  model = joblib.load("SVM.joblib")
9
  scaler = MinMaxScaler()
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def extract_features(sequence):
12
- """Calculate AAC, Dipeptide Composition, and normalize features."""
13
- # Calculate Amino Acid Composition (AAC) and convert to array
14
- aac = np.array(list(AAComposition.CalculateAADipeptideComposition(sequence)), dtype=float)
15
-
16
- # Normalize using the pre-trained scaler (Ensure the scaler is loaded correctly)
17
- normalized_features = scaler.fit_transform([aac]) # Don't use fit_transform(), only transform()
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  return normalized_features
20
 
 
8
  model = joblib.load("SVM.joblib")
9
  scaler = MinMaxScaler()
10
 
11
+
12
+ # Names of the features the model uses (single amino acids, then dipeptides); extract_features emits values in this order
13
+ selected_features = [
14
+ "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V",
15
+ "AA", "AR", "AN", "AD", "AC", "AE", "AQ", "AG", "AI", "AL", "AK", "AF", "AP", "AS", "AT", "AY", "AV",
16
+ "RA", "RR", "RN", "RD", "RC", "RE", "RQ", "RG", "RH", "RI", "RL", "RK", "RM", "RF", "RS", "RT", "RY", "RV",
17
+ "NA", "NR", "ND", "NC", "NE", "NG", "NI", "NL", "NK", "NP",
18
+ "DA", "DR", "DN", "DD", "DC", "DE", "DQ", "DG", "DI", "DL", "DK", "DP", "DS", "DT", "DV",
19
+ "CA", "CR", "CN", "CD", "CC", "CE", "CG", "CH", "CI", "CL", "CK", "CF", "CP", "CS", "CT", "CY", "CV",
20
+ "EA", "ER", "EN", "ED", "EC", "EE", "EQ", "EG", "EI", "EL", "EK", "EP", "ES", "ET", "EV",
21
+ "QA", "QR", "QC", "QG", "QL", "QK", "QP", "QT", "QV",
22
+ "GA", "GR", "GD", "GC", "GE", "GQ", "GG", "GI", "GL", "GK", "GF", "GP", "GS", "GW", "GY", "GV",
23
+ "HC", "HG", "HL", "HK", "HP",
24
+ "IA", "IR", "ID", "IC", "IE", "II", "IL", "IK", "IF", "IP", "IS", "IT", "IV",
25
+ "LA", "LR", "LN", "LD", "LC", "LE", "LQ", "LG", "LI", "LL", "LK", "LM", "LF", "LP", "LS", "LT", "LV",
26
+ "KA", "KR", "KN", "KD", "KC", "KE", "KQ", "KG", "KH", "KI", "KL", "KK", "KM", "KF", "KP", "KS", "KT", "KV",
27
+ "MA", "ME", "MI", "ML", "MK", "MF", "MP", "MS", "MT", "MV",
28
+ "FR", "FC", "FQ", "FG", "FI", "FL", "FF", "FS", "FT", "FY", "FV",
29
+ "PA", "PR", "PD", "PC", "PE", "PG", "PL", "PK", "PS", "PV",
30
+ "SA", "SR", "SD", "SC", "SE", "SG", "SH", "SI", "SL", "SK", "SF", "SP", "SS", "ST", "SY", "SV",
31
+ "TA", "TR", "TN", "TC", "TE", "TG", "TI", "TL", "TK", "TF", "TP", "TS", "TT", "TV",
32
+ "WC",
33
+ "YR", "YD", "YC", "YG", "YL", "YS", "YV",
34
+ "VA", "VR", "VD", "VC", "VE", "VQ", "VG", "VI", "VL", "VK", "VP", "VS", "VT", "VY", "VV"
35
+ ]
36
+
37
  def extract_features(sequence):
38
+ """Extract only the required features and normalize them."""
39
+ # Compute all possible features
40
+ aac = propy.AAComposition.CalculateAAC(sequence) # Amino Acid Composition
41
+ dipeptide_comp = propy.AAComposition.CalculateAADipeptideComposition(sequence) # Dipeptide Composition
42
+
43
+ # Combine both feature sets
44
+ all_features = {**aac, **dipeptide_comp}
45
+
46
+ # Extract only the selected features
47
+ selected_feature_values = [all_features[feature] for feature in selected_features if feature in all_features]
48
+
49
+ # Convert to NumPy array for normalization
50
+ feature_array = np.array(selected_feature_values).reshape(1, -1)
51
+
52
+ # Min-Max Normalization
53
+ scaler = MinMaxScaler()
54
+ normalized_features = scaler.fit_transform(feature_array).flatten()
55
 
56
  return normalized_features
57