nonzeroexit committed on
Commit
191df8f
·
verified ·
1 Parent(s): dba0066

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
@@ -46,24 +46,36 @@ selected_features = [
46
  ]
47
 
48
  def extract_features(sequence):
49
-
50
- dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
51
 
52
- # 2. Calculate other features
53
- auto_features = Autocorrelation.CalculateAutoTotal(sequence)
54
- ctd_features = CTD.CalculateCTD(sequence)
55
- pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
56
- all_features = {**auto_features, **ctd_features, **pseudo_features,**dipeptide_features}
57
- all_features = list(all_features.values())
58
- all_features = np.array(all_features).reshape(-1, 1) #Correct shape
59
- normalized_features = scaler.transform(all_features.T)
60
- normalized_features = normalized_features.flatten()
 
61
 
62
- selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features) if feature in all_features}
63
- selected_feature_df = pd.DataFrame([selected_feature_dict])
64
- selected_feature_array = selected_feature_df.T.to_numpy()
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- return selected_feature_array
67
 
68
 
69
 
 
46
  ]
47
 
48
  def extract_features(sequence):
49
+ if len(sequence) < 2: # Ensure sequence is long enough
50
+ return None
51
 
52
+ try:
53
+ dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
54
+ auto_features = Autocorrelation.CalculateAutoTotal(sequence)
55
+ ctd_features = CTD.CalculateCTD(sequence)
56
+
57
+ # Check sequence length before APAAC
58
+ if len(sequence) > 2: # APAAC requires a longer sequence
59
+ pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
60
+ else:
61
+ pseudo_features = {}
62
 
63
+ all_features = {**auto_features, **ctd_features, **pseudo_features, **dipeptide_features}
64
+ all_features = list(all_features.values())
65
+ all_features = np.array(all_features).reshape(-1, 1) # Correct shape
66
+ normalized_features = scaler.transform(all_features.T)
67
+ normalized_features = normalized_features.flatten()
68
+
69
+ selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features) if feature in all_features}
70
+ selected_feature_df = pd.DataFrame([selected_feature_dict])
71
+ selected_feature_array = selected_feature_df.T.to_numpy()
72
+
73
+ return selected_feature_array
74
+
75
+ except Exception as e:
76
+ print(f"Feature extraction error: {e}")
77
+ return None # Return None if extraction fails
78
 
 
79
 
80
 
81