nonzeroexit committed on
Commit
a359627
·
verified ·
1 Parent(s): ccb89be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -54,18 +54,25 @@ def extract_features(sequence):
54
  ctd_features = CTD.CalculateCTD(sequence)
55
 
56
  try:
57
- pseudo_features = PseudoAAC.GetAPseudoAAC(sequence) # May cause ZeroDivisionError
58
  except ZeroDivisionError:
59
- pseudo_features = {} # Ignore if it fails
60
 
61
  all_features = {**auto_features, **ctd_features, **pseudo_features, **dipeptide_features}
62
- all_features = list(all_features.values())
63
- all_features = np.array(all_features).reshape(-1, 1) # Correct shape
64
 
65
- normalized_features = scaler.transform(all_features.T)
66
- normalized_features = normalized_features.flatten()
 
67
 
68
- selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features) if feature in all_features}
 
 
 
 
 
 
 
 
69
  selected_feature_df = pd.DataFrame([selected_feature_dict])
70
  selected_feature_array = selected_feature_df.T.to_numpy()
71
 
 
54
  ctd_features = CTD.CalculateCTD(sequence)
55
 
56
  try:
57
+ pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
58
  except ZeroDivisionError:
59
+ pseudo_features = {} # Ignore PseudoAAC features if they fail
60
 
61
  all_features = {**auto_features, **ctd_features, **pseudo_features, **dipeptide_features}
 
 
62
 
63
+ # Ensure we only keep features that were used during scaler training
64
+ feature_names = list(all_features.keys()) # Extracted feature names
65
+ feature_values = np.array(list(all_features.values())).reshape(1, -1) # Reshape for scaler
66
 
67
+ if feature_values.shape[1] != 145: # Check expected feature count
68
+ print(f"Warning: Extracted {feature_values.shape[1]} features, expected 145. Skipping normalization.")
69
+ return None # Skip this sequence
70
+
71
+ # Normalize the feature values
72
+ normalized_features = scaler.transform(feature_values)
73
+ normalized_features = normalized_features.flatten()
74
+
75
+ selected_feature_dict = {feature_names[i]: normalized_features[i] for i in range(len(feature_names))}
76
  selected_feature_df = pd.DataFrame([selected_feature_dict])
77
  selected_feature_array = selected_feature_df.T.to_numpy()
78