nonzeroexit committed on
Commit
dba0066
·
verified ·
1 Parent(s): 248a61c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -31
app.py CHANGED
@@ -46,40 +46,25 @@ selected_features = [
46
  ]
47
 
48
  def extract_features(sequence):
49
- """Extracts features, aligns, and normalizes, prioritizing AADipeptide."""
50
- try:
51
- # 1. Calculate Dipeptide Composition (as per your request)
52
- dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
53
- dipeptide_values = list(dipeptide_features.values())
54
- dipeptide_array = np.array(dipeptide_values).reshape(1, -1) #Correct shape
55
 
56
- # 2. Calculate other features
57
- auto_features = Autocorrelation.CalculateAutoTotal(sequence)
58
- ctd_features = CTD.CalculateCTD(sequence)
59
- pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
60
- all_features = {**auto_features, **ctd_features, **pseudo_features,**dipeptide_features}
 
 
 
 
61
 
62
- # Create a DataFrame for ALL features
63
- all_features_df = pd.DataFrame([all_features])
 
64
 
65
- # --- Feature Selection and Alignment ---
66
- present_features = [col for col in selected_features if col in all_features_df.columns]
67
- selected_df = all_features_df[present_features]
68
- aligned_df = pd.DataFrame(columns=selected_features)
69
- aligned_df.update(selected_df)
70
- aligned_df = aligned_df.fillna(0)
71
-
72
-
73
- # Normalize
74
- normalized_features = scaler.transform(aligned_df)
75
- return normalized_features
76
-
77
- except (ZeroDivisionError, KeyError, TypeError, ValueError) as e:
78
- print(f"Error during feature extraction: {e}")
79
- return None
80
- except Exception as e:
81
- print(f"An unexpected error occurred: {e}")
82
- return None
83
 
84
 
85
  def predict(sequence):
 
46
  ]
47
 
48
  def extract_features(sequence):
49
+
50
+ dipeptide_features = AAComposition.CalculateAADipeptideComposition(sequence)
 
 
 
 
51
 
52
+ # 2. Calculate other features
53
+ auto_features = Autocorrelation.CalculateAutoTotal(sequence)
54
+ ctd_features = CTD.CalculateCTD(sequence)
55
+ pseudo_features = PseudoAAC.GetAPseudoAAC(sequence)
56
+ all_features = {**auto_features, **ctd_features, **pseudo_features,**dipeptide_features}
57
+ all_features = list(all_features.values())
58
+ all_features = np.array(all_features).reshape(-1, 1) #Correct shape
59
+ normalized_features = scaler.transform(all_features.T)
60
+ normalized_features = normalized_features.flatten()
61
 
62
+ selected_feature_dict = {feature: normalized_features[i] for i, feature in enumerate(selected_features) if feature in all_features}
63
+ selected_feature_df = pd.DataFrame([selected_feature_dict])
64
+ selected_feature_array = selected_feature_df.T.to_numpy()
65
 
66
+ return selected_feature_array
67
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
 
70
  def predict(sequence):