nonzeroexit committed on
Commit
4eaa8e5
·
verified ·
1 Parent(s): 248ff12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -44,39 +44,31 @@ selected_features = [
44
  ]
45
 
46
  def extract_features(sequence):
 
 
 
47
  aa_features = AAComposition.CalculateAADipeptideComposition(sequence)
48
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
49
  ctd_features = CTD.CalculateCTD(sequence)
50
  pseaac_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
51
 
 
52
  all_features = {**aa_features, **auto_features, **ctd_features, **pseaac_features}
53
 
54
- # Convert to DataFrame
55
- feature_df = pd.DataFrame([all_features])
56
-
57
- print("Extracted Features:", feature_df.columns.tolist()) # Debugging line
58
-
59
  # Ensure all selected features are present
60
- missing_features = [f for f in selected_features if f not in feature_df.columns]
61
- extra_features = [f for f in feature_df.columns if f not in selected_features]
62
-
63
- if missing_features:
64
- print(f"Missing Features ({len(missing_features)}):", missing_features)
65
 
66
- if extra_features:
67
- print(f"Extra Features ({len(extra_features)}):", extra_features)
68
 
69
- # Fix missing columns by adding them with default values (0)
70
- for feature in missing_features:
71
- feature_df[feature] = 0
72
 
73
- # Select only the required features
74
- feature_df = feature_df[selected_features]
75
 
76
- # Normalize
77
- normalized_features = scaler.transform(feature_df)
78
 
79
- return normalized_features
80
 
81
 
82
  def predict(sequence):
 
44
  ]
45
 
46
  def extract_features(sequence):
47
+ """Extract selected features and normalize them."""
48
+
49
+ # Extract features from different methods
50
  aa_features = AAComposition.CalculateAADipeptideComposition(sequence)
51
  auto_features = Autocorrelation.CalculateAutoTotal(sequence)
52
  ctd_features = CTD.CalculateCTD(sequence)
53
  pseaac_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
54
 
55
+ # Combine all extracted features
56
  all_features = {**aa_features, **auto_features, **ctd_features, **pseaac_features}
57
 
 
 
 
 
 
58
  # Ensure all selected features are present
59
+ feature_dict = {feature: all_features.get(feature, 0) for feature in selected_features}
 
 
 
 
60
 
61
+ # Convert to DataFrame
62
+ feature_df = pd.DataFrame([feature_dict])
63
 
64
+ # Normalize the features
65
+ normalized_features = scaler.transform(feature_df)
 
66
 
67
+ # Convert to a NumPy array in the expected format
68
+ selected_feature_array = normalized_features.flatten().reshape(1, -1)
69
 
70
+ return selected_feature_array
 
71
 
 
72
 
73
 
74
  def predict(sequence):