Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -44,39 +44,31 @@ selected_features = [
|
|
44 |
]
|
45 |
|
46 |
def extract_features(sequence):
|
|
|
|
|
|
|
47 |
aa_features = AAComposition.CalculateAADipeptideComposition(sequence)
|
48 |
auto_features = Autocorrelation.CalculateAutoTotal(sequence)
|
49 |
ctd_features = CTD.CalculateCTD(sequence)
|
50 |
pseaac_features = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
|
51 |
|
|
|
52 |
all_features = {**aa_features, **auto_features, **ctd_features, **pseaac_features}
|
53 |
|
54 |
-
# Convert to DataFrame
|
55 |
-
feature_df = pd.DataFrame([all_features])
|
56 |
-
|
57 |
-
print("Extracted Features:", feature_df.columns.tolist()) # Debugging line
|
58 |
-
|
59 |
# Ensure all selected features are present
|
60 |
-
|
61 |
-
extra_features = [f for f in feature_df.columns if f not in selected_features]
|
62 |
-
|
63 |
-
if missing_features:
|
64 |
-
print(f"Missing Features ({len(missing_features)}):", missing_features)
|
65 |
|
66 |
-
|
67 |
-
|
68 |
|
69 |
-
#
|
70 |
-
|
71 |
-
feature_df[feature] = 0
|
72 |
|
73 |
-
#
|
74 |
-
|
75 |
|
76 |
-
|
77 |
-
normalized_features = scaler.transform(feature_df)
|
78 |
|
79 |
-
return normalized_features
|
80 |
|
81 |
|
82 |
def predict(sequence):
|
|
|
44 |
]
|
45 |
|
46 |
def extract_features(sequence):
    """Build the scaled model-input row for a single sequence.

    Four descriptor calculators are run on ``sequence`` and their results
    are merged into one mapping. On a key clash the later calculator wins;
    the merge order is dipeptide composition, autocorrelation, CTD, then
    pseudo amino-acid composition (called with ``lamda=9``).

    Only the names listed in the module-level ``selected_features`` are
    kept, in that order; any name the calculators did not produce is
    filled with 0. The resulting one-row DataFrame is passed through the
    module-level ``scaler``.

    Returns the scaled values reshaped to a 2-D array with a single row.
    """
    dipeptide = AAComposition.CalculateAADipeptideComposition(sequence)
    autocorr = Autocorrelation.CalculateAutoTotal(sequence)
    ctd = CTD.CalculateCTD(sequence)
    pseudo_aac = PseudoAAC.GetAPseudoAAC(sequence, lamda=9)
    merged = {**dipeptide, **autocorr, **ctd, **pseudo_aac}

    # Absent descriptors default to 0 so the column set always follows
    # selected_features, whatever the calculators returned.
    row = {}
    for name in selected_features:
        row[name] = merged.get(name, 0)

    scaled = scaler.transform(pd.DataFrame([row]))

    # NOTE(review): relies on the scaler returning an object with
    # .flatten() (presumably a NumPy array) - confirm against the scaler
    # that is loaded elsewhere in this file.
    return scaled.flatten().reshape(1, -1)
|
|
|
71 |
|
|
|
72 |
|
73 |
|
74 |
def predict(sequence):
|