Spaces:
Sleeping
Sleeping
Commit
·
02b1dab
1
Parent(s):
9871430
Update utils/preprocessing.py
Browse files
- utils/preprocessing.py +15 -0
utils/preprocessing.py
CHANGED
@@ -9,3 +9,18 @@ def preprocess_data_for_streamlit(data_path):
|
|
9 |
scaler = StandardScaler()
|
10 |
X_scaled = scaler.fit_transform(X)
|
11 |
return df, X_scaled
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
scaler = StandardScaler()
|
10 |
X_scaled = scaler.fit_transform(X)
|
11 |
return df, X_scaled
|
12 |
+
# utils/preprocessing.py
|
13 |
+
import pandas as pd
|
14 |
+
from sklearn.model_selection import train_test_split
|
15 |
+
from sklearn.preprocessing import StandardScaler
|
16 |
+
|
17 |
+
def preprocess_data(data_path, test_size=0.2, random_state=42):
    """Load a CSV, split it into train/test sets, and standardize the features.

    The file at ``data_path`` is read with ``pd.read_csv`` and passed through
    ``feature_engineering`` (defined elsewhere in this module; its exact
    effect is not visible here). The ``'label'`` column is used as the target
    and every remaining column as a feature.

    Args:
        data_path: Path (or other ``pd.read_csv``-compatible source) of the
            CSV file to load.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``
            so the split is reproducible.

    Returns:
        A 4-tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)`` where
        the two feature sets have been transformed by a ``StandardScaler``
        and the two targets are returned as produced by the split.
    """
    frame = feature_engineering(pd.read_csv(data_path))

    # Separate predictors from the target column.
    features = frame.drop(columns='label')
    target = frame['label']

    train_features, test_features, y_train, y_test = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=random_state,
    )

    # Fit the scaler on the training split only, then reuse the fitted
    # statistics for the test split so no test information leaks into it.
    scaler = StandardScaler().fit(train_features)
    return (
        scaler.transform(train_features),
        scaler.transform(test_features),
        y_train,
        y_test,
    )
|