Spaces:
Sleeping
Sleeping
File size: 988 Bytes
60daf05 dcda405 0fcc3c8 60daf05 02b1dab |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# utils/preprocessing.py
import pandas as pd
from sklearn.preprocessing import StandardScaler
from utils import feature_engineering
def preprocess_data_for_streamlit(data_path):
    """Load a CSV, apply feature engineering, and standardize the features.

    Args:
        data_path: Location of the CSV file; handed directly to
            ``pd.read_csv``.

    Returns:
        A ``(df, X_scaled)`` tuple. ``df`` is the frame returned by
        ``feature_engineering``; ``X_scaled`` is the result of fitting a
        new ``StandardScaler`` on every column except ``'label'`` and
        transforming those same columns.
    """
    # feature_engineering comes from the project's ``utils`` package; its
    # exact transformations are not visible from this module.
    df = feature_engineering(pd.read_csv(data_path))

    # Everything except the target column is treated as a feature.
    features = df.drop('label', axis=1)

    # NOTE(review): the scaler is fitted on this dataset itself rather than
    # reused from training -- confirm that this is the intended behaviour.
    X_scaled = StandardScaler().fit_transform(features)
    return df, X_scaled
# utils/preprocessing.py
import pandas as pd
from sklearn.model_selection import train_test_split
def preprocess_data(data_path, test_size=0.2, random_state=42):
    """Build standardized train/test feature matrices from a CSV file.

    Args:
        data_path: Location of the CSV file; handed directly to
            ``pd.read_csv``.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        A ``(X_train_scaled, X_test_scaled, y_train, y_test)`` tuple. The
        feature matrices are standardized with a ``StandardScaler`` fitted
        on the training split only; the targets are the ``'label'`` column
        values for each split.
    """
    # feature_engineering comes from the project's ``utils`` package; its
    # exact transformations are not visible from this module.
    frame = feature_engineering(pd.read_csv(data_path))

    # Separate the target column from the remaining feature columns.
    target = frame['label']
    features = frame.drop('label', axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=random_state,
    )

    # Fit on the training split only, then reuse those statistics for the
    # test split so no test-set information influences the scaling.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test
|