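# app.py -- "Create app.py" (Fouzanjaved, revision 5716541, verified), 2.67 kB
# File-viewer page text ("raw", "history blame") that preceded the code is
# kept here as a comment so the module parses as Python.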
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
# Read the raw records from disk
df = pd.read_csv("diabetes.csv")

# In these measurement columns a recorded 0 is treated as "not measured",
# so those zeros are turned into NaN
cols = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
measurements = df[cols]
df[cols] = measurements.replace(0, float("nan"))

# Fill every gap with the mean of its own column
imputer = SimpleImputer(strategy="mean")
filled = imputer.fit_transform(df[cols])
df[cols] = filled

# Drop each row holding at least one value outside the 1.5 * IQR fences
# (the fences are computed for every column of the frame)
Q1, Q3 = df.quantile(0.25), df.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
outside = df.lt(lower_fence) | df.gt(upper_fence)
df = df.loc[~outside.any(axis=1)]

# The label is the "Outcome" column; the model inputs are five selected features
selected = ["Pregnancies", "Glucose", "Insulin", "BMI", "Age"]
y = df["Outcome"]

# Rescale each selected feature to the [0, 1] range
scaler = MinMaxScaler()
X = scaler.fit_transform(df[selected])
# Machine Learning Models (DT, KNN, RF, NB, AB, LR, SVM)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
# Split data (85% train, 15% test).  The split is seeded and stratified on the
# label so both parts keep the same class proportions.
# NOTE(review): X was already scaled by a MinMaxScaler fit on every row before
# this split, so the held-out rows influenced the scaling; move the scaler
# into a Pipeline if a leakage-free estimate is required.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# Candidate classifiers, keyed by the short label used in the printed report.
# The randomized learners (DT, RF, AB) get the same seed as the split so that
# repeated runs print the same numbers.
models = {
    "DT": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=7),
    "RF": RandomForestClassifier(random_state=42),
    "NB": GaussianNB(),
    "AB": AdaBoostClassifier(random_state=42),
    "LR": LogisticRegression(),
    "SVM": SVC(),
}

# Evaluate via k-fold CV (k=10) on the full feature matrix
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=10, scoring="accuracy")
    print(f"{name} CV Accuracy: {scores.mean():.2%}")

# Evaluate via train-test split: fit on the training part, score on the rest
for name, model in models.items():
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    print(f"{name} Test Accuracy: {acc:.2%}")
#Neural Network (Keras)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
# NN with 2 hidden layers (architecture from paper): 26 and 5 ReLU units,
# followed by a single sigmoid output for the binary label.
# The input width is read from the training matrix instead of being
# hard-coded, so it follows whatever feature columns were selected.
n_features = X_train.shape[1]
model = Sequential([
    Dense(26, activation="relu", input_shape=(n_features,)),
    Dense(5, activation="relu"),
    Dense(1, activation="sigmoid"),
])

# Compile with SGD (lr=0.01), binary cross-entropy loss, accuracy metric
model.compile(
    optimizer=SGD(learning_rate=0.01),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train for 400 epochs; per-epoch metrics on the held-out split are kept in
# `history` (training itself is silent because verbose=0)
history = model.fit(
    X_train, y_train,
    epochs=400, batch_size=32,
    validation_data=(X_test, y_test),
    verbose=0,
)

# Report the network's hold-out accuracy in the same format as the other models
_, nn_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"NN Test Accuracy: {nn_acc:.2%}")