Fouzanjaved committed on
Commit
115725e
·
verified ·
1 Parent(s): 8a86a27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -86
app.py CHANGED
@@ -1,88 +1,88 @@
1
  import pandas as pd
2
- from sklearn.impute import SimpleImputer
 
 
3
  from sklearn.preprocessing import MinMaxScaler
4
-
5
- # Load data
6
- df = pd.read_csv("diabetes.csv")
7
-
8
- # Replace 0s with NaN (Glucose, BP, etc.)
9
- cols = ["Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI"]
10
- df[cols] = df[cols].replace(0, float('nan'))
11
-
12
- # Impute missing values with mean
13
- imputer = SimpleImputer(strategy="mean")
14
- df[cols] = imputer.fit_transform(df[cols])
15
-
16
- # Remove outliers using IQR
17
- Q1 = df.quantile(0.25)
18
- Q3 = df.quantile(0.75)
19
- IQR = Q3 - Q1
20
- df = df[~((df < (Q1 - 1.5 * IQR)) | (df > (Q3 + 1.5 * IQR))).any(axis=1)]
21
-
22
- # Feature selection (keep: Pregnancies, Glucose, Insulin, BMI, Age)
23
- X = df[["Pregnancies", "Glucose", "Insulin", "BMI", "Age"]]
24
- y = df["Outcome"]
25
-
26
- # Normalize to [0, 1]
27
- scaler = MinMaxScaler()
28
- X = scaler.fit_transform(X)
29
-
30
-
31
-
32
-
33
- # Machine Learning Models (DT, KNN, RF, NB, AB, LR, SVM)
34
- from sklearn.model_selection import train_test_split, cross_val_score
35
- from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
36
- from sklearn.svm import SVC
37
- from sklearn.linear_model import LogisticRegression
38
- from sklearn.tree import DecisionTreeClassifier
39
- from sklearn.neighbors import KNeighborsClassifier
40
- from sklearn.naive_bayes import GaussianNB
41
-
42
- # Split data (85% train, 15% test)
43
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
44
-
45
- # Initialize models
46
- models = {
47
- "DT": DecisionTreeClassifier(),
48
- "KNN": KNeighborsClassifier(n_neighbors=7),
49
- "RF": RandomForestClassifier(),
50
- "NB": GaussianNB(),
51
- "AB": AdaBoostClassifier(),
52
- "LR": LogisticRegression(),
53
- "SVM": SVC()
54
- }
55
-
56
- # Evaluate via k-fold CV (k=10)
57
- for name, model in models.items():
58
- scores = cross_val_score(model, X, y, cv=10, scoring="accuracy")
59
- print(f"{name} CV Accuracy: {scores.mean():.2%}")
60
-
61
- # Evaluate via train-test split
62
- for name, model in models.items():
63
- model.fit(X_train, y_train)
64
- acc = model.score(X_test, y_test)
65
- print(f"{name} Test Accuracy: {acc:.2%}")
66
-
67
-
68
- #Neural Network (Keras)
69
-
70
- from tensorflow.keras.models import Sequential
71
- from tensorflow.keras.layers import Dense
72
- from tensorflow.keras.optimizers import SGD
73
-
74
- # NN with 2 hidden layers (architecture from paper)
75
- model = Sequential([
76
- Dense(26, activation="relu", input_shape=(5,)),
77
- Dense(5, activation="relu"),
78
- Dense(1, activation="sigmoid")
79
- ])
80
-
81
- # Compile with SGD (lr=0.01)
82
- model.compile(optimizer=SGD(learning_rate=0.01),
83
- loss="binary_crossentropy",
84
- metrics=["accuracy"])
85
-
86
- # Train for 400 epochs
87
- history = model.fit(X_train, y_train, epochs=400, batch_size=32,
88
- validation_data=(X_test, y_test), verbose=0)
 
1
  import pandas as pd
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from tensorflow.keras.models import load_model
5
  from sklearn.preprocessing import MinMaxScaler
6
+ import gradio as gr
7
+ import joblib
8
+
9
# Restore the inference artifacts once at import time so every request
# reuses them: the Keras network first, then the feature scaler.
# NOTE(review): joblib.load unpickles the file — scaler.pkl must be a trusted artifact.
model = load_model("diabetes_model.h5")
scaler = joblib.load("scaler.pkl")
12
+
13
def predict_diabetes(pregnancies, glucose, insulin, bmi, age):
    """Predict diabetes risk and build the three values shown in the UI.

    The five inputs are scaled with the module-level ``scaler`` and passed to
    the module-level Keras ``model``; the first element of the prediction is
    treated as the probability of the "Diabetic" class.

    Returns:
        A 3-tuple ordered to match the interface's output components:
        a ``{label: confidence}`` dict for ``gr.Label``, a Markdown
        explanation string for ``gr.Markdown``, and a DataFrame with
        ``Feature`` / ``Importance`` columns for ``gr.BarPlot``.
    """
    # Single-row batch. Column order presumably mirrors the order the scaler
    # and model were fitted with — TODO confirm against the training script.
    input_data = np.array([[pregnancies, glucose, insulin, bmi, age]])
    scaled_data = scaler.transform(input_data)

    # predict() yields a NumPy scalar; cast once so the value is a plain
    # float everywhere it is formatted or handed to Gradio.
    probability = float(model.predict(scaled_data, verbose=0)[0][0])

    is_diabetic = probability >= 0.5
    status = "Diabetic" if is_diabetic else "Not Diabetic"
    confidence = probability if is_diabetic else 1 - probability

    glucose_label = "High" if glucose > 140 else "Normal"
    if bmi >= 30:
        bmi_label = "Obese"
    elif bmi >= 25:
        bmi_label = "Overweight"
    else:
        bmi_label = "Normal"

    # Joined line by line (no leading indentation) so Markdown never renders
    # the text as an indented code block.
    explanation = "\n".join([
        "",
        f"### Prediction: {status}",
        f"Confidence: {confidence:.1%}",
        "",
        "#### Key factors contributing to this prediction:",
        f"- Glucose level: **{glucose_label}** ({glucose} mg/dL)",
        f"- BMI: **{bmi_label}** ({bmi})",
        f"- Age: {age} years",
        f"- Insulin: {insulin} μU/mL",
        f"- Pregnancies: {pregnancies}",
        "",
    ])

    # Fixed illustrative weights, not derived from the model or this input.
    features = ['Pregnancies', 'Glucose', 'Insulin', 'BMI', 'Age']
    importance = [0.15, 0.45, 0.10, 0.20, 0.10]  # Example weights
    importance_frame = pd.DataFrame(
        {"Feature": features, "Importance": importance}
    )

    # Gradio needs one return value per output component, in order; the
    # previous single dict with string keys could not be mapped onto the
    # three declared outputs.
    label_scores = {
        "Diabetic": probability,
        "Not Diabetic": 1.0 - probability,
    }
    return label_scores, explanation, importance_frame
51
+
52
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------

# One slider per argument of predict_diabetes, in positional order.
inputs = [
    gr.Slider(minimum=0, maximum=15, step=1, label="Number of Pregnancies"),
    gr.Slider(minimum=50, maximum=200, value=120, label="Glucose Level (mg/dL)"),
    gr.Slider(minimum=0, maximum=300, value=80, label="Insulin Level (μU/mL)"),
    gr.Slider(minimum=15, maximum=50, value=32, label="BMI (kg/m²)"),
    gr.Slider(minimum=20, maximum=100, value=33, label="Age (years)"),
]

# Output components, rendered in this order.
outputs = [
    gr.Label(label="Diabetes Probability"),
    gr.Markdown(label="Explanation"),
    gr.BarPlot(x="Feature", y="Importance", label="Feature Importance"),
]

# Page copy shown above (title/description) and below (article) the form.
title = "Diabetes Prediction App"
description = "Early detection of diabetes using machine learning. Based on research: Khanam, J.J. & Foo, S.Y. (2021)"
article = """
**About this model**:
- Trained on Pima Indians Diabetes Dataset
- Neural Network with 88.6% accuracy
- Predicts diabetes risk using 5 key health parameters
"""

# Preset rows offered as clickable examples; values follow the input order.
example_rows = [
    [0, 90, 80, 24, 25],
    [3, 150, 95, 32, 35],
    [6, 180, 150, 38, 45],
]

demo = gr.Interface(
    fn=predict_diabetes,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    examples=example_rows,
)
demo.launch()