Sowmith22 committed on
Commit
6f00681
·
verified ·
1 Parent(s): bd19444

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +209 -0
  2. fertility_synthetic_50000.csv +0 -0
  3. requirements (2).txt +8 -0
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import os
7
+ import joblib
8
+ from sklearn.model_selection import train_test_split
9
+ from sklearn.preprocessing import StandardScaler
10
+ from tensorflow import keras
11
+
12
+ # -------------------------
13
+ # Streamlit Page Config
14
+ # -------------------------
15
# Page-level configuration and the application title.
st.set_page_config(
    page_title="Fertility Prediction",
    layout="wide",
)
st.title("๐Ÿงฌ Fertility Health Prediction App")

# Sidebar navigation: the selected label is compared verbatim by the
# page branches further down, so these strings must stay in sync with them.
page = st.sidebar.radio(
    "๐Ÿ“‚ Navigate",
    [
        "๐Ÿ” EDA",
        "๐Ÿค– Model Training",
        "๐Ÿ”ฎ Prediction",
    ],
)
22
+
23
+ # -------------------------
24
+ # Load Data
25
+ # -------------------------
26
# The dataset is shipped next to this script, so resolve it relative to the
# script's own directory rather than a machine-specific absolute path.
DATA_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "fertility_synthetic_50000.csv",
)


@st.cache_data
def load_data(csv_path=DATA_FILE):
    """Load the fertility dataset and drop exact duplicate rows.

    Args:
        csv_path: Location of the CSV file. Defaults to
            ``fertility_synthetic_50000.csv`` in the same directory as
            this script.

    Returns:
        A ``pandas.DataFrame`` with duplicate rows removed (original index
        labels are kept).

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
    """
    return pd.read_csv(csv_path).drop_duplicates()


df = load_data()
33
+
34
+ # -------------------------
35
+ # EDA Page
36
+ # -------------------------
37
if page == "๐Ÿ” EDA":
    # Exploratory view: dataset summary tables followed by two plots.
    st.header("๐Ÿ“Š Exploratory Data Analysis")

    st.subheader("๐Ÿ“‘ Dataset Overview")
    st.write(f"๐Ÿ—‚๏ธ Shape of dataset: {df.shape}")

    col1, col2 = st.columns(2)
    with col1:
        st.write("๐Ÿ‘€ First 5 rows:")
        st.dataframe(df.head())
    with col2:
        st.write("๐Ÿ“ Basic statistics:")
        st.dataframe(df.describe())

    st.subheader("โ“ Missing Values")
    st.write(df.isna().sum())

    st.subheader("๐Ÿ“ˆ Data Visualization")

    # Mean male sperm count per target class.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(data=df, x='Target_HealthyOffspring',
                y='Male_SpermCount_million_per_mL',
                estimator=np.mean, ax=ax, palette="viridis")
    ax.set_title('๐ŸŽฏ Target vs Male Sperm Count (mean)')
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; without an
    # explicit close, pyplot keeps every figure alive and memory grows.
    plt.close(fig)

    # Pairwise correlation of the numeric columns only.
    st.write("๐ŸŒก๏ธ Correlation Heatmap:")
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(df.corr(numeric_only=True), annot=False, cmap="coolwarm", ax=ax)
    st.pyplot(fig)
    plt.close(fig)
69
+
70
+ # -------------------------
71
+ # Model Training Page
72
+ # -------------------------
73
+ elif page == "๐Ÿค– Model Training":
74
+ st.header("โš™๏ธ Model Training")
75
+
76
+ # Prepare data
77
+ X = df.drop("Target_HealthyOffspring", axis=1)
78
+ y = df["Target_HealthyOffspring"]
79
+
80
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=29)
81
+
82
+ # Scale data
83
+ scaler = StandardScaler()
84
+ X_train_scaled = scaler.fit_transform(X_train)
85
+ X_test_scaled = scaler.transform(X_test)
86
+
87
+ # Ensure models directory
88
+ os.makedirs("models", exist_ok=True)
89
+ joblib.dump(scaler, "models/fertility_scaler.pkl")
90
+
91
+ # Model architecture
92
+ model = keras.Sequential([
93
+ keras.layers.Input(shape=(X_train.shape[1],)),
94
+ keras.layers.Dense(7, activation="relu"),
95
+ keras.layers.Dense(5, activation="relu"),
96
+ keras.layers.Dense(4, activation="relu"),
97
+ keras.layers.Dense(2, activation="softmax")
98
+ ])
99
+
100
+ model.compile(loss="sparse_categorical_crossentropy",
101
+ optimizer="adam",
102
+ metrics=["accuracy"])
103
+
104
+ # Train model
105
+ if st.button("๐Ÿš€ Train Model"):
106
+ with st.spinner("โณ Training in progress..."):
107
+ history = model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.2, verbose=1)
108
+
109
+ # Save model
110
+ model.save("models/fertility_model.h5")
111
+
112
+ st.success("โœ… Model trained and saved successfully!")
113
+
114
+ # Plot training history
115
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
116
+ ax1.plot(history.history["accuracy"], label="Training Accuracy", color="green")
117
+ ax1.plot(history.history["val_accuracy"], label="Validation Accuracy", color="blue")
118
+ ax1.set_title("๐Ÿ“ˆ Accuracy")
119
+ ax1.set_xlabel("Epochs")
120
+ ax1.set_ylabel("Accuracy")
121
+ ax1.legend()
122
+
123
+ ax2.plot(history.history["loss"], label="Training Loss", color="red")
124
+ ax2.plot(history.history["val_loss"], label="Validation Loss", color="orange")
125
+ ax2.set_title("๐Ÿ“‰ Loss")
126
+ ax2.set_xlabel("Epochs")
127
+ ax2.set_ylabel("Loss")
128
+ ax2.legend()
129
+
130
+ st.pyplot(fig)
131
+
132
+ # Evaluate on test set
133
+ test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
134
+ st.metric("๐Ÿงช Test Accuracy", f"{test_acc:.4f}")
135
+ st.metric("๐Ÿ“‰ Test Loss", f"{test_loss:.4f}")
136
+
137
+ # -------------------------
138
+ # Prediction Page
139
+ # -------------------------
140
+ elif page == "๐Ÿ”ฎ Prediction":
141
+ st.header("๐Ÿ”ฎ Make a Prediction")
142
+
143
+ try:
144
+ model = keras.models.load_model("models/fertility_model.h5")
145
+ scaler = joblib.load("models/fertility_scaler.pkl")
146
+ st.success("๐Ÿ“‚ Model & Scaler loaded successfully!")
147
+ except:
148
+ st.error("โŒ Model not found. Please train it first under 'Model Training'.")
149
+ st.stop()
150
+
151
+ # Create input form
152
+ with st.form("prediction_form"):
153
+ st.subheader("๐Ÿงพ Enter Patient Details")
154
+
155
+ col1, col2 = st.columns(2)
156
+ with col1:
157
+ st.markdown("**๐Ÿ‘จ Male Factors**")
158
+ male_sperm_count = st.number_input("Sperm Count (million/mL)", min_value=0.0, value=15.0)
159
+ male_sperm_motility = st.number_input("Sperm Motility (%)", min_value=0.0, max_value=100.0, value=40.0)
160
+ male_sperm_morphology = st.number_input("Sperm Morphology (%)", min_value=0.0, max_value=100.0, value=4.0)
161
+ male_testosterone = st.number_input("Testosterone (ng/dL)", min_value=0.0, value=300.0)
162
+ male_fsh = st.number_input("Male FSH (mIU/mL)", min_value=0.0, value=1.5)
163
+
164
+ with col2:
165
+ st.markdown("**๐Ÿ‘ฉ Female Factors**")
166
+ female_age = st.number_input("Female Age (years)", min_value=18, max_value=50, value=30)
167
+ female_ovulation = st.number_input("Ovulation Regularity (days)", min_value=0, value=28)
168
+ female_estradiol = st.number_input("Estradiol (pg/mL)", min_value=0.0, value=20.0)
169
+ female_progesterone = st.number_input("Progesterone (ng/mL)", min_value=0.0, value=10.0)
170
+ female_fsh = st.number_input("Female FSH (mIU/mL)", min_value=0.0, value=3.0)
171
+
172
+ st.markdown("**๐Ÿ’š Lifestyle Factors**")
173
+ col3, col4 = st.columns(2)
174
+ with col3:
175
+ intercourse_freq = st.number_input("Intercourse Frequency (per week)", min_value=0, value=2)
176
+ folic_acid = st.number_input("Folic Acid Intake (mcg/day)", min_value=0, value=400)
177
+ with col4:
178
+ smoking = st.number_input("Cigarettes per day", min_value=0, value=0)
179
+ alcohol = st.number_input("Alcoholic drinks per week", min_value=0, value=0)
180
+ hba1c = st.number_input("HbA1c (%)", min_value=0.0, max_value=20.0, value=5.0)
181
+
182
+ submitted = st.form_submit_button("โœจ Predict")
183
+
184
+ if submitted:
185
+ input_data = np.array([[male_sperm_count, male_sperm_motility, male_sperm_morphology,
186
+ male_testosterone, male_fsh, female_age, female_ovulation,
187
+ female_estradiol, female_progesterone, female_fsh,
188
+ intercourse_freq, folic_acid, smoking, alcohol, hba1c]])
189
+
190
+ scaled_input = scaler.transform(input_data)
191
+ prediction = model.predict(scaled_input)
192
+ predicted_class = np.argmax(prediction, axis=1)
193
+ confidence = np.max(prediction) * 100
194
+
195
+ st.subheader("๐Ÿ“Œ Prediction Results")
196
+ if predicted_class[0] == 1:
197
+ st.success(f"โœ… Likely to have healthy offspring (Confidence: {confidence:.2f}%)")
198
+ else:
199
+ st.error(f"โŒ Unlikely to have healthy offspring (Confidence: {confidence:.2f}%)")
200
+
201
+ st.progress(int(confidence))
202
+
203
+ # Probability distribution
204
+ fig, ax = plt.subplots(figsize=(6, 4))
205
+ ax.bar(['โŒ Unlikely (0)', 'โœ… Likely (1)'], prediction[0],
206
+ color=['crimson', 'seagreen'])
207
+ ax.set_title('๐Ÿ“Š Prediction Probability Distribution')
208
+ ax.set_ylabel('Probability')
209
+ st.pyplot(fig)
fertility_synthetic_50000.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements (2).txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
altair
pandas
streamlit
streamlit_drawable_canvas
joblib
pillow
opencv-python
scikit-learn
# Imported by app.py but previously missing from this list:
numpy
matplotlib
seaborn
tensorflow