Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +209 -0
- fertility_synthetic_50000.csv +0 -0
- requirements (2).txt +8 -0
app.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import os
|
7 |
+
import joblib
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
+
from sklearn.preprocessing import StandardScaler
|
10 |
+
from tensorflow import keras
|
11 |
+
|
12 |
+
# -------------------------
|
13 |
+
# Streamlit Page Config
|
14 |
+
# -------------------------
|
15 |
+
# Page-level setup: wide layout and the app title shown on every page.
st.set_page_config(layout="wide", page_title="Fertility Prediction")
st.title("๐งฌ Fertility Health Prediction App")

# -------------------------
# Sidebar Navigation
# -------------------------
# The selected label is compared verbatim by the page branches below,
# so these strings must stay in sync with those comparisons.
nav_labels = ["๐ EDA", "๐ค Model Training", "๐ฎ Prediction"]
page = st.sidebar.radio("๐ Navigate", nav_labels)
|
22 |
+
|
23 |
+
# -------------------------
|
24 |
+
# Load Data
|
25 |
+
# -------------------------
|
26 |
+
@st.cache_data
def load_data():
    """Load the fertility dataset and drop exact duplicate rows.

    The CSV is read from the directory that contains this script, so the
    app runs wherever the repository is deployed instead of only on a
    machine that has one specific absolute path. The function is wrapped
    in ``st.cache_data``, so Streamlit reuses the returned frame across
    reruns.

    Returns:
        pandas.DataFrame: the dataset with duplicate rows removed.

    Raises:
        FileNotFoundError: if ``fertility_synthetic_50000.csv`` is not
            present next to this script.
    """
    # Resolve relative to this file rather than the process working
    # directory, which depends on how the app was launched.
    app_dir = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(app_dir, "fertility_synthetic_50000.csv")
    return pd.read_csv(csv_path).drop_duplicates()
|
31 |
+
|
32 |
+
df = load_data()
|
33 |
+
|
34 |
+
# -------------------------
|
35 |
+
# EDA Page
|
36 |
+
# -------------------------
|
37 |
+
if page == "๐ EDA":
    # Exploratory page: summary tables first, then two charts.
    st.header("๐ Exploratory Data Analysis")

    st.subheader("๐ Dataset Overview")
    st.write(f"๐๏ธ Shape of dataset: {df.shape}")

    # Head and describe() side by side.
    col1, col2 = st.columns(2)
    with col1:
        st.write("๐ First 5 rows:")
        st.dataframe(df.head())
    with col2:
        st.write("๐ Basic statistics:")
        st.dataframe(df.describe())

    st.subheader("โ Missing Values")
    st.write(df.isna().sum())

    st.subheader("๐ Data Visualization")

    # Target vs Sperm Count: mean sperm count per target class.
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(
        data=df,
        x='Target_HealthyOffspring',
        y='Male_SpermCount_million_per_mL',
        estimator=np.mean,
        ax=ax,
        palette="viridis",
    )
    ax.set_title('๐ฏ Target vs Male Sperm Count (mean)')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; the script is
    # re-executed on each interaction, so close explicitly to avoid
    # accumulating figures.
    plt.close(fig)

    # Correlation heatmap over the numeric columns only.
    st.write("๐ก๏ธ Correlation Heatmap:")
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(df.corr(numeric_only=True), annot=False, cmap="coolwarm", ax=ax)
    st.pyplot(fig)
    plt.close(fig)
|
69 |
+
|
70 |
+
# -------------------------
|
71 |
+
# Model Training Page
|
72 |
+
# -------------------------
|
73 |
+
elif page == "๐ค Model Training":
|
74 |
+
st.header("โ๏ธ Model Training")
|
75 |
+
|
76 |
+
# Prepare data
|
77 |
+
X = df.drop("Target_HealthyOffspring", axis=1)
|
78 |
+
y = df["Target_HealthyOffspring"]
|
79 |
+
|
80 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=29)
|
81 |
+
|
82 |
+
# Scale data
|
83 |
+
scaler = StandardScaler()
|
84 |
+
X_train_scaled = scaler.fit_transform(X_train)
|
85 |
+
X_test_scaled = scaler.transform(X_test)
|
86 |
+
|
87 |
+
# Ensure models directory
|
88 |
+
os.makedirs("models", exist_ok=True)
|
89 |
+
joblib.dump(scaler, "models/fertility_scaler.pkl")
|
90 |
+
|
91 |
+
# Model architecture
|
92 |
+
model = keras.Sequential([
|
93 |
+
keras.layers.Input(shape=(X_train.shape[1],)),
|
94 |
+
keras.layers.Dense(7, activation="relu"),
|
95 |
+
keras.layers.Dense(5, activation="relu"),
|
96 |
+
keras.layers.Dense(4, activation="relu"),
|
97 |
+
keras.layers.Dense(2, activation="softmax")
|
98 |
+
])
|
99 |
+
|
100 |
+
model.compile(loss="sparse_categorical_crossentropy",
|
101 |
+
optimizer="adam",
|
102 |
+
metrics=["accuracy"])
|
103 |
+
|
104 |
+
# Train model
|
105 |
+
if st.button("๐ Train Model"):
|
106 |
+
with st.spinner("โณ Training in progress..."):
|
107 |
+
history = model.fit(X_train_scaled, y_train, epochs=10, validation_split=0.2, verbose=1)
|
108 |
+
|
109 |
+
# Save model
|
110 |
+
model.save("models/fertility_model.h5")
|
111 |
+
|
112 |
+
st.success("โ
Model trained and saved successfully!")
|
113 |
+
|
114 |
+
# Plot training history
|
115 |
+
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
|
116 |
+
ax1.plot(history.history["accuracy"], label="Training Accuracy", color="green")
|
117 |
+
ax1.plot(history.history["val_accuracy"], label="Validation Accuracy", color="blue")
|
118 |
+
ax1.set_title("๐ Accuracy")
|
119 |
+
ax1.set_xlabel("Epochs")
|
120 |
+
ax1.set_ylabel("Accuracy")
|
121 |
+
ax1.legend()
|
122 |
+
|
123 |
+
ax2.plot(history.history["loss"], label="Training Loss", color="red")
|
124 |
+
ax2.plot(history.history["val_loss"], label="Validation Loss", color="orange")
|
125 |
+
ax2.set_title("๐ Loss")
|
126 |
+
ax2.set_xlabel("Epochs")
|
127 |
+
ax2.set_ylabel("Loss")
|
128 |
+
ax2.legend()
|
129 |
+
|
130 |
+
st.pyplot(fig)
|
131 |
+
|
132 |
+
# Evaluate on test set
|
133 |
+
test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
|
134 |
+
st.metric("๐งช Test Accuracy", f"{test_acc:.4f}")
|
135 |
+
st.metric("๐ Test Loss", f"{test_loss:.4f}")
|
136 |
+
|
137 |
+
# -------------------------
|
138 |
+
# Prediction Page
|
139 |
+
# -------------------------
|
140 |
+
elif page == "๐ฎ Prediction":
|
141 |
+
st.header("๐ฎ Make a Prediction")
|
142 |
+
|
143 |
+
try:
|
144 |
+
model = keras.models.load_model("models/fertility_model.h5")
|
145 |
+
scaler = joblib.load("models/fertility_scaler.pkl")
|
146 |
+
st.success("๐ Model & Scaler loaded successfully!")
|
147 |
+
except:
|
148 |
+
st.error("โ Model not found. Please train it first under 'Model Training'.")
|
149 |
+
st.stop()
|
150 |
+
|
151 |
+
# Create input form
|
152 |
+
with st.form("prediction_form"):
|
153 |
+
st.subheader("๐งพ Enter Patient Details")
|
154 |
+
|
155 |
+
col1, col2 = st.columns(2)
|
156 |
+
with col1:
|
157 |
+
st.markdown("**๐จ Male Factors**")
|
158 |
+
male_sperm_count = st.number_input("Sperm Count (million/mL)", min_value=0.0, value=15.0)
|
159 |
+
male_sperm_motility = st.number_input("Sperm Motility (%)", min_value=0.0, max_value=100.0, value=40.0)
|
160 |
+
male_sperm_morphology = st.number_input("Sperm Morphology (%)", min_value=0.0, max_value=100.0, value=4.0)
|
161 |
+
male_testosterone = st.number_input("Testosterone (ng/dL)", min_value=0.0, value=300.0)
|
162 |
+
male_fsh = st.number_input("Male FSH (mIU/mL)", min_value=0.0, value=1.5)
|
163 |
+
|
164 |
+
with col2:
|
165 |
+
st.markdown("**๐ฉ Female Factors**")
|
166 |
+
female_age = st.number_input("Female Age (years)", min_value=18, max_value=50, value=30)
|
167 |
+
female_ovulation = st.number_input("Ovulation Regularity (days)", min_value=0, value=28)
|
168 |
+
female_estradiol = st.number_input("Estradiol (pg/mL)", min_value=0.0, value=20.0)
|
169 |
+
female_progesterone = st.number_input("Progesterone (ng/mL)", min_value=0.0, value=10.0)
|
170 |
+
female_fsh = st.number_input("Female FSH (mIU/mL)", min_value=0.0, value=3.0)
|
171 |
+
|
172 |
+
st.markdown("**๐ Lifestyle Factors**")
|
173 |
+
col3, col4 = st.columns(2)
|
174 |
+
with col3:
|
175 |
+
intercourse_freq = st.number_input("Intercourse Frequency (per week)", min_value=0, value=2)
|
176 |
+
folic_acid = st.number_input("Folic Acid Intake (mcg/day)", min_value=0, value=400)
|
177 |
+
with col4:
|
178 |
+
smoking = st.number_input("Cigarettes per day", min_value=0, value=0)
|
179 |
+
alcohol = st.number_input("Alcoholic drinks per week", min_value=0, value=0)
|
180 |
+
hba1c = st.number_input("HbA1c (%)", min_value=0.0, max_value=20.0, value=5.0)
|
181 |
+
|
182 |
+
submitted = st.form_submit_button("โจ Predict")
|
183 |
+
|
184 |
+
if submitted:
|
185 |
+
input_data = np.array([[male_sperm_count, male_sperm_motility, male_sperm_morphology,
|
186 |
+
male_testosterone, male_fsh, female_age, female_ovulation,
|
187 |
+
female_estradiol, female_progesterone, female_fsh,
|
188 |
+
intercourse_freq, folic_acid, smoking, alcohol, hba1c]])
|
189 |
+
|
190 |
+
scaled_input = scaler.transform(input_data)
|
191 |
+
prediction = model.predict(scaled_input)
|
192 |
+
predicted_class = np.argmax(prediction, axis=1)
|
193 |
+
confidence = np.max(prediction) * 100
|
194 |
+
|
195 |
+
st.subheader("๐ Prediction Results")
|
196 |
+
if predicted_class[0] == 1:
|
197 |
+
st.success(f"โ
Likely to have healthy offspring (Confidence: {confidence:.2f}%)")
|
198 |
+
else:
|
199 |
+
st.error(f"โ Unlikely to have healthy offspring (Confidence: {confidence:.2f}%)")
|
200 |
+
|
201 |
+
st.progress(int(confidence))
|
202 |
+
|
203 |
+
# Probability distribution
|
204 |
+
fig, ax = plt.subplots(figsize=(6, 4))
|
205 |
+
ax.bar(['โ Unlikely (0)', 'โ
Likely (1)'], prediction[0],
|
206 |
+
color=['crimson', 'seagreen'])
|
207 |
+
ax.set_title('๐ Prediction Probability Distribution')
|
208 |
+
ax.set_ylabel('Probability')
|
209 |
+
st.pyplot(fig)
|
fertility_synthetic_50000.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements (2).txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair
|
2 |
+
pandas
|
3 |
+
streamlit
|
4 |
+
streamlit_drawable_canvas
|
5 |
+
joblib
|
6 |
+
pillow
|
7 |
+
opencv-python
|
8 |
+
scikit-learn
|