Spaces:

Sowmith22
/

fertilityann

Sleeping

App Files Files Community

fertilityann / app.py

Sowmith22

Update app.py

8eee061 verified 12 days ago

raw

history blame contribute delete

8.38 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import os
	import joblib
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from tensorflow import keras

	# -------------------------
	# Streamlit Page Config
	# -------------------------
	# Page-level settings: browser tab title plus the wide layout, then the
	# heading shown at the top of every page.
	st.set_page_config(
	    layout="wide",
	    page_title="Fertility Prediction",
	)
	st.title("🧬 Fertility Health Prediction App")

	# -------------------------
	# Sidebar Navigation
	# -------------------------
	# The label picked here is compared against further down to decide which
	# page section gets rendered.
	page = st.sidebar.radio(
	    label="📂 Navigate",
	    options=["🔍 EDA", "🤖 Model Training", "🔮 Prediction"],
	)

	# -------------------------
	# Load Data
	# -------------------------
	@st.cache_data
	def load_data(csv_path="fertility_synthetic_50000.csv"):
	    """Read the fertility dataset from a CSV file and drop duplicate rows.

	    The function is wrapped in ``st.cache_data``, so Streamlit can reuse the
	    result for a given ``csv_path`` instead of re-reading the file on every
	    script rerun.

	    Args:
	        csv_path: Path of the CSV file to read. Defaults to the file name the
	            app has always used, so existing ``load_data()`` calls behave
	            exactly as before.

	    Returns:
	        pandas.DataFrame: The file contents with exact duplicate rows removed.

	    Raises:
	        FileNotFoundError: If ``csv_path`` does not exist (raised by pandas).
	    """
	    # drop_duplicates() without inplace returns the cleaned frame directly,
	    # which avoids mutating an intermediate object.
	    return pd.read_csv(csv_path).drop_duplicates()


	# Module-level dataset shared by the page sections below.
	df = load_data()

	# -------------------------
	# EDA Page
	# -------------------------
	def _show_figure(fig):
	    """Render ``fig`` in the app, then close it.

	    Figures created through ``plt.subplots`` stay registered with pyplot until
	    they are closed; because Streamlit re-executes the script on every
	    interaction, leaving them open makes them pile up in memory.
	    """
	    st.pyplot(fig)
	    plt.close(fig)


	# -------------------------
	# EDA Page
	# -------------------------
	def _render_eda(data):
	    """Render the exploratory-data-analysis page for the DataFrame ``data``."""
	    st.header("📊 Exploratory Data Analysis")

	    st.subheader("📑 Dataset Overview")
	    st.write(f"🗂️ Shape of dataset: {data.shape}")

	    col1, col2 = st.columns(2)
	    with col1:
	        st.write("👀 First 5 rows:")
	        st.dataframe(data.head())
	    with col2:
	        st.write("📏 Basic statistics:")
	        st.dataframe(data.describe())

	    st.subheader("❓ Missing Values")
	    st.write(data.isna().sum())

	    st.subheader("📈 Data Visualization")

	    # Target vs Sperm Count
	    fig, ax = plt.subplots(figsize=(6, 4))
	    sns.barplot(data=data, x='Target_HealthyOffspring',
	                y='Male_SpermCount_million_per_mL',
	                estimator=np.mean, ax=ax, palette="viridis")
	    ax.set_title('🎯 Target vs Male Sperm Count (mean)')
	    _show_figure(fig)

	    # Correlation heatmap
	    st.write("🌡️ Correlation Heatmap:")
	    fig, ax = plt.subplots(figsize=(12, 10))
	    sns.heatmap(data.corr(numeric_only=True), annot=False, cmap="coolwarm", ax=ax)
	    _show_figure(fig)


	# -------------------------
	# Model Training Page
	# -------------------------
	def _render_training(data):
	    """Render the training page; fit, save and evaluate the model on demand.

	    Nothing expensive happens until the button is pressed. Splitting the data,
	    fitting the scaler and building the network on every rerun would be wasted
	    work, and writing the scaler without a matching model would leave the
	    ``models`` directory in a half-written state.
	    """
	    st.header("⚙️ Model Training")

	    if not st.button("🚀 Train Model"):
	        return

	    # Prepare data
	    X = data.drop("Target_HealthyOffspring", axis=1)
	    y = data["Target_HealthyOffspring"]
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=29
	    )

	    # Scale data: statistics come from the training split only.
	    scaler = StandardScaler()
	    X_train_scaled = scaler.fit_transform(X_train)
	    X_test_scaled = scaler.transform(X_test)

	    # Model architecture
	    model = keras.Sequential([
	        keras.layers.Input(shape=(X_train.shape[1],)),
	        keras.layers.Dense(7, activation="relu"),
	        keras.layers.Dense(5, activation="relu"),
	        keras.layers.Dense(4, activation="relu"),
	        keras.layers.Dense(2, activation="softmax"),
	    ])
	    model.compile(loss="sparse_categorical_crossentropy",
	                  optimizer="adam",
	                  metrics=["accuracy"])

	    # Train model
	    with st.spinner("⏳ Training in progress..."):
	        history = model.fit(X_train_scaled, y_train, epochs=10,
	                            validation_split=0.2, verbose=1)

	    # Save scaler and model together so the prediction page always loads a
	    # matching pair.
	    os.makedirs("models", exist_ok=True)
	    joblib.dump(scaler, "models/fertility_scaler.pkl")
	    model.save("models/fertility_model.h5")

	    st.success("✅ Model trained and saved successfully!")

	    # Plot training history
	    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
	    ax1.plot(history.history["accuracy"], label="Training Accuracy", color="green")
	    ax1.plot(history.history["val_accuracy"], label="Validation Accuracy", color="blue")
	    ax1.set_title("📈 Accuracy")
	    ax1.set_xlabel("Epochs")
	    ax1.set_ylabel("Accuracy")
	    ax1.legend()

	    ax2.plot(history.history["loss"], label="Training Loss", color="red")
	    ax2.plot(history.history["val_loss"], label="Validation Loss", color="orange")
	    ax2.set_title("📉 Loss")
	    ax2.set_xlabel("Epochs")
	    ax2.set_ylabel("Loss")
	    ax2.legend()

	    _show_figure(fig)

	    # Evaluate on test set
	    test_loss, test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)
	    st.metric("🧪 Test Accuracy", f"{test_acc:.4f}")
	    st.metric("📉 Test Loss", f"{test_loss:.4f}")


	# -------------------------
	# Prediction Page
	# -------------------------
	def _load_artifacts():
	    """Load the saved model and scaler, or report the problem and stop.

	    Returns:
	        tuple: ``(model, scaler)`` as loaded from the ``models`` directory.
	    """
	    model_path = "models/fertility_model.h5"
	    scaler_path = "models/fertility_scaler.pkl"

	    # A missing file is the expected "not trained yet" case; report it with
	    # the original message instead of relying on an exception.
	    if not (os.path.exists(model_path) and os.path.exists(scaler_path)):
	        st.error("❌ Model not found. Please train it first under 'Model Training'.")
	        st.stop()

	    try:
	        model = keras.models.load_model(model_path)
	        # NOTE: joblib.load unpickles the file; that is only acceptable
	        # because the file is written by this app's own training page.
	        scaler = joblib.load(scaler_path)
	    except Exception as exc:
	        # The files exist but could not be read (corrupt file, library
	        # version mismatch, ...). Show the real cause rather than claiming
	        # the model is missing.
	        st.error(f"❌ Could not load the saved model or scaler: {exc}")
	        st.stop()

	    st.success("📂 Model & Scaler loaded successfully!")
	    return model, scaler


	def _render_prediction():
	    """Render the prediction form and show the model's output on submit."""
	    st.header("🔮 Make a Prediction")

	    model, scaler = _load_artifacts()

	    # Create input form
	    with st.form("prediction_form"):
	        st.subheader("🧾 Enter Patient Details")

	        col1, col2 = st.columns(2)
	        with col1:
	            st.markdown("👨 Male Factors")
	            male_sperm_count = st.number_input("Sperm Count (million/mL)", min_value=0.0, value=15.0)
	            male_sperm_motility = st.number_input("Sperm Motility (%)", min_value=0.0, max_value=100.0, value=40.0)
	            male_sperm_morphology = st.number_input("Sperm Morphology (%)", min_value=0.0, max_value=100.0, value=4.0)
	            male_testosterone = st.number_input("Testosterone (ng/dL)", min_value=0.0, value=300.0)
	            male_fsh = st.number_input("Male FSH (mIU/mL)", min_value=0.0, value=1.5)

	        with col2:
	            st.markdown("👩 Female Factors")
	            female_age = st.number_input("Female Age (years)", min_value=18, max_value=50, value=30)
	            female_ovulation = st.number_input("Ovulation Regularity (days)", min_value=0, value=28)
	            female_estradiol = st.number_input("Estradiol (pg/mL)", min_value=0.0, value=20.0)
	            female_progesterone = st.number_input("Progesterone (ng/mL)", min_value=0.0, value=10.0)
	            female_fsh = st.number_input("Female FSH (mIU/mL)", min_value=0.0, value=3.0)

	        st.markdown("💚 Lifestyle Factors")
	        col3, col4 = st.columns(2)
	        with col3:
	            intercourse_freq = st.number_input("Intercourse Frequency (per week)", min_value=0, value=2)
	            folic_acid = st.number_input("Folic Acid Intake (mcg/day)", min_value=0, value=400)
	        with col4:
	            smoking = st.number_input("Cigarettes per day", min_value=0, value=0)
	            alcohol = st.number_input("Alcoholic drinks per week", min_value=0, value=0)
	            hba1c = st.number_input("HbA1c (%)", min_value=0.0, max_value=20.0, value=5.0)

	        submitted = st.form_submit_button("✨ Predict")

	    if not submitted:
	        return

	    # NOTE(review): this positional order is assumed to match the column
	    # order of the training CSV (minus the target) - confirm against the data.
	    input_data = np.array([[male_sperm_count, male_sperm_motility, male_sperm_morphology,
	                            male_testosterone, male_fsh, female_age, female_ovulation,
	                            female_estradiol, female_progesterone, female_fsh,
	                            intercourse_freq, folic_acid, smoking, alcohol, hba1c]])

	    scaled_input = scaler.transform(input_data)
	    prediction = model.predict(scaled_input)
	    predicted_class = np.argmax(prediction, axis=1)
	    # Highest softmax output, expressed as a percentage.
	    confidence = np.max(prediction) * 100

	    st.subheader("📌 Prediction Results")
	    if predicted_class[0] == 1:
	        st.success(f"✅ Likely to have healthy offspring (Confidence: {confidence:.2f}%)")
	    else:
	        st.error(f"❌ Unlikely to have healthy offspring (Confidence: {confidence:.2f}%)")

	    st.progress(int(confidence))

	    # Probability distribution
	    fig, ax = plt.subplots(figsize=(6, 4))
	    ax.bar(['❌ Unlikely (0)', '✅ Likely (1)'], prediction[0],
	           color=['crimson', 'seagreen'])
	    ax.set_title('📊 Prediction Probability Distribution')
	    ax.set_ylabel('Probability')
	    _show_figure(fig)


	# Render whichever page was selected in the sidebar.
	if page == "🔍 EDA":
	    _render_eda(df)
	elif page == "🤖 Model Training":
	    _render_training(df)
	elif page == "🔮 Prediction":
	    _render_prediction()