Spaces:

lucifer7210
/

PCOS-ML

Sleeping

PCOS-ML / app.py

Zane Vijay Falcao

Upload folder using huggingface_hub

d3915f5 verified 4 months ago

18.3 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import pickle
	import joblib
	import os

	# Load the trained model (try both joblib and pickle in case one fails)
	def load_model():
	    """Load the trained PCOS classifier, or build a demo stand-in.

	    Candidates are tried in this order and the first one that loads wins:

	    1. ``pcos_model.joblib`` via ``joblib.load``
	    2. ``random_forest_model`` via ``pickle.load``
	    3. ``random_forest_model.pkl`` via ``pickle.load``

	    The reason each candidate failed is printed so that a missing or
	    incompatible file is visible in the logs.

	    Returns:
	        The loaded model object. If no candidate loads, a
	        ``RandomForestClassifier`` fitted on random data with 43 columns
	        is returned instead; its predictions carry no meaning and exist
	        only so the UI keeps working.
	    """

	    def _load_pickle(path):
	        # Read one pickled object from *path*, closing the file afterwards.
	        with open(path, 'rb') as file:
	            return pickle.load(file)

	    # NOTE(security): joblib and pickle can execute arbitrary code while
	    # deserializing. Only ship model files that come from a trusted source.
	    candidates = [
	        ('pcos_model.joblib', joblib.load,
	         "Model loaded using joblib"),
	        ('random_forest_model', _load_pickle,
	         "Model loaded using pickle from random_forest_model"),
	        ('random_forest_model.pkl', _load_pickle,
	         "Model loaded using pickle from random_forest_model.pkl"),
	    ]

	    for path, loader, success_message in candidates:
	        try:
	            model = loader(path)
	        except Exception as e:
	            # Exception (not a bare except) so Ctrl-C / SystemExit still
	            # propagate instead of being treated as a load failure.
	            print(f"Error loading model from {path}: {e}")
	            continue
	        print(success_message)
	        return model

	    # Fallback to a simple model for demo purposes.
	    from sklearn.ensemble import RandomForestClassifier
	    print("Creating a fallback model for demonstration")
	    fallback_model = RandomForestClassifier(n_estimators=100, random_state=42)
	    # Train with dummy data to initialize; 43 matches the number of entries
	    # in the module's `features` list.
	    X_dummy = np.random.rand(100, 43)
	    y_dummy = np.random.choice([0, 1], 100)
	    fallback_model.fit(X_dummy, y_dummy)
	    return fallback_model

	# Load the model once, at import time. predict_pcos() and the "Debug Info"
	# tab both read this module-level object.
	model = load_model()

	# Column names handed to the model, in positional order. predict_pcos()
	# pairs these one-to-one with its arguments, so the order here has to match
	# the order of the UI's input_components list.
	features = [
	    "Age (yrs)",
	    "Weight (Kg)",
	    "Height(Cm)",
	    "BMI",
	    "Blood Group",
	    "Pulse rate(bpm)",
	    "RR (breaths/min)",
	    "Hb(g/dl)",
	    "Cycle length(days)",
	    "Cycle(R/I)",
	    "Marraige Status (Yrs)",
	    "Pregnant(Y/N)",
	    "No. of abortions",
	    "Hip(inch)",
	    "Waist(inch)",
	    "Waist:Hip Ratio",
	    "Weight gain(Y/N)",
	    "hair growth(Y/N)",
	    "Skin darkening (Y/N)",
	    "Hair loss(Y/N)",
	    "Pimples(Y/N)",
	    "Fast food (Y/N)",
	    "Reg.Exercise(Y/N)",
	    "BP _Systolic (mmHg)",
	    "BP _Diastolic (mmHg)",
	    "Follicle No. (L)",
	    "Follicle No. (R)",
	    "Avg. F size (L) (mm)",
	    "Avg. F size (R) (mm)",
	    "Endometrium (mm)",
	    "FSH(mIU/mL)",
	    "LH(mIU/mL)",
	    "FSH/LH",
	    "Hip:Waist Ratio",
	    "TSH (mIU/L)",
	    "AMH(ng/mL)",
	    "PRL(ng/mL)",
	    "Vit D3 (ng/mL)",
	    "PRG(ng/mL)",
	    "RBS(mg/dl)",
	    "Weight gain",
	    "I beta-HCG(mIU/mL)",
	    "II beta-HCG(mIU/mL)",
	]

	# Create visualizations for the dashboard
	def create_visualizations():
	    """Build the five demo figures offered on the Visualizations tab.

	    The data is synthetic, not a real dataset: 100 rows are drawn from
	    NumPy's global random generator after seeding it with 42, and rows
	    flagged as PCOS-positive are then shifted so the groups differ visibly.

	    Returns:
	        A list of five matplotlib figures, in this order: BMI vs age,
	        cycle length vs age, left vs right follicle count, left follicle
	        count boxplot, endometrium thickness boxplot.
	    """
	    np.random.seed(42)
	    row_count = 100

	    # The draw order below determines the generated values; keep it stable.
	    frame = pd.DataFrame({
	        "Age (yrs)": np.random.normal(25, 5, row_count),
	        "PCOS (Y/N)": np.random.choice([0, 1], row_count, p=[0.6, 0.4]),
	        "BMI": np.random.normal(25, 5, row_count),
	        "Cycle length(days)": np.random.normal(28, 5, row_count),
	        "Follicle No. (L)": np.random.normal(12, 5, row_count),
	        "Follicle No. (R)": np.random.normal(12, 5, row_count),
	        "Endometrium (mm)": np.random.normal(8, 2, row_count),
	        "Cycle(R/I)": np.random.choice([2, 4], row_count),
	        "Weight (Kg)": np.random.normal(65, 10, row_count),
	        "Hb(g/dl)": np.random.normal(12, 1.5, row_count),
	    })

	    # Shift the PCOS-positive rows so the two groups separate in the plots.
	    is_pcos = frame["PCOS (Y/N)"] == 1
	    column_shifts = (
	        ("BMI", 2),
	        ("Cycle length(days)", 5),
	        ("Follicle No. (L)", 8),
	        ("Follicle No. (R)", 7),
	    )
	    for column, shift in column_shifts:
	        frame.loc[is_pcos, column] += shift
	    frame.loc[is_pcos, "Cycle(R/I)"] = 4

	    group_colors = ("teal", "plum")
	    figures = []

	    # Figures 1-3: scatter plots coloured by PCOS status.
	    scatter_specs = (
	        ("Age (yrs)", "BMI",
	         "BMI vs Age by PCOS Status"),
	        ("Age (yrs)", "Cycle length(days)",
	         "Menstrual Cycle Length vs Age by PCOS Status"),
	        ("Follicle No. (L)", "Follicle No. (R)",
	         "Follicle Distribution (Left vs Right Ovary)"),
	    )
	    for x_column, y_column, title in scatter_specs:
	        figure, axes = plt.subplots(figsize=(8, 6))
	        sns.scatterplot(x=x_column, y=y_column, hue="PCOS (Y/N)",
	                        data=frame, palette=list(group_colors), ax=axes)
	        axes.set_title(title)
	        figures.append(figure)

	    # Figures 4-5: boxplots split by PCOS status.
	    box_specs = (
	        ("Follicle No. (L)", "Follicle Count (Left Ovary) by PCOS Status"),
	        ("Endometrium (mm)", "Endometrium Thickness by PCOS Status"),
	    )
	    for y_column, title in box_specs:
	        figure, axes = plt.subplots(figsize=(10, 6))
	        sns.boxplot(x="PCOS (Y/N)", y=y_column, data=frame,
	                    palette=list(group_colors), ax=axes)
	        axes.set_title(title)
	        figures.append(figure)

	    return figures

	# Helper function to get numerical value for categorical inputs
	def get_numerical_value(value, options):
	    """Return the position of *value* in *options*, or 0 if it cannot be found.

	    Args:
	        value: The categorical value to encode.
	        options: A sequence with an ``index`` method listing the categories.

	    Returns:
	        The zero-based index of *value* in *options*. Any lookup failure
	        (value absent, *options* not a sequence) yields 0, which is
	        indistinguishable from a match on the first option.
	    """
	    try:
	        return options.index(value)
	    except Exception:
	        # Best-effort default. Exception rather than a bare except so that
	        # KeyboardInterrupt / SystemExit are not swallowed.
	        return 0

	# Helper function to preprocess inputs
	def preprocess_inputs(input_dict):
	    """Encode raw UI values as numbers, modifying *input_dict* in place.

	    Two conversions are applied:

	    * every ``bool`` value becomes ``1`` or ``0``;
	    * a recognised ``"Blood Group"`` label is replaced by its position in
	      the list A+, A-, B+, B-, AB+, AB-, O+, O-. Unrecognised labels are
	      left unchanged.

	    Returns:
	        The same dictionary object that was passed in.
	    """
	    # Only existing keys are reassigned, so iterating while updating is safe.
	    for name, raw in input_dict.items():
	        if isinstance(raw, bool):
	            input_dict[name] = int(raw)

	    if "Blood Group" not in input_dict:
	        return input_dict

	    group_labels = ["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"]
	    label = input_dict["Blood Group"]
	    if label in group_labels:
	        input_dict["Blood Group"] = group_labels.index(label)

	    return input_dict

	# Function to process input and make predictions
	def predict_pcos(*args):
	    """Run the model on one set of UI inputs and describe the result.

	    Args:
	        *args: One value per entry of the module-level ``features`` list,
	            in the same order.

	    Returns:
	        A human-readable string: the predicted class with its probability,
	        a heuristic risk score if the loaded object raises AttributeError
	        when asked to predict, or an error message if anything else fails.
	        Exceptions are never propagated to the caller.
	    """
	    if model is None:
	        return "Model not loaded correctly. Please check if model files are available."

	    try:
	        # zip() would silently drop surplus values or leave columns out, so a
	        # wiring mistake between the UI and `features` is reported instead.
	        if len(args) != len(features):
	            raise ValueError(
	                f"expected {len(features)} input values, got {len(args)}"
	            )

	        # Pair each value with its feature name, then encode booleans and
	        # the blood group as numbers.
	        input_dict = preprocess_inputs(dict(zip(features, args)))

	        # Single-row frame whose columns are the feature names.
	        input_df = pd.DataFrame([input_dict])

	        # Print for debugging
	        print("Input shape:", input_df.shape)
	        print("Input data types:", input_df.dtypes)

	        try:
	            prediction = model.predict(input_df)[0]
	            probability = model.predict_proba(input_df)[0]

	            result = "Positive for PCOS" if prediction == 1 else "Negative for PCOS"
	            # Report the probability of the predicted class; this indexing
	            # assumes probability[0] is class 0 and probability[1] is class 1.
	            conf = probability[1] if prediction == 1 else probability[0]

	            return f"{result} (Confidence: {conf:.2f})"
	        except AttributeError:
	            # The loaded object has no usable predict/predict_proba (for
	            # example a bare array), so fall back to a simple heuristic:
	            # the mean of four normalised indicators against a 0.6 cut-off.
	            print("Model is not a classifier object, using fallback prediction")
	            risk_score = np.mean([
	                input_df["BMI"].values[0] / 30,
	                input_df["Follicle No. (L)"].values[0] / 15,
	                input_df["Follicle No. (R)"].values[0] / 15,
	                (1 if input_df["Cycle(R/I)"].values[0] > 3 else 0)
	            ])
	            prediction = 1 if risk_score > 0.6 else 0
	            result = "Positive for PCOS" if prediction == 1 else "Negative for PCOS"
	            return f"{result} (Risk Score: {risk_score:.2f})"

	    except Exception as e:
	        # UI boundary: log the full traceback and show a short message
	        # rather than letting the exception escape.
	        import traceback
	        traceback.print_exc()
	        return f"Error making prediction: {str(e)}"

	# Function to display visualizations
	def show_visualization(visualization_index):
	    """Return the demo figure at *visualization_index*, or None if out of range.

	    Args:
	        visualization_index: Zero-based position in the list returned by
	            create_visualizations().

	    Returns:
	        The selected matplotlib figure, or ``None`` when the index is
	        outside the list.
	    """
	    visualizations = create_visualizations()

	    selected = None
	    if 0 <= visualization_index < len(visualizations):
	        selected = visualizations[visualization_index]

	    # Every call builds a fresh set of figures through pyplot, which keeps
	    # each one registered until it is closed. Close them all so repeated
	    # selections do not accumulate figures; a closed Figure object can
	    # still be rendered by whoever receives it.
	    for figure in visualizations:
	        plt.close(figure)

	    return selected

	# Create the Gradio interface.
	# Four tabs: a prediction form, demo visualizations, static information about
	# PCOS, and a model debug panel.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened -- confirm the Row/Column layout.
	with gr.Blocks(title="PCOS Detection Tool") as app:
	    gr.Markdown("# PCOS Detection and Analysis Tool")
	    gr.Markdown("This application uses machine learning to detect Polycystic Ovary Syndrome (PCOS) based on patient data.")

	    with gr.Tabs():
	        with gr.TabItem("Make Prediction"):
	            # Labels here are display text only; values reach the model
	            # through the positional input_components list further down.
	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### Patient Demographics")
	                    age = gr.Slider(18, 50, value=25, label="Age (yrs)")
	                    weight = gr.Slider(40, 120, value=60, label="Weight (Kg)")
	                    height = gr.Slider(140, 190, value=160, label="Height (cm)")
	                    blood_group = gr.Dropdown(["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"], value="A+", label="Blood Group")
	                    # BMI is its own slider; it is not derived from the
	                    # weight and height sliders above.
	                    bmi = gr.Slider(15, 40, value=22, label="BMI")

	                with gr.Column():
	                    gr.Markdown("### Vital Signs")
	                    pulse = gr.Slider(60, 120, value=80, label="Pulse rate (bpm)")
	                    rr = gr.Slider(12, 25, value=16, label="Respiratory Rate (breaths/min)")
	                    systolic = gr.Slider(90, 180, value=120, label="BP Systolic (mmHg)")
	                    diastolic = gr.Slider(60, 120, value=80, label="BP Diastolic (mmHg)")
	                    hb = gr.Slider(8, 18, value=12, label="Hemoglobin (g/dl)")

	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### Menstrual History")
	                    cycle_length = gr.Slider(21, 45, value=28, label="Cycle length (days)")
	                    cycle_regularity = gr.Radio([2, 4], value=2, label="Cycle Regularity (2=Regular, 4=Irregular)")

	                with gr.Column():
	                    gr.Markdown("### Physical Measurements")
	                    hip = gr.Slider(30, 60, value=40, label="Hip (inch)")
	                    waist = gr.Slider(20, 50, value=30, label="Waist (inch)")
	                    # Both ratios are independent sliders; nothing here keeps
	                    # them consistent with the hip and waist values.
	                    waist_hip_ratio = gr.Slider(0.6, 1.2, value=0.75, label="Waist:Hip Ratio")
	                    hip_waist_ratio = gr.Slider(1.0, 2.0, value=1.33, label="Hip:Waist Ratio")

	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### Symptoms")
	                    weight_gain = gr.Checkbox(label="Weight gain", value=False)
	                    hair_growth = gr.Checkbox(label="Excessive hair growth", value=False)
	                    skin_darkening = gr.Checkbox(label="Skin darkening", value=False)
	                    hair_loss = gr.Checkbox(label="Hair loss", value=False)
	                    pimples = gr.Checkbox(label="Pimples", value=False)

	                with gr.Column():
	                    gr.Markdown("### Lifestyle")
	                    fast_food = gr.Checkbox(label="Fast food consumption", value=False)
	                    regular_exercise = gr.Checkbox(label="Regular exercise", value=False)

	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### Ultrasound Findings")
	                    follicle_l = gr.Slider(0, 30, value=10, label="Follicle No. (Left)")
	                    follicle_r = gr.Slider(0, 30, value=10, label="Follicle No. (Right)")
	                    avg_fsize_l = gr.Slider(0, 25, value=5, label="Avg. Follicle size (Left) (mm)")
	                    avg_fsize_r = gr.Slider(0, 25, value=5, label="Avg. Follicle size (Right) (mm)")
	                    endometrium = gr.Slider(1, 20, value=8, label="Endometrium (mm)")

	                with gr.Column():
	                    gr.Markdown("### Hormone Levels")
	                    fsh = gr.Slider(0, 20, value=6, label="FSH (mIU/mL)")
	                    lh = gr.Slider(0, 20, value=7, label="LH (mIU/mL)")
	                    fsh_lh_ratio = gr.Slider(0, 3, value=0.85, label="FSH/LH Ratio")
	                    tsh = gr.Slider(0, 10, value=2.5, label="TSH (mIU/L)")
	                    amh = gr.Slider(0, 10, value=3, label="AMH (ng/mL)")
	                    prl = gr.Slider(0, 30, value=15, label="Prolactin (ng/mL)")
	                    vit_d3 = gr.Slider(0, 100, value=30, label="Vitamin D3 (ng/mL)")
	                    prg = gr.Slider(0, 20, value=5, label="Progesterone (ng/mL)")

	            with gr.Row():
	                with gr.Column():
	                    gr.Markdown("### Other Medical")
	                    married_years = gr.Slider(0, 20, value=0, label="Marriage Status (Years)")
	                    pregnant = gr.Checkbox(label="Currently Pregnant", value=False)
	                    abortions = gr.Slider(0, 5, value=0, label="Number of abortions")
	                    rbs = gr.Slider(70, 200, value=90, label="Random Blood Sugar (mg/dl)")
	                    beta_hcg1 = gr.Slider(0, 100, value=5, label="I beta-HCG (mIU/mL)")
	                    beta_hcg2 = gr.Slider(0, 100, value=5, label="II beta-HCG (mIU/mL)")

	            predict_btn = gr.Button("Predict PCOS Status")
	            prediction_output = gr.Textbox(label="Prediction Result")

	            # Connect inputs to prediction function.
	            # predict_pcos pairs these values positionally with the entries
	            # of `features`, so this list follows that order rather than the
	            # on-screen order. The weight_gain checkbox appears twice: once
	            # for "Weight gain(Y/N)" and once for "Weight gain".
	            input_components = [
	                age, weight, height, bmi, blood_group, pulse, rr, hb, cycle_length,
	                cycle_regularity, married_years, pregnant, abortions, hip, waist,
	                waist_hip_ratio, weight_gain, hair_growth, skin_darkening, hair_loss,
	                pimples, fast_food, regular_exercise, systolic, diastolic, follicle_l,
	                follicle_r, avg_fsize_l, avg_fsize_r, endometrium, fsh, lh, fsh_lh_ratio,
	                hip_waist_ratio, tsh, amh, prl, vit_d3, prg, rbs, weight_gain, beta_hcg1, beta_hcg2
	            ]

	            predict_btn.click(
	                predict_pcos,
	                inputs=input_components,
	                outputs=prediction_output
	            )

	        with gr.TabItem("Visualizations"):
	            gr.Markdown("### PCOS Data Analysis Visualizations")

	            visualization_choice = gr.Radio(
	                ["BMI vs Age", "Menstrual Cycle Length vs Age", "Follicle Distribution",
	                 "Follicle Count Boxplot", "Endometrium Thickness"],
	                value="BMI vs Age",
	                label="Select Visualization"
	            )

	            visualization_output = gr.Plot()

	            # The handler turns the chosen label into an index by looking it
	            # up in a second copy of the choices list; that copy must stay
	            # identical to the Radio choices above.
	            visualization_choice.change(
	                lambda choice: show_visualization(["BMI vs Age", "Menstrual Cycle Length vs Age",
	                                                   "Follicle Distribution", "Follicle Count Boxplot",
	                                                   "Endometrium Thickness"].index(choice)),
	                inputs=visualization_choice,
	                outputs=visualization_output
	            )

	        with gr.TabItem("About PCOS"):
	            # Static reference text; nothing on this tab is computed.
	            gr.Markdown("""
	# Polycystic Ovary Syndrome (PCOS)

	Polycystic ovary syndrome (PCOS) is a hormonal disorder common among women of reproductive age.
	Women with PCOS may have infrequent or prolonged menstrual periods or excess male hormone (androgen) levels.

	## Common Symptoms
	- Irregular periods
	- Excess androgen (elevated levels of male hormones)
	- Polycystic ovaries
	- Weight gain
	- Acne
	- Excessive hair growth (hirsutism)
	- Thinning hair or hair loss
	- Infertility

	## Risk Factors
	- Having a mother or sister with PCOS
	- Insulin resistance
	- Obesity

	## Complications
	- Infertility
	- Gestational diabetes or pregnancy-induced high blood pressure
	- Miscarriage or premature birth
	- Type 2 diabetes or prediabetes
	- Depression, anxiety, and eating disorders
	- Sleep apnea
	- Endometrial cancer
	- Cardiovascular disease

	## Treatment
	Treatment focuses on managing your individual concerns, such as infertility, hirsutism, acne or obesity.
	Specific treatment might involve lifestyle changes or medication.
	""")

	        with gr.TabItem("Debug Info"):
	            gr.Markdown("### Model and System Information")
	            # The initial text is computed once, while the UI is being built.
	            debug_output = gr.Textbox(label="Debug Information", value=f"Model type: {type(model).__name__}")

	            debug_btn = gr.Button("Check Model Status")

	            def check_model():
	                """Summarise the loaded model as text for the debug textbox.

	                Reports the model's type name, `n_estimators` and the indices
	                of the five largest `feature_importances_` when those
	                attributes exist, and whether `predict` / `predict_proba`
	                are callable. Returns an error string instead of raising.
	                """
	                try:
	                    if model is None:
	                        return "Model not loaded"

	                    model_info = f"Model type: {type(model).__name__}\n"

	                    # Try to get additional info based on model type
	                    if hasattr(model, 'n_estimators'):
	                        model_info += f"Number of estimators: {model.n_estimators}\n"

	                    if hasattr(model, 'feature_importances_'):
	                        # argsort is ascending, so the last five indices are
	                        # the five largest importances (least to most).
	                        top_features = np.argsort(model.feature_importances_)[-5:]
	                        model_info += "Top 5 important features (indices): " + str(top_features.tolist()) + "\n"

	                    # Check if the model has predict and predict_proba methods
	                    has_predict = hasattr(model, 'predict') and callable(getattr(model, 'predict'))
	                    has_proba = hasattr(model, 'predict_proba') and callable(getattr(model, 'predict_proba'))

	                    model_info += f"Has predict method: {has_predict}\n"
	                    model_info += f"Has predict_proba method: {has_proba}\n"

	                    return model_info
	                except Exception as e:
	                    return f"Error checking model: {str(e)}"

	            debug_btn.click(check_model, outputs=debug_output)

	# Launch the app only when this file is run as a script (not on import).
	# share=True requests a public Gradio share link; debug=True turns on
	# Gradio's debug mode for this launch.
	if __name__ == "__main__":
	    app.launch(share=True, debug=True)