Spaces:

Sowmith22
/

CancerPrediction

Sleeping

App Files Files Community

CancerPrediction / app.py

Sowmith22

Rename app (1).py to app.py

b7d55b3 verified 29 days ago

raw

history blame contribute delete

4.52 kB

	# app.py - Streamlit cancer prediction app

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.impute import SimpleImputer
	from sklearn.compose import ColumnTransformer
	from sklearn.linear_model import LogisticRegression

	# -----------------------------
	# Backstory, Problem Statement, Applications
	# -----------------------------
	# Static copy for the page, kept in named constants so the layout calls
	# below stay short. The text itself is rendered as Markdown by Streamlit.
	ABOUT_PROJECT_MD = """
	Backstory:
	Cancer remains a leading cause of death worldwide. Early prediction and detection play a crucial role in improving survival rates. As data availability has increased, machine learning provides powerful tools to help medical professionals make more accurate predictions.

	Problem Statement:
	Develop a machine learning model that predicts the likelihood of cancer based on patient features. The goal is to assist healthcare providers in making better-informed diagnostic decisions.

	Applications:
	- Early detection and screening support.
	- Prioritization of high-risk patients.
	- Resource allocation in healthcare systems.
	"""

	APP_INTRO_MD = """
	This app allows you to manually input all patient features and receive a prediction indicating whether cancer is present or not. The model is built with a robust pipeline combining imputation, scaling, encoding, and logistic regression.
	"""

	# Main-panel heading first, then the sidebar "about" section, then the
	# introductory paragraph in the main panel.
	st.title("🧬 Cancer Prediction using Machine Learning")

	about_panel = st.sidebar
	about_panel.title("About Project")
	about_panel.markdown(ABOUT_PROJECT_MD)

	st.markdown(APP_INTRO_MD)

	# -----------------------------
	# Load Data
	# -----------------------------
	@st.cache_data
	def load_data() -> pd.DataFrame:
	    """Read the cancer dataset from ``cancer_prediction_data.csv``.

	    The path is relative, so the CSV is resolved against the process's
	    working directory. The function is wrapped in ``st.cache_data`` so
	    Streamlit can reuse the loaded frame across script reruns.

	    Returns:
	        The DataFrame produced by ``pd.read_csv`` for that file.
	    """
	    return pd.read_csv("cancer_prediction_data.csv")

	data = load_data()

	# Show the raw column names so a mismatch with TARGET_COLUMN is easy to spot.
	column_names = list(data.columns)
	st.subheader("Dataset Columns:")
	st.write(column_names)

	# -----------------------------
	# Identify Target and Features
	# -----------------------------
	TARGET_COLUMN = "Cancer_Present"

	# Without the target column nothing below can run: report and halt the script.
	target_is_missing = TARGET_COLUMN not in data.columns
	if target_is_missing:
	    st.error(f"❌ Target column '{TARGET_COLUMN}' not found. Please check dataset columns above.")
	    st.stop()

	# The target becomes y; every remaining column is a model input.
	y = data[TARGET_COLUMN]
	X = data.drop(columns=[TARGET_COLUMN])

	# -----------------------------
	# Detect categorical and numeric columns
	# -----------------------------
	# Columns are routed by dtype; any dtype outside these two groups lands in
	# neither list.
	categorical_frame = X.select_dtypes(include=["object", "category"])
	numeric_frame = X.select_dtypes(include=["int64", "float64"])

	categorical_cols = categorical_frame.columns.tolist()
	numeric_cols = numeric_frame.columns.tolist()

	st.write("Categorical columns detected:", categorical_cols)
	st.write("Numeric columns detected:", numeric_cols)

	# -----------------------------
	# Preprocessing pipeline
	# -----------------------------
	# Numeric features: fill missing values with the column mean, then standardize.
	numeric_pipeline = Pipeline([
	    ("imputer", SimpleImputer(strategy="mean")),
	    ("scaler", StandardScaler()),
	])

	# Categorical features: fill missing values with the most frequent category,
	# then one-hot encode. Categories unseen during fit are ignored at predict time.
	categorical_pipeline = Pipeline([
	    ("imputer", SimpleImputer(strategy="most_frequent")),
	    ("encoder", OneHotEncoder(handle_unknown="ignore")),
	])

	# Route each column group through its own sub-pipeline.
	preprocessor = ColumnTransformer([
	    ("num", numeric_pipeline, numeric_cols),
	    ("cat", categorical_pipeline, categorical_cols),
	])

	pipeline = Pipeline([
	    ("preprocessor", preprocessor),
	    ("classifier", LogisticRegression(max_iter=1000)),
	])

	# -----------------------------
	# Train the pipeline
	# -----------------------------
	@st.cache_resource
	def _fit_pipeline_once(_unfitted_pipeline, features, target):
	    """Fit the pipeline and let Streamlit cache the fitted object.

	    Streamlit re-executes the whole script on every widget interaction, so
	    an uncached ``fit`` would retrain the model each time a sidebar value
	    changes. Caching keys on ``features`` and ``target`` only.

	    Args:
	        _unfitted_pipeline: The estimator to fit. The leading underscore
	            tells Streamlit not to hash this argument.
	        features: Training inputs.
	        target: Training labels.

	    Returns:
	        The fitted pipeline (``fit`` returns the estimator itself).

	    Note:
	        Because the estimator is not part of the cache key, editing the
	        pipeline definition while the server keeps running requires
	        clearing Streamlit's cache (or restarting) to retrain.
	    """
	    return _unfitted_pipeline.fit(features, target)

	pipeline = _fit_pipeline_once(pipeline, X, y)
	# On a cache hit the objects built above are fresh and unfitted, so point
	# the module-level name at the fitted preprocessing step instead.
	preprocessor = pipeline.named_steps["preprocessor"]

	# -----------------------------
	# Sidebar Inputs for Prediction
	# -----------------------------
	st.sidebar.header("Enter Patient Features")

	def user_input_features():
	    """Collect one patient's feature values from sidebar widgets.

	    Each column in ``numeric_cols`` gets a number input bounded by that
	    column's min and max in ``X`` and pre-filled with its mean. Each column
	    in ``categorical_cols`` gets a select box listing the sorted non-missing
	    values seen in ``X``. Numeric widgets are created before categorical ones.

	    Returns:
	        A single-row DataFrame whose columns are the feature names.
	    """
	    def stat_or(stat, fallback):
	        # A NaN statistic (e.g. from an all-missing column) cannot be used
	        # as a widget bound, so substitute the fixed fallback.
	        return fallback if pd.isna(stat) else float(stat)

	    panel = st.sidebar

	    numeric_values = {
	        name: panel.number_input(
	            f"{name}",
	            min_value=stat_or(X[name].min(), 0.0),
	            max_value=stat_or(X[name].max(), 1.0),
	            value=stat_or(X[name].mean(), 0.0),
	        )
	        for name in numeric_cols
	    }

	    categorical_values = {
	        name: panel.selectbox(
	            f"{name}",
	            options=sorted(X[name].dropna().unique()),
	        )
	        for name in categorical_cols
	    }

	    # Numeric entries first, then categorical, wrapped as a one-row frame.
	    return pd.DataFrame([{**numeric_values, **categorical_values}])

	# Build the sidebar widgets and capture the current patient row.
	input_df = user_input_features()

	# -----------------------------
	# Make Prediction
	# -----------------------------
	# Predict only when the user clicks the button on this script run.
	if st.button("Predict"):
	    prediction = pipeline.predict(input_df)[0]
	    # Second column of predict_proba for the single input row.
	    # NOTE(review): this is the probability of label 1 only if the target
	    # is encoded as 0/1 - confirm against the dataset.
	    prediction_proba = pipeline.predict_proba(input_df)[0][1]

	    st.subheader("Prediction Result")

	    if prediction == 1:
	        st.error("⚠️ Cancer is Present")
	    else:
	        st.success("✅ No Cancer")

	    # Previously computed but never shown; surface it so the user sees the
	    # model's confidence alongside the hard label.
	    st.write(f"Estimated probability of cancer: {prediction_proba:.1%}")

	# -----------------------------
	# Additional Info
	# -----------------------------