Spaces:
Sleeping
Sleeping
File size: 4,538 Bytes
21904aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import streamlit as st
import joblib
import numpy as np
import pandas as pd
# --- 1. Load Model and Dataset for Feature Information ---
@st.cache_resource
def load_data_and_model():
    """
    Load the pre-trained classifier and the reference dataset for the UI.

    The result is cached with st.cache_resource, which is the cache Streamlit
    provides for global resources such as ML models: the same objects are
    handed back on every rerun instead of being serialized and copied. Callers
    must therefore treat the returned objects as read-only.

    Returns:
        tuple: (model, feature_names, feature_data) where
            model is the object unpickled from 'voting_classifier_model.joblib',
            feature_names is the list of column labels from the third column
            onward of 'breast-cancer.xls', and
            feature_data is the DataFrame restricted to those columns.

    If the model file is missing, the dataset file is missing, or the dataset
    cannot be read or processed, an error is shown in the app and the script
    run is halted with st.stop().
    """
    try:
        # Load the pre-trained Voting Classifier model.
        # NOTE: joblib.load unpickles arbitrary objects; only ship model files
        # that come from a trusted source.
        model = joblib.load('voting_classifier_model.joblib')
    except FileNotFoundError:
        st.error("The model file 'voting_classifier_model.joblib' was not found.")
        st.info("Please ensure the model file is in the same directory as this script.")
        st.stop()

    try:
        # Load the dataset to get feature names and slider ranges/defaults.
        df = pd.read_excel('breast-cancer.xls')
        # Assumes the first column is 'id' and the second is 'diagnosis'
        # (the target); everything after that is treated as a feature.
        feature_names = df.columns[2:].tolist()
        # Keep only the feature columns for min/max/mean calculations.
        feature_data = df[feature_names]
    except FileNotFoundError:
        st.error("The dataset file 'breast-cancer.xls' was not found.")
        st.info("Please ensure your Excel file is in the same directory as this script.")
        st.stop()
    except Exception as e:
        # UI boundary: surface any read/parse failure to the user and halt.
        st.error(f"Could not load or process the dataset file. Error: {e}")
        st.stop()

    return model, feature_names, feature_data
# --- 2. Streamlit App Interface ---
# Page configuration is issued before anything else: load_data_and_model() is
# a cached function that can itself emit Streamlit elements (st.error/st.info),
# and set_page_config is documented as the first Streamlit command of a page.
st.set_page_config(page_title="Breast Cancer Predictor", layout="wide")

# Load the model and the dataset-derived feature metadata used by the widgets.
model, feature_names, feature_data = load_data_and_model()

# Main Title
st.title("🔬 Breast Cancer Prediction Interface")
st.markdown("""
This application uses your pre-trained model to predict whether a breast tumor is **Malignant** or **Benign**.
The input fields below are based on the columns from your `breast-cancer.xls` file.
""")
st.write("---")
# --- 3. User Input via Sliders ---
st.sidebar.header("Input Tumor Features")
st.sidebar.markdown("Use the sliders to provide the feature values.")

# Maps each feature name to the value currently selected on its slider.
input_features = {}

# One sidebar slider per feature column, in the order given by feature_names.
for feature in feature_names:
    column = feature_data[feature]

    # Slider range is the observed min/max of the column; it starts at the mean.
    lower_bound = float(column.min())
    upper_bound = float(column.max())
    starting_value = float(column.mean())

    # Human-friendly label: underscores become spaces, words are title-cased.
    pretty_label = feature.replace('_', ' ').title()

    input_features[feature] = st.sidebar.slider(
        label=pretty_label,
        min_value=lower_bound,
        max_value=upper_bound,
        value=starting_value,
        key=f"slider_{feature}",
    )

st.sidebar.write("---")
# --- 4. Prediction Logic ---
# Build a single-row 2-D array for the model. Values are looked up by walking
# feature_names so the column order is explicitly the dataset's column order.
input_data = np.array([[input_features[name] for name in feature_names]])

# Main section for displaying inputs and results
st.header("Prediction Results")
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("Current Input Values")
    st.json(input_features)

# "Predict" button
if st.button("✨ Predict Diagnosis", key="predict_button"):
    try:
        # predict() is expected to return the string label (e.g. 'M' or 'B').
        prediction_label = model.predict(input_data)[0]

        # Not every estimator exposes predict_proba (a VotingClassifier only
        # does with voting='soft'). A missing probability API must not hide a
        # diagnosis that was computed successfully.
        if hasattr(model, "predict_proba"):
            prediction_proba = model.predict_proba(input_data)[0]
        else:
            prediction_proba = None

        with col2:
            st.subheader("Diagnosis")
            # 'M' is shown as Malignant; any other label is shown as Benign.
            if prediction_label.upper() == 'M':
                st.error("Predicted Diagnosis: **Malignant**")
            else:
                st.success("Predicted Diagnosis: **Benign**")

            st.subheader("Prediction Confidence")
            if prediction_proba is None:
                st.info("This model does not provide class probabilities.")
            else:
                # Probabilities are ordered like model.classes_, so pair them
                # up directly instead of assuming a fixed class order.
                for label, probability in zip(model.classes_, prediction_proba):
                    display_label = "Malignant" if label.upper() == 'M' else "Benign"
                    st.write(f"Confidence for **{display_label}**: `{probability:.2%}`")
    except Exception as e:
        # UI boundary: report any prediction failure instead of crashing the app.
        st.error(f"An error occurred during prediction: {e}")

st.write("---")