File size: 4,538 Bytes
21904aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import streamlit as st
import joblib
import numpy as np
import pandas as pd

# --- 1. Load Model and Dataset for Feature Information ---

@st.cache_resource
def _load_model(model_path):
    """Load the pre-trained classifier stored at *model_path*.

    Cached with ``st.cache_resource`` so the model object is loaded once and
    then shared between reruns instead of being serialised and copied each
    time, which is what ``st.cache_data`` does with its return value.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects. Only point this
    # at model files that come from a trusted source.
    return joblib.load(model_path)


@st.cache_data
def _load_feature_table(dataset_path):
    """Read the Excel dataset and return ``(feature_names, feature_data)``.

    The first two columns are assumed to be the record id and the diagnosis
    target (as in the original script); every remaining column is treated as
    a feature.
    """
    df = pd.read_excel(dataset_path)
    feature_names = df.columns[2:].tolist()
    # Keep only the feature columns; they drive the slider min/max/mean.
    return feature_names, df[feature_names]


def load_data_and_model():
    """
    Load the saved model and the feature columns of the Excel dataset.

    The heavy lifting is delegated to two cached helpers so that neither the
    model nor the spreadsheet is reloaded on every interaction.

    Returns:
        A ``(model, feature_names, feature_data)`` tuple: the object loaded
        from 'voting_classifier_model.joblib', the list of feature column
        names, and a DataFrame restricted to those columns.

    If either file is missing, or the dataset cannot be read, an error is
    shown in the app and the script run is halted with ``st.stop()``.
    """
    try:
        # Load the pre-trained Voting Classifier model
        model = _load_model('voting_classifier_model.joblib')
    except FileNotFoundError:
        st.error("The model file 'voting_classifier_model.joblib' was not found.")
        st.info("Please ensure the model file is in the same directory as this script.")
        st.stop()

    try:
        # Load the dataset to get feature names and slider defaults
        feature_names, feature_data = _load_feature_table('breast-cancer.xls')
    except FileNotFoundError:
        st.error("The dataset file 'breast-cancer.xls' was not found.")
        st.info("Please ensure your Excel file is in the same directory as this script.")
        st.stop()
    except Exception as e:
        # UI boundary: surface any read/parse problem to the user and stop.
        st.error(f"Could not load or process the dataset file. Error: {e}")
        st.stop()

    return model, feature_names, feature_data

# Page configuration is issued before anything else can render. The loader
# below may emit Streamlit elements (cache spinner, error messages), and
# Streamlit releases that require set_page_config to be the first command
# raise an exception if an element has already been sent.
st.set_page_config(page_title="Breast Cancer Predictor", layout="wide")

# Model, feature column names, and the feature columns of the dataset.
model, feature_names, feature_data = load_data_and_model()


# --- 2. Streamlit App Interface ---

# Main Title
st.title("🔬 Breast Cancer Prediction Interface")
st.markdown("""
This application uses your pre-trained model to predict whether a breast tumor is **Malignant** or **Benign**. 
The input fields below are based on the columns from your `breast-cancer.xls` file.
""")

st.write("---")


# --- 3. User Input via Sliders ---

sidebar = st.sidebar
sidebar.header("Input Tumor Features")
sidebar.markdown("Use the sliders to provide the feature values.")

# Maps each feature name to the value currently selected on its slider.
input_features = {}

# One slider per feature column. The bounds and the starting position are
# taken from the dataset itself: column minimum, maximum and mean.
for column_name in feature_names:
    column = feature_data[column_name]
    lower_bound = float(column.min())
    upper_bound = float(column.max())
    starting_value = float(column.mean())

    input_features[column_name] = sidebar.slider(
        label=column_name.replace('_', ' ').title(),
        min_value=lower_bound,
        max_value=upper_bound,
        value=starting_value,
        key=f"slider_{column_name}",
    )

sidebar.write("---")


# --- 4. Prediction Logic ---

# Arrange the slider values as a single-row 2-D array. Dict insertion order
# is the order in which the sliders were created, so the columns follow the
# feature order used to build the inputs.
input_data = np.array([list(input_features.values())])

# Display names for the single-letter class labels common in this dataset.
_DIAGNOSIS_NAMES = {"M": "Malignant", "B": "Benign"}


def _diagnosis_name(label):
    """Return the display name for a class label.

    'M'/'B' (any case) map to "Malignant"/"Benign". Any other label, including
    non-string labels, is returned as its string form rather than being
    reported as "Benign" by default.
    """
    text = str(label)
    return _DIAGNOSIS_NAMES.get(text.upper(), text)


# Main section for displaying inputs and results
st.header("Prediction Results")
col1, col2 = st.columns([2, 1])

with col1:
    st.subheader("Current Input Values")
    st.json(input_features)

# "Predict" button
if st.button("✨ Predict Diagnosis", key="predict_button"):
    try:
        # Predicted class label for the single input row.
        prediction_label = model.predict(input_data)[0]

        with col2:
            st.subheader("Diagnosis")
            diagnosis = _diagnosis_name(prediction_label)
            if diagnosis == "Malignant":
                st.error("Predicted Diagnosis: **Malignant**")
            elif diagnosis == "Benign":
                st.success("Predicted Diagnosis: **Benign**")
            else:
                # Unrecognised label: show it verbatim instead of guessing.
                st.info(f"Predicted Diagnosis: **{diagnosis}**")

            st.subheader("Prediction Confidence")
            # Some classifiers (e.g. a hard-voting VotingClassifier) do not
            # expose predict_proba; the diagnosis above is still valid then.
            if hasattr(model, "predict_proba"):
                prediction_proba = model.predict_proba(input_data)[0]
                # Probabilities are ordered like model.classes_.
                for label, probability in zip(model.classes_, prediction_proba):
                    st.write(
                        f"Confidence for **{_diagnosis_name(label)}**: `{probability:.2%}`"
                    )
            else:
                st.info("Class probabilities are not available for this model.")

    except Exception as e:
        # UI boundary: report any prediction failure to the user.
        st.error(f"An error occurred during prediction: {e}")

st.write("---")