Testys committed on
Commit
5638701
·
verified ·
1 Parent(s): 25ff0c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -48
app.py CHANGED
@@ -1,56 +1,74 @@
1
  import streamlit as st
2
  import joblib
3
  import numpy as np
4
- import pandas as pd
5
 
6
- # --- 1. Load Model and Dataset for Feature Information ---
7
 
8
- @st.cache_data
9
- def load_data_and_model():
10
  """
11
- Loads the saved model and the dataset from the Excel file.
12
- Using st.cache_data to avoid reloading on every interaction.
13
  """
14
  try:
15
  # Load the pre-trained Voting Classifier model
16
  model = joblib.load('voting_classifier_model.joblib')
 
17
  except FileNotFoundError:
18
  st.error("The model file 'voting_classifier_model.joblib' was not found.")
19
  st.info("Please ensure the model file is in the same directory as this script.")
20
  st.stop()
21
-
22
- try:
23
- # Load your specific dataset to get feature names and default values
24
- df = pd.read_excel('breast-cancer.xls')
25
- # Assuming the first column is 'id' and the second is 'diagnosis' (the target)
26
- # The rest are the features.
27
- feature_names = df.columns[2:].tolist()
28
-
29
- # Store the dataframe for calculating min/max/mean values for sliders
30
- feature_data = df[feature_names]
31
-
32
- except FileNotFoundError:
33
- st.error("The dataset file 'breast-cancer.xls' was not found.")
34
- st.info("Please ensure your Excel file is in the same directory as this script.")
35
- st.stop()
36
  except Exception as e:
37
- st.error(f"Could not load or process the dataset file. Error: {e}")
38
  st.stop()
39
-
40
- return model, feature_names, feature_data
41
 
42
- model, feature_names, feature_data = load_data_and_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
  # --- 2. Streamlit App Interface ---
46
 
47
- st.set_page_config(page_title="Breast Cancer Predictor", layout="wide")
48
 
49
  # Main Title
50
- st.title("🔬 Breast Cancer Prediction Interface")
51
  st.markdown("""
52
- This application uses your pre-trained model to predict whether a breast tumor is **Malignant** or **Benign**.
53
- The input fields below are based on the columns from your `breast-cancer.xls` file.
54
  """)
55
 
56
  st.write("---")
@@ -64,19 +82,14 @@ st.sidebar.markdown("Use the sliders to provide the feature values.")
64
  # Dictionary to hold the user's input
65
  input_features = {}
66
 
67
- # Create sliders for all features based on your Excel file
68
- for feature in feature_names:
69
- # Set min/max/default values from the actual data for better usability
70
- min_val = float(feature_data[feature].min())
71
- max_val = float(feature_data[feature].max())
72
- mean_val = float(feature_data[feature].mean())
73
-
74
- # Create a slider for each feature
75
  input_features[feature] = st.sidebar.slider(
76
  label=f"{feature.replace('_', ' ').title()}",
77
- min_value=min_val,
78
- max_value=max_val,
79
- value=mean_val,
80
  key=f"slider_{feature}"
81
  )
82
 
@@ -86,7 +99,6 @@ st.sidebar.write("---")
86
  # --- 4. Prediction Logic ---
87
 
88
  # Convert the dictionary of input features into a NumPy array
89
- # The order of features must match the order in the feature_names list
90
  input_data = np.array([list(input_features.values())])
91
 
92
  # Main section for displaying inputs and results
@@ -100,17 +112,16 @@ with col1:
100
  # "Predict" button
101
  if st.button("✨ Predict Diagnosis", key="predict_button"):
102
  try:
103
- # Make prediction. This returns the string label directly (e.g., 'M' or 'B').
104
  prediction_label = model.predict(input_data)[0]
105
 
106
- # Get prediction probabilities. The order corresponds to model.classes_
107
  prediction_proba = model.predict_proba(input_data)[0]
108
 
109
  with col2:
110
  st.subheader("Diagnosis")
111
- # Display the predicted label directly
112
- # We check for 'M' or 'B' as is common in this dataset
113
- if prediction_label.upper() == 'M':
114
  st.error("Predicted Diagnosis: **Malignant**")
115
  else:
116
  st.success("Predicted Diagnosis: **Benign**")
@@ -119,9 +130,9 @@ if st.button("✨ Predict Diagnosis", key="predict_button"):
119
  # Get the class labels from the model itself to ensure correct order
120
  class_labels = list(model.classes_)
121
 
122
- # Display probabilities for each class using the model's class order
123
  for i, label in enumerate(class_labels):
124
- display_label = "Malignant" if label.upper() == 'M' else "Benign"
125
  st.write(f"Confidence for **{display_label}**: `{prediction_proba[i]:.2%}`")
126
 
127
  except Exception as e:
 
1
  import streamlit as st
2
  import joblib
3
  import numpy as np
 
4
 
5
+ # --- 1. Load Model and Define Feature Information ---
6
 
7
@st.cache_resource
def load_model():
    """
    Load the saved Voting Classifier model from 'voting_classifier_model.joblib'.

    Decorated with st.cache_resource so the model is loaded only once.

    The file is looked up next to this script first, which is where the
    user-facing error message tells people to put it. If it is not there,
    the bare filename is used, i.e. it is resolved against the current
    working directory as before.

    Returns:
        The object deserialized by joblib.load.

    On any loading failure an error is shown in the app and st.stop() is
    called, so no value is returned in that case.
    """
    from pathlib import Path

    model_filename = 'voting_classifier_model.joblib'

    # Default: relative path, resolved against the current working directory.
    model_path = Path(model_filename)

    # Prefer the copy sitting beside the script so the app also works when it
    # is launched from a different working directory.
    script_file = globals().get('__file__')
    if script_file is not None:
        beside_script = Path(script_file).resolve().parent / model_filename
        if beside_script.is_file():
            model_path = beside_script

    try:
        # Load the pre-trained Voting Classifier model
        return joblib.load(model_path)
    except FileNotFoundError:
        st.error("The model file 'voting_classifier_model.joblib' was not found.")
        st.info("Please ensure the model file is in the same directory as this script.")
        st.stop()
    except Exception as e:
        st.error(f"An error occurred while loading the model: {e}")
        st.stop()
 
 
23
 
24
+ model = load_model()
25
+
26
+ # --- Hardcoded Feature Information ---
27
+ # We define the feature names and their typical ranges (min, mean, max)
28
+ # This removes the need to load the original dataset file.
29
+ FEATURE_INFO = {
30
+ 'radius_mean': [6.98, 14.12, 28.11],
31
+ 'texture_mean': [9.71, 19.28, 39.28],
32
+ 'perimeter_mean': [43.79, 91.96, 188.5],
33
+ 'area_mean': [143.5, 654.8, 2501.0],
34
+ 'smoothness_mean': [0.05, 0.09, 0.16],
35
+ 'compactness_mean': [0.01, 0.10, 0.34],
36
+ 'concavity_mean': [0.0, 0.08, 0.42],
37
+ 'concave points_mean': [0.0, 0.04, 0.20],
38
+ 'symmetry_mean': [0.10, 0.18, 0.30],
39
+ 'fractal_dimension_mean': [0.04, 0.06, 0.09],
40
+ 'radius_se': [0.11, 0.40, 2.87],
41
+ 'texture_se': [0.36, 1.21, 4.88],
42
+ 'perimeter_se': [0.75, 2.86, 21.98],
43
+ 'area_se': [6.80, 40.33, 542.2],
44
+ 'smoothness_se': [0.001, 0.007, 0.031],
45
+ 'compactness_se': [0.002, 0.025, 0.135],
46
+ 'concavity_se': [0.0, 0.031, 0.396],
47
+ 'concave points_se': [0.0, 0.011, 0.052],
48
+ 'symmetry_se': [0.007, 0.020, 0.078],
49
+ 'fractal_dimension_se': [0.0008, 0.003, 0.029],
50
+ 'radius_worst': [7.93, 16.26, 36.04],
51
+ 'texture_worst': [12.02, 25.67, 49.54],
52
+ 'perimeter_worst': [50.41, 107.26, 251.2],
53
+ 'area_worst': [185.2, 880.5, 4254.0],
54
+ 'smoothness_worst': [0.07, 0.13, 0.22],
55
+ 'compactness_worst': [0.02, 0.25, 1.05],
56
+ 'concavity_worst': [0.0, 0.27, 1.25],
57
+ 'concave points_worst': [0.0, 0.11, 0.29],
58
+ 'symmetry_worst': [0.15, 0.29, 0.66],
59
+ 'fractal_dimension_worst': [0.05, 0.08, 0.20]
60
+ }
61
+ feature_names = list(FEATURE_INFO.keys())
62
 
63
 
64
  # --- 2. Streamlit App Interface ---
65
 
66
+ st.set_page_config(page_title="Breast Cancer Diagnosis System", layout="wide")
67
 
68
  # Main Title
69
+ st.title("🔬 Breast Cancer Diagnosis System Interface")
70
  st.markdown("""
71
+ This application uses a pre-trained model to predict whether a breast tumor is **Malignant** or **Benign**.
 
72
  """)
73
 
74
  st.write("---")
 
82
  # Dictionary to hold the user's input
83
  input_features = {}
84
 
85
+ # Create sliders for all features based on the hardcoded info
86
+ for feature, values in FEATURE_INFO.items():
87
+ min_val, mean_val, max_val = values
 
 
 
 
 
88
  input_features[feature] = st.sidebar.slider(
89
  label=f"{feature.replace('_', ' ').title()}",
90
+ min_value=float(min_val),
91
+ max_value=float(max_val),
92
+ value=float(mean_val), # Default to the mean value
93
  key=f"slider_{feature}"
94
  )
95
 
 
99
  # --- 4. Prediction Logic ---
100
 
101
  # Convert the dictionary of input features into a NumPy array
 
102
  input_data = np.array([list(input_features.values())])
103
 
104
  # Main section for displaying inputs and results
 
112
  # "Predict" button
113
  if st.button("✨ Predict Diagnosis", key="predict_button"):
114
  try:
115
+ # Make prediction
116
  prediction_label = model.predict(input_data)[0]
117
 
118
+ # Get prediction probabilities
119
  prediction_proba = model.predict_proba(input_data)[0]
120
 
121
  with col2:
122
  st.subheader("Diagnosis")
123
+ # Display the predicted label
124
+ if str(prediction_label).upper() == 'M':
 
125
  st.error("Predicted Diagnosis: **Malignant**")
126
  else:
127
  st.success("Predicted Diagnosis: **Benign**")
 
130
  # Get the class labels from the model itself to ensure correct order
131
  class_labels = list(model.classes_)
132
 
133
+ # Display probabilities for each class
134
  for i, label in enumerate(class_labels):
135
+ display_label = "Malignant" if str(label).upper() == 'M' else "Benign"
136
  st.write(f"Confidence for **{display_label}**: `{prediction_proba[i]:.2%}`")
137
 
138
  except Exception as e: