kothariyashhh committed on
Commit
4b132af
·
verified ·
1 Parent(s): 08b841e

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +173 -0
  2. model/best_model.pkl +3 -0
  3. model/preprocessor.pkl +3 -0
  4. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import joblib
7
+ from sklearn.compose import ColumnTransformer
8
+ from sklearn.preprocessing import RobustScaler
9
+ from lightgbm import LGBMRegressor
10
+
11
class RULPredictionApp:
    """Streamlit page that predicts Remaining Useful Life (RUL) and plots the data.

    On construction the trained model, the fitted preprocessor and the CSV
    dataset are loaded, and the dataset is reduced to the numeric feature
    table for the "Arm500" steel type (``self.df_Arm500``).  ``run`` then
    renders the sidebar inputs, the prediction and the optional charts.
    """

    # Columns kept from the raw CSV.  The order is preserved all the way to
    # the model input and to the feature-importance labels, so do not reorder.
    _COLUMNS = (
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
        'workpiece_weight, tonn', 'cast_in_row', 'workpiece_slice_geometry',
        'alloy_type', 'steel_weight_theoretical, tonn', 'metal_residue_grab1, tonn',
        'steel_weight, tonn', 'resistance, tonn', 'swing_frequency, amount/minute',
        'crystallizer_movement, mm', 'alloy_speed, meter/minute',
        'water_consumption, liter/minute', 'water_temperature_delta, Celsius deg.',
        'Ce, %', 'C, %', 'Si, %', 'Mn,%', 'S, %', 'P, %', 'Cr, %', 'Ni, %',
        'Cu, %', 'As, %', 'Mo, %', 'Nb, %', 'Ti, %', 'V, %', 'Al, %', 'Ca, %',
        'N, %', 'Pb, %', 'Mg, %', 'Zn, %', 'sleeve', 'num_crystallizer', 'num_stream', 'RUL',
    )

    # Columns only needed for cleaning/filtering; removed from the final table.
    _DROPPED_COLUMNS = ('date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement')

    # Steel type the feature table is restricted to.
    _STEEL_TYPE = "Arm500"

    # Non-numeric 'sleeve' value found in the data and the replacement the
    # app substitutes for it before the column is converted to integers.
    _SLEEVE_FIXES = {'530314 К': '540314000'}

    def __init__(self, model_path='model/best_model.pkl',
                 preprocessor_path='model/preprocessor.pkl',
                 dataset_path='dataset/ccm_rul_dataset.csv'):
        """Load the model, the preprocessor and the dataset.

        Args:
            model_path: Path of the joblib-serialised trained model.
            preprocessor_path: Path of the joblib-serialised preprocessor.
            dataset_path: Path of the CSV dataset.
        """
        # NOTE: joblib.load unpickles its input; only point these paths at
        # trusted artefacts.
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)
        self.df = pd.read_csv(dataset_path)
        self.df_Arm500 = self.preprocess_data()

    def preprocess_data(self):
        """Build the cleaned feature table for the configured steel type.

        ``self.df`` is left untouched.

        Returns:
            A DataFrame holding the feature columns plus ``RUL`` for rows
            whose ``steel_type`` is "Arm500", with the two categorical
            columns encoded as 0/1 and ``sleeve`` converted to integers.
        """
        # Work on an explicit copy of just the needed columns so later
        # assignments neither touch self.df nor write into a slice.
        df = self.df[list(self._COLUMNS)].copy()

        # Prefix the caster sample time with the date, then parse both.
        df['sample_time_continuous_caster'] = pd.to_datetime(
            df['date'] + " " + df['sample_time_continuous_caster']
        )
        df['date'] = pd.to_datetime(df['date'])

        # Encode the two categorical columns as 0/1; values outside the
        # mapping become NaN and are removed by dropna below.
        df['workpiece_slice_geometry'] = df['workpiece_slice_geometry'].map({'150x150': 1, '180x180': 0})
        df['alloy_type'] = df['alloy_type'].map({'open': 1, 'close': 0})
        df = df.rename(columns={
            'workpiece_slice_geometry': 'workpiece_slice_geometry_150x150',
            'alloy_type': 'alloy_type_open',
        })

        for bad_value, replacement in self._SLEEVE_FIXES.items():
            df.loc[df['sleeve'] == bad_value, 'sleeve'] = replacement

        # Drop incomplete rows BEFORE the integer cast: casting a column that
        # still contains NaN raises instead of letting the row be discarded.
        df = df.dropna(axis=0)
        df['sleeve'] = df['sleeve'].astype(int)

        selected = df[df['steel_type'] == self._STEEL_TYPE]
        return selected.drop(columns=list(self._DROPPED_COLUMNS))

    def _features(self):
        """Return the feature table without the ``RUL`` target column."""
        return self.df_Arm500.drop(columns=['RUL'])

    def _transform(self, features):
        """Run ``features`` through the preprocessor and re-attach column names.

        Assumes the preprocessor returns one output column per input column
        in the same order -- TODO confirm against the fitted preprocessor.
        """
        return pd.DataFrame(self.preprocessor.transform(features), columns=features.columns)

    @staticmethod
    def _show_figure(fig):
        """Render ``fig`` in the page, then close it so figures do not pile up."""
        st.pyplot(fig)
        plt.close(fig)

    def get_input_features(self):
        """Create one sidebar number input per feature.

        Each input defaults to the mean of that feature in the feature table.

        Returns:
            A single-row DataFrame with the values currently entered.
        """
        x = self._features()
        input_features = {}
        for feature in x.columns:
            input_features[feature] = st.sidebar.number_input(feature, value=float(x[feature].mean()))
        return pd.DataFrame([input_features])

    def predict_rul(self, input_df):
        """Predict RUL for the rows of ``input_df``.

        Args:
            input_df: DataFrame with the same feature columns as the
                feature table.

        Returns:
            Whatever ``self.model.predict`` returns for the transformed rows.
        """
        return self.model.predict(self._transform(input_df))

    def plot_correlation_matrix(self):
        """Show a heatmap of the pairwise correlations of the feature table."""
        st.header("Correlation Matrix")
        st.write("The correlation matrix shows the correlation coefficients between the variables. It helps to understand the linear relationships between features.")

        corr_matrix = self.df_Arm500.corr()
        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=True, fmt=".2f", ax=ax)
        self._show_figure(fig)

    def plot_feature_importance(self):
        """Show the model's feature importances as a horizontal bar chart."""
        st.header("Feature Importance")
        st.write("The feature importance plot shows the relative importance of each feature in predicting the RUL. Higher values indicate more important features.")

        # Importances are paired with column names by position.
        feature_importance = pd.DataFrame({
            'feature': self._features().columns,
            'importance': self.model.feature_importances_,
        }).sort_values(by='importance', ascending=True)

        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(feature_importance['feature'], feature_importance['importance'], color='blue')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        ax.set_title('Feature Importance')
        self._show_figure(fig)

    def plot_additional_graphs(self):
        """Show the RUL histogram and a pairplot of user-selected features."""
        st.header("Additional Graphs")

        st.subheader("RUL Distribution")
        st.write("The distribution plot shows the distribution of Remaining Useful Life (RUL) in the dataset.")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(self.df_Arm500['RUL'], kde=True, color='blue', ax=ax)
        ax.set_title("RUL Distribution")
        self._show_figure(fig)

        st.subheader("Pairplot of Selected Features")
        st.write("The pairplot shows the pairwise relationships between selected features. It helps to visualize the distribution and relationships between features.")
        feature_names = list(self._features().columns)
        selected_features = st.multiselect('Select features for pairplot', feature_names, default=feature_names[:5])
        # Only drawn when at least two features are selected.
        if len(selected_features) > 1:
            grid = sns.pairplot(self.df_Arm500[selected_features])
            # pairplot creates its own figure; render and close that one.
            self._show_figure(grid.figure)

    def plot_actual_vs_prediction(self):
        """Scatter actual against predicted RUL over the whole feature table."""
        st.header("Actual vs Predicted RUL")
        st.write("The scatter plot shows the relationship between actual RUL and predicted RUL. A line is added for reference, where points close to the line indicate accurate predictions.")

        y = self.df_Arm500['RUL']
        y_pred = self.model.predict(self._transform(self._features()))

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(y, y_pred, alpha=0.5, color='green')
        ax.set_xlabel("Actual RUL")
        ax.set_ylabel("Predicted RUL")
        ax.set_title("Actual vs Predicted RUL")
        # Dashed y = x reference line spanning the range of actual values.
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
        self._show_figure(fig)

    def run(self):
        """Render the whole page: inputs, prediction, optional charts, preview."""
        st.title("RUL Prediction and Data Visualization")

        st.sidebar.header("Input Features")
        input_df = self.get_input_features()

        if st.sidebar.button("Predict RUL"):
            with st.spinner('Predicting...'):
                prediction = self.predict_rul(input_df)
                st.success(f"Predicted RUL: **{prediction[0]:.2f}**")

        if st.sidebar.checkbox("Show Correlation Matrix"):
            self.plot_correlation_matrix()
        if st.sidebar.checkbox("Show Feature Importance"):
            self.plot_feature_importance()
        if st.sidebar.checkbox("Show Additional Graphs"):
            self.plot_additional_graphs()
        if st.sidebar.checkbox("Show Actual vs Predicted RUL"):
            self.plot_actual_vs_prediction()

        st.subheader("Data Preview")
        st.write("The data preview shows the first 5 rows of the dataset. You can scroll to view more columns.")
        # Show only the head so the table matches the caption above.
        st.dataframe(self.df_Arm500.head(), height=200)
169
+
170
+
171
if __name__ == "__main__":
    # Build the app (loads model, preprocessor and dataset) and render the page.
    RULPredictionApp().run()
model/best_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf536692eaa7d3ff409b98a8bd6be464813b3a402dcc43b2c923fcea733f3ce6
3
+ size 285924
model/preprocessor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfaa3b5147c67b512890f9097acdc2ef8cc9624f6fd959f3fc9f5ae657cb1dd
3
+ size 4606
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas==2.0.1
2
+ numpy==1.24.3
3
+ matplotlib==3.7.1
4
+ seaborn==0.12.2
5
+ scikit-learn==1.2.2
6
+ xgboost==1.7.5
7
+ lightgbm==4.0.0
8
+ streamlit
9
+ scikit-learn
10
+ joblib