Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +173 -0
- model/best_model.pkl +3 -0
- model/preprocessor.pkl +3 -0
- requirements.txt +10 -0
app.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import joblib
|
7 |
+
from sklearn.compose import ColumnTransformer
|
8 |
+
from sklearn.preprocessing import RobustScaler
|
9 |
+
from lightgbm import LGBMRegressor
|
10 |
+
|
11 |
+
class RULPredictionApp:
    """Streamlit dashboard for Remaining Useful Life (RUL) prediction.

    On construction the app loads a pickled model and preprocessor, reads the
    dataset CSV, and derives the cleaned "Arm500" subset that feeds both the
    sidebar inputs and every visualisation.

    NOTE(review): Streamlit re-executes the script on each widget interaction,
    so a new instance (and therefore a new model/CSV load) is created per
    rerun. Consider Streamlit's caching decorators if start-up becomes slow.
    """

    # Columns kept from the raw dataset (order defines the feature order).
    _COLUMNS = (
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
        'workpiece_weight, tonn', 'cast_in_row', 'workpiece_slice_geometry',
        'alloy_type', 'steel_weight_theoretical, tonn', 'metal_residue_grab1, tonn',
        'steel_weight, tonn', 'resistance, tonn', 'swing_frequency, amount/minute',
        'crystallizer_movement, mm', 'alloy_speed, meter/minute',
        'water_consumption, liter/minute', 'water_temperature_delta, Celsius deg.',
        'Ce, %', 'C, %', 'Si, %', 'Mn,%', 'S, %', 'P, %', 'Cr, %', 'Ni, %',
        'Cu, %', 'As, %', 'Mo, %', 'Nb, %', 'Ti, %', 'V, %', 'Al, %', 'Ca, %',
        'N, %', 'Pb, %', 'Mg, %', 'Zn, %', 'sleeve', 'num_crystallizer', 'num_stream', 'RUL',
    )

    # Columns used only for filtering/validation; removed from the model frame.
    _NON_FEATURE_COLUMNS = ('date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement')

    def __init__(self):
        """Load the model artefacts and dataset, then build the Arm500 frame."""
        self.model = joblib.load('model/best_model.pkl')
        self.preprocessor = joblib.load('model/preprocessor.pkl')
        self.df = pd.read_csv('dataset/ccm_rul_dataset.csv')
        self.df_Arm500 = self.preprocess_data()

    def preprocess_data(self) -> pd.DataFrame:
        """Return the cleaned feature/target frame for steel type "Arm500".

        Works on a copy, so ``self.df`` is left untouched.

        Returns:
            A DataFrame with the encoded feature columns plus ``RUL``; the
            date, sample-time, steel-type and doc-requirement columns are
            removed.

        Raises:
            KeyError: if ``self.df`` lacks one of the expected columns.
        """
        # Select the relevant columns first and take ONE explicit copy, so the
        # assignments below never write into a view of ``self.df``.
        df = self.df[list(self._COLUMNS)].copy()

        # Merge date + time into one timestamp and parse both as datetimes.
        df['sample_time_continuous_caster'] = pd.to_datetime(
            df['date'] + " " + df['sample_time_continuous_caster']
        )
        df['date'] = pd.to_datetime(df['date'])

        # Encode the two binary categoricals; unmapped values become NaN and
        # are removed by the dropna below.
        df['workpiece_slice_geometry'] = df['workpiece_slice_geometry'].map({'150x150': 1, '180x180': 0})
        df['alloy_type'] = df['alloy_type'].map({'open': 1, 'close': 0})
        df.rename(
            columns={
                'workpiece_slice_geometry': 'workpiece_slice_geometry_150x150',
                'alloy_type': 'alloy_type_open',
            },
            inplace=True,
        )

        # Correct the one known malformed sleeve id.
        df.loc[df['sleeve'] == '530314 К', 'sleeve'] = '540314000'

        # Drop incomplete rows BEFORE the integer cast: casting a column that
        # still contains NaN raises instead of letting the row be discarded.
        df.dropna(axis=0, inplace=True)
        df['sleeve'] = df['sleeve'].astype(int)

        arm500 = df[df['steel_type'] == "Arm500"]
        return arm500.drop(columns=list(self._NON_FEATURE_COLUMNS))

    def _feature_frame(self) -> pd.DataFrame:
        """Return the Arm500 frame without the ``RUL`` target column."""
        return self.df_Arm500.drop(['RUL'], axis=1)

    @staticmethod
    def _render(fig):
        """Send *fig* to Streamlit, then close it so reruns don't pile up figures."""
        st.pyplot(fig)
        plt.close(fig)

    def get_input_features(self) -> pd.DataFrame:
        """Create one sidebar number input per feature and return the values.

        Each widget defaults to the mean of that feature in the Arm500 data.

        Returns:
            A single-row DataFrame keyed by feature name.
        """
        features = self._feature_frame()
        input_features = {}
        for feature in features.columns:
            input_features[feature] = st.sidebar.number_input(
                feature, value=float(features[feature].mean())
            )
        return pd.DataFrame([input_features])

    def predict_rul(self, input_df):
        """Apply the preprocessor to *input_df* and return the model's predictions.

        Args:
            input_df: DataFrame holding the feature columns.

        Returns:
            Whatever ``self.model.predict`` returns for the transformed rows.
        """
        input_df_transformed = pd.DataFrame(
            self.preprocessor.transform(input_df), columns=input_df.columns
        )
        return self.model.predict(input_df_transformed)

    def plot_correlation_matrix(self):
        """Render an annotated heatmap of the Arm500 correlation matrix."""
        st.header("Correlation Matrix")
        st.write("The correlation matrix shows the correlation coefficients between the variables. It helps to understand the linear relationships between features.")

        corr_matrix = self.df_Arm500.corr()
        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=True, fmt=".2f", ax=ax)
        self._render(fig)

    def plot_feature_importance(self):
        """Render a horizontal bar chart of the model's feature importances."""
        st.header("Feature Importance")
        st.write("The feature importance plot shows the relative importance of each feature in predicting the RUL. Higher values indicate more important features.")

        feature_importance = pd.DataFrame({
            'feature': self._feature_frame().columns,
            'importance': self.model.feature_importances_,
        }).sort_values(by='importance', ascending=True)

        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(feature_importance['feature'], feature_importance['importance'], color='blue')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        ax.set_title('Feature Importance')
        self._render(fig)

    def plot_additional_graphs(self):
        """Render the RUL histogram and a user-configurable pairplot."""
        st.header("Additional Graphs")

        # RUL distribution
        st.subheader("RUL Distribution")
        st.write("The distribution plot shows the distribution of Remaining Useful Life (RUL) in the dataset.")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(self.df_Arm500['RUL'], kde=True, color='blue', ax=ax)
        ax.set_title("RUL Distribution")
        self._render(fig)

        # Pairplot of the features picked by the user
        st.subheader("Pairplot of Selected Features")
        st.write("The pairplot shows the pairwise relationships between selected features. It helps to visualize the distribution and relationships between features.")
        feature_names = list(self._feature_frame().columns)
        selected_features = st.multiselect(
            'Select features for pairplot', feature_names, default=feature_names[:5]
        )
        if len(selected_features) > 1:
            # pairplot builds its own figure; render that one explicitly.
            grid = sns.pairplot(self.df_Arm500[selected_features])
            self._render(grid.figure)

    def plot_actual_vs_prediction(self):
        """Scatter actual RUL against the model's prediction for every Arm500 row."""
        st.header("Actual vs Predicted RUL")
        st.write("The scatter plot shows the relationship between actual RUL and predicted RUL. A line is added for reference, where points close to the line indicate accurate predictions.")

        y = self.df_Arm500['RUL']
        y_pred = self.predict_rul(self._feature_frame())

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(y, y_pred, alpha=0.5, color='green')
        ax.set_xlabel("Actual RUL")
        ax.set_ylabel("Predicted RUL")
        ax.set_title("Actual vs Predicted RUL")
        # Diagonal reference line: points on it are perfect predictions.
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
        self._render(fig)

    def run(self):
        """Lay out the page: sidebar inputs, prediction, optional plots, data preview."""
        st.title("RUL Prediction and Data Visualization")

        # Sidebar for user input
        st.sidebar.header("Input Features")
        input_df = self.get_input_features()

        # Predict on demand, showing a spinner while the model runs
        if st.sidebar.button("Predict RUL"):
            with st.spinner('Predicting...'):
                prediction = self.predict_rul(input_df)
            st.success(f"Predicted RUL: **{prediction[0]:.2f}**")

        # Checkboxes for visualizations
        if st.sidebar.checkbox("Show Correlation Matrix"):
            self.plot_correlation_matrix()
        if st.sidebar.checkbox("Show Feature Importance"):
            self.plot_feature_importance()
        if st.sidebar.checkbox("Show Additional Graphs"):
            self.plot_additional_graphs()
        if st.sidebar.checkbox("Show Actual vs Predicted RUL"):
            self.plot_actual_vs_prediction()

        # Scrollable data preview
        st.subheader("Data Preview")
        st.write("The data preview shows the first 5 rows of the dataset. You can scroll to view more columns.")
        st.dataframe(self.df_Arm500, height=200)
|
169 |
+
|
170 |
+
|
171 |
+
if __name__ == "__main__":
    # Script entry point: build the app (loads model + data) and render the page.
    RULPredictionApp().run()
|
model/best_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf536692eaa7d3ff409b98a8bd6be464813b3a402dcc43b2c923fcea733f3ce6
|
3 |
+
size 285924
|
model/preprocessor.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bfaa3b5147c67b512890f9097acdc2ef8cc9624f6fd959f3fc9f5ae657cb1dd
|
3 |
+
size 4606
|
requirements.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas==2.0.1
|
2 |
+
numpy==1.24.3
|
3 |
+
matplotlib==3.7.1
|
4 |
+
seaborn==0.12.2
|
5 |
+
scikit-learn==1.2.2
|
6 |
+
xgboost==1.7.5
|
7 |
+
lightgbm==4.0.0
|
8 |
+
streamlit
|
9 |
+
scikit-learn
|
10 |
+
joblib
|