Spaces:

bacancydataprophets
/

RUL

Sleeping

App Files Files Community

kothariyashhh committed on Jul 3, 2024

Commit

4b132af

verified ·

1 Parent(s): 08b841e

Upload 4 files

Browse files

Files changed (4) hide show

app.py +173 -0
model/best_model.pkl +3 -0
model/preprocessor.pkl +3 -0
requirements.txt +10 -0

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import joblib
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import RobustScaler
+from lightgbm import LGBMRegressor
class RULPredictionApp:
    """Streamlit page that predicts Remaining Useful Life (RUL) and plots the data.

    On construction the app loads a fitted model, a fitted preprocessor and the
    raw CSV dataset, then derives ``df_Arm500``: the cleaned, feature-only view
    of the rows whose ``steel_type`` equals ``STEEL_TYPE``. ``run`` renders the
    sidebar inputs, the prediction and the optional charts.

    Attributes are documented where they are assigned in ``__init__``.
    """

    # Default artifact locations, resolved relative to the working directory.
    MODEL_PATH = 'model/best_model.pkl'
    PREPROCESSOR_PATH = 'model/preprocessor.pkl'
    DATASET_PATH = 'dataset/ccm_rul_dataset.csv'

    # Only rows of this steel type are kept by ``preprocess_data``.
    STEEL_TYPE = "Arm500"
    # Name of the column holding the value being predicted.
    TARGET = 'RUL'

    # Columns selected from the raw CSV before cleaning.
    RELEVANT_COLUMNS = (
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
        'workpiece_weight, tonn', 'cast_in_row', 'workpiece_slice_geometry',
        'alloy_type', 'steel_weight_theoretical, tonn', 'metal_residue_grab1, tonn',
        'steel_weight, tonn', 'resistance, tonn', 'swing_frequency, amount/minute',
        'crystallizer_movement, mm', 'alloy_speed, meter/minute',
        'water_consumption, liter/minute', 'water_temperature_delta, Celsius deg.',
        'Ce, %', 'C, %', 'Si, %', 'Mn,%', 'S, %', 'P, %', 'Cr, %', 'Ni, %',
        'Cu, %', 'As, %', 'Mo, %', 'Nb, %', 'Ti, %', 'V, %', 'Al, %', 'Ca, %',
        'N, %', 'Pb, %', 'Mg, %', 'Zn, %', 'sleeve', 'num_crystallizer', 'num_stream', 'RUL',
    )
    # Columns that take part in cleaning/filtering but are removed from the result.
    NON_FEATURE_COLUMNS = ('date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement')

    def __init__(self, model_path: str = MODEL_PATH,
                 preprocessor_path: str = PREPROCESSOR_PATH,
                 dataset_path: str = DATASET_PATH):
        """Load the model, the preprocessor and the dataset, then clean the data.

        Args:
            model_path: Path of the joblib-serialised model.
            preprocessor_path: Path of the joblib-serialised preprocessor.
            dataset_path: Path of the raw CSV dataset.
        """
        # SECURITY: joblib.load unpickles arbitrary objects, so these paths must
        # only ever point at trusted artifacts.
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)
        # Raw dataset exactly as read from disk; never modified afterwards.
        self.df = pd.read_csv(dataset_path)
        # Cleaned rows for STEEL_TYPE: feature columns plus the RUL column.
        self.df_Arm500 = self.preprocess_data()

    def preprocess_data(self) -> pd.DataFrame:
        """Build the cleaned frame used by every prediction and chart.

        Steps: combine and parse the timestamp columns, keep
        ``RELEVANT_COLUMNS``, encode the two categorical columns as 0/1, repair
        one known malformed ``sleeve`` value, drop rows with missing values,
        convert ``sleeve`` to integers, keep only ``STEEL_TYPE`` rows and drop
        ``NON_FEATURE_COLUMNS``.

        Returns:
            A new DataFrame; ``self.df`` is left untouched.
        """
        raw = self.df.copy()

        # The caster sample time is stored as a time of day; prefix it with the
        # date so it parses as a full timestamp. Both columns are removed
        # again at the end, so this mainly validates that they are parseable.
        raw['sample_time_continuous_caster'] = pd.to_datetime(
            raw['date'] + " " + raw['sample_time_continuous_caster']
        )
        raw['date'] = pd.to_datetime(raw['date'])

        # Explicit copy: the assignments below must modify an independent
        # frame rather than a slice of ``raw``.
        df = raw[list(self.RELEVANT_COLUMNS)].copy()

        # Encode the categorical columns as 0/1. Values outside the mapping
        # become NaN and are removed by the dropna below.
        df['workpiece_slice_geometry'] = df['workpiece_slice_geometry'].map({'150x150': 1, '180x180': 0})
        df['alloy_type'] = df['alloy_type'].map({'open': 1, 'close': 0})
        df = df.rename(columns={
            'workpiece_slice_geometry': 'workpiece_slice_geometry_150x150',
            'alloy_type': 'alloy_type_open',
        })

        # Repair the one known non-numeric sleeve identifier.
        df.loc[df['sleeve'] == '530314 К', 'sleeve'] = '540314000'

        # Drop incomplete rows BEFORE the integer conversion: a missing sleeve
        # cannot be cast to int and would otherwise abort the whole load.
        df.dropna(axis=0, inplace=True)
        df['sleeve'] = df['sleeve'].astype(int)

        selected = df[df['steel_type'] == self.STEEL_TYPE]
        return selected.drop(columns=list(self.NON_FEATURE_COLUMNS))

    def _features(self) -> pd.DataFrame:
        """Return ``df_Arm500`` without the target column."""
        return self.df_Arm500.drop(columns=[self.TARGET])

    def _transform(self, features: pd.DataFrame) -> pd.DataFrame:
        """Run the preprocessor and relabel its output with the input column names.

        NOTE(review): relabelling assumes the preprocessor keeps the number and
        order of columns unchanged -- confirm against the training pipeline.
        """
        return pd.DataFrame(self.preprocessor.transform(features), columns=features.columns)

    @staticmethod
    def _show_figure(fig):
        """Render a Matplotlib figure in Streamlit, then release it.

        Passing the figure explicitly avoids relying on pyplot's global
        "current figure", and closing it stops figures from accumulating each
        time the script is re-executed.
        """
        st.pyplot(fig)
        plt.close(fig)

    def get_input_features(self) -> pd.DataFrame:
        """Create one sidebar numeric input per feature.

        Each input defaults to the feature's mean over ``df_Arm500``.

        Returns:
            A single-row DataFrame holding the values currently entered.
        """
        features = self._features()
        means = features.mean()
        entered = {
            name: st.sidebar.number_input(name, value=float(means[name]))
            for name in features.columns
        }
        return pd.DataFrame([entered])

    def predict_rul(self, input_df: pd.DataFrame):
        """Preprocess ``input_df`` and return the model's predictions for it.

        Args:
            input_df: Rows with the same feature columns as ``df_Arm500``
                minus the target column.

        Returns:
            Whatever ``self.model.predict`` returns, one entry per input row.
        """
        return self.model.predict(self._transform(input_df))

    def plot_correlation_matrix(self):
        """Draw an annotated heatmap of the pairwise correlations in ``df_Arm500``."""
        st.header("Correlation Matrix")
        st.write("The correlation matrix shows the correlation coefficients between the variables. It helps to understand the linear relationships between features.")
        corr_matrix = self.df_Arm500.corr()
        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=True, fmt=".2f", ax=ax)
        self._show_figure(fig)

    def plot_feature_importance(self):
        """Draw a horizontal bar chart of the model's feature importances."""
        st.header("Feature Importance")
        st.write("The feature importance plot shows the relative importance of each feature in predicting the RUL. Higher values indicate more important features.")
        # Ascending sort puts the largest importance at the top of the chart.
        ranking = pd.DataFrame({
            'feature': self._features().columns,
            'importance': self.model.feature_importances_,
        }).sort_values(by='importance', ascending=True)
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(ranking['feature'], ranking['importance'], color='blue')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        ax.set_title('Feature Importance')
        self._show_figure(fig)

    def plot_additional_graphs(self):
        """Draw the RUL histogram and a pairplot of user-selected features."""
        st.header("Additional Graphs")

        st.subheader("RUL Distribution")
        st.write("The distribution plot shows the distribution of Remaining Useful Life (RUL) in the dataset.")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(self.df_Arm500[self.TARGET], kde=True, color='blue', ax=ax)
        ax.set_title("RUL Distribution")
        self._show_figure(fig)

        st.subheader("Pairplot of Selected Features")
        st.write("The pairplot shows the pairwise relationships between selected features. It helps to visualize the distribution and relationships between features.")
        feature_names = list(self._features().columns)
        selected_features = st.multiselect('Select features for pairplot', feature_names, default=feature_names[:5])
        # The pairplot is only drawn when at least two features are selected.
        if len(selected_features) > 1:
            # pairplot builds its own figure; render that one explicitly.
            grid = sns.pairplot(self.df_Arm500[selected_features])
            self._show_figure(grid.figure)

    def plot_actual_vs_prediction(self):
        """Scatter actual RUL against the model's predictions over ``df_Arm500``."""
        st.header("Actual vs Predicted RUL")
        st.write("The scatter plot shows the relationship between actual RUL and predicted RUL. A line is added for reference, where points close to the line indicate accurate predictions.")
        actual = self.df_Arm500[self.TARGET]
        predicted = self.predict_rul(self._features())
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(actual, predicted, alpha=0.5, color='green')
        ax.set_xlabel("Actual RUL")
        ax.set_ylabel("Predicted RUL")
        ax.set_title("Actual vs Predicted RUL")
        # Reference diagonal: points on it are predicted exactly.
        bounds = [actual.min(), actual.max()]
        ax.plot(bounds, bounds, 'r--', lw=2)
        self._show_figure(fig)

    def run(self):
        """Render the whole page: sidebar inputs, prediction, charts and preview."""
        st.title("RUL Prediction and Data Visualization")

        st.sidebar.header("Input Features")
        input_df = self.get_input_features()

        # The prediction is computed only when the button is pressed.
        if st.sidebar.button("Predict RUL"):
            with st.spinner('Predicting...'):
                prediction = self.predict_rul(input_df)
            st.success(f"Predicted RUL: **{prediction[0]:.2f}**")

        # Each chart is opt-in through its own sidebar checkbox.
        if st.sidebar.checkbox("Show Correlation Matrix"):
            self.plot_correlation_matrix()
        if st.sidebar.checkbox("Show Feature Importance"):
            self.plot_feature_importance()
        if st.sidebar.checkbox("Show Additional Graphs"):
            self.plot_additional_graphs()
        if st.sidebar.checkbox("Show Actual vs Predicted RUL"):
            self.plot_actual_vs_prediction()

        st.subheader("Data Preview")
        st.write("The data preview shows the first 5 rows of the dataset. You can scroll to view more columns.")
        # Show only the head so the table matches the description above.
        st.dataframe(self.df_Arm500.head(), height=200)
if __name__ == "__main__":
    # Build the app (loads the model, preprocessor and dataset) and render the page.
    RULPredictionApp().run()

model/best_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf536692eaa7d3ff409b98a8bd6be464813b3a402dcc43b2c923fcea733f3ce6
+size 285924

model/preprocessor.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bfaa3b5147c67b512890f9097acdc2ef8cc9624f6fd959f3fc9f5ae657cb1dd
+size 4606

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+pandas==2.0.1
+numpy==1.24.3
+matplotlib==3.7.1
+seaborn==0.12.2
+scikit-learn==1.2.2
+xgboost==1.7.5
+lightgbm==4.0.0
+streamlit
+scikit-learn
+joblib