Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from sklearn.ensemble import IsolationForest | |
| from sklearn.preprocessing import StandardScaler | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| import torch | |
| from huggingface_hub import login | |
| import os | |
# Page setup: set the browser-tab title and the wide layout, then render the
# main heading of the app.
st.set_page_config(
    page_title="Smart Factory RAG Assistant",
    layout="wide",
)
st.title("π Industry 5.0 | FactoryGPT")
@st.cache_resource
def load_model():
    """Build the Zephyr-7B text-generation pipeline used by the assistant.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``: the tokenizer and the
    model weights are loaded once and the same pipeline object is handed
    back on later reruns instead of being re-created each time.

    Returns:
        A ``transformers`` ``text-generation`` pipeline wrapping the
        ``HuggingFaceH4/zephyr-7b-beta`` model and its tokenizer.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,  # half precision to reduce memory use
        device_map="auto",  # let the library decide device placement
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
# ---------------------------------------------------------------------------
# Main app flow: load the model, accept a CSV upload, show summary statistics
# and charts, flag anomalies with an Isolation Forest, and answer a
# role-specific question with the language model.
# ---------------------------------------------------------------------------
nlp = load_model()

# File upload
uploaded_file = st.file_uploader("π Upload your factory CSV data", type=["csv"])

if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report an unreadable upload to the user instead of crashing the app.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()
    st.success("β File uploaded and loaded!")

    # Data summary
    st.subheader("π Data Summary")
    st.write(f"Number of rows: {df.shape[0]}")
    st.write(f"Number of columns: {df.shape[1]}")
    st.write("Column types:")
    st.dataframe(df.dtypes.astype(str).rename("Type"))

    # Descriptive statistics: computed once here and reused for the LLM
    # prompt, so the prompt describes the uploaded data only (not the
    # anomaly labels produced further down).
    stats = df.describe()
    st.subheader("π Descriptive Statistics")
    st.dataframe(stats.T)

    # Every section below needs at least one numeric column and one row;
    # stop with a clear message rather than failing inside a plotting call.
    numeric_columns = df.select_dtypes(include='number').columns.tolist()
    if df.empty or not numeric_columns:
        st.warning(
            "The uploaded file has no numeric data to analyse; "
            "charts, anomaly detection and the assistant were skipped."
        )
        st.stop()

    # Correlation analysis
    st.subheader("π Parameter Correlation Heatmap")
    corr = df.corr(numeric_only=True)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
    st.pyplot(fig)
    plt.close(fig)  # figures otherwise accumulate across script reruns

    # Technical visualizations
    st.subheader("π Technical Graphs")

    # Time series plot
    selected_graph_column = st.selectbox("Select a parameter for time series plot", numeric_columns)
    time_column = st.selectbox(
        "Select time/index column (optional)",
        ['Index'] + df.columns.tolist(),
        index=0,
    )
    parsed_times = None
    if time_column != 'Index':
        try:
            # Parse into a separate Series: overwriting df[time_column] would
            # change the column's dtype for every later analysis step.
            parsed_times = pd.to_datetime(df[time_column])
        except (ValueError, TypeError, OverflowError):
            st.warning(
                f"Could not parse '{time_column}' as dates; "
                "plotting against the row index instead."
            )

    fig2, ax2 = plt.subplots(figsize=(10, 4))
    if parsed_times is not None:
        ordered_times = parsed_times.sort_values()
        ax2.plot(ordered_times, df.loc[ordered_times.index, selected_graph_column])
        ax2.set_xlabel(time_column)
    else:
        ax2.plot(df[selected_graph_column])
        ax2.set_xlabel("Index")
    ax2.set_title(f"Trend Over Time: {selected_graph_column}")
    ax2.set_ylabel(selected_graph_column)
    st.pyplot(fig2)
    plt.close(fig2)

    # Pairplot (sampled down to 100 rows to keep rendering fast)
    if len(numeric_columns) > 1:
        st.subheader("π Pairwise Parameter Relationships")
        if len(df) > 100:
            sampled_df = df[numeric_columns].sample(n=100, random_state=1)
        else:
            sampled_df = df[numeric_columns]
        pair_fig = sns.pairplot(sampled_df)
        st.pyplot(pair_fig)
        plt.close(pair_fig.figure)

    # Boxplots
    st.subheader("π Distribution & Outliers per Parameter")
    selected_box_column = st.selectbox("Select parameter for boxplot", numeric_columns)
    fig3, ax3 = plt.subplots()
    sns.boxplot(y=df[selected_box_column], ax=ax3)
    ax3.set_title(f"Boxplot: {selected_box_column}")
    st.pyplot(fig3)
    plt.close(fig3)

    # Anomaly detection
    st.subheader("β οΈ Anomaly Detection using Isolation Forest")
    # Rows with a missing numeric value cannot be scored and are dropped.
    num_df = df[numeric_columns].dropna()
    if num_df.empty:
        st.warning("Every row has a missing numeric value, so anomaly detection was skipped.")
        anomalies = df.iloc[0:0]
    else:
        X_scaled = StandardScaler().fit_transform(num_df)
        # Fixed seed so the flagged rows do not change on every rerun.
        iso = IsolationForest(contamination=0.05, random_state=42)
        # Attach predictions only to the rows that were actually scored;
        # assigning them to the full frame fails when dropna() removed rows.
        scored = df.loc[num_df.index].assign(Anomaly=iso.fit_predict(X_scaled))
        anomalies = scored[scored['Anomaly'] == -1]
    st.write(f"Detected {len(anomalies)} anomalies")
    st.dataframe(anomalies.head(10))

    # Role-based assistant
    st.subheader("π§ Role-Based Decision Assistant")
    role = st.selectbox("Select your role", ["Engineer", "Operator"])
    predefined_qas = {
        "Engineer": [
            "Which parameters are showing strong correlation?",
            "Are there any indicators of potential equipment failure?",
            "How should we optimize process efficiency based on anomalies?",
        ],
        "Operator": [
            "What is the most critical parameter to monitor today?",
            "Do any sensors show abnormal values?",
            "What immediate steps should I take due to anomalies?",
        ],
    }
    predefined_q = st.selectbox(
        "Choose a predefined question (optional)",
        ["None"] + predefined_qas[role],
    )
    manual_q = st.text_input("Or type your own question below:")
    # A typed question takes priority over the predefined choice.
    if manual_q.strip():
        question = manual_q
    elif predefined_q != "None":
        question = predefined_q
    else:
        question = ""

    if question:
        with st.spinner("Generating insights..."):
            summary = stats.round(2).to_string()
            corr_text = corr.round(2).to_string()
            anomaly_count = len(anomalies)
            context = "\n".join([
                f"You are a highly skilled {role} working in a smart manufacturing facility.",
                "Here is a summary of the uploaded data:",
                "STATISTICS:",
                summary,
                "CORRELATIONS:",
                corr_text,
                f"ANOMALIES: {anomaly_count} rows flagged.",
                f"QUESTION: {question}",
                "Provide a short, focused response in your role.",
            ])
            # Format the prompt with the model's own chat template rather
            # than hand-written [INST] tags, which belong to a different
            # model family than the Zephyr checkpoint loaded above.
            prompt = nlp.tokenizer.apply_chat_template(
                [{"role": "user", "content": context}],
                tokenize=False,
                add_generation_prompt=True,
            )
            # return_full_text=False yields only the newly generated text,
            # so the prompt does not have to be stripped out afterwards.
            result = nlp(
                prompt,
                max_new_tokens=250,
                do_sample=True,
                temperature=0.5,
                return_full_text=False,
            )
            answer = result[0]['generated_text'].strip()
        st.success("β Recommendation:")
        st.markdown(f"**{answer}**")
else:
    st.info("π Please upload a factory CSV data file to begin analysis.")