Spaces:
Build error
Build error
| import pandas as pd | |
| import streamlit as st | |
| import numpy as np | |
| from pre import preprocess_uploaded_file | |
| from difflib import SequenceMatcher | |
| import time | |
def similar(a, b, threshold=0.9):
    """Return True when *a* and *b* are more alike than *threshold*.

    Likeness is ``difflib.SequenceMatcher(None, a, b).ratio()``, a float in
    the range 0..1. The comparison is strict, so a ratio exactly equal to
    *threshold* yields False.
    """
    likeness = SequenceMatcher(None, a, b).ratio()
    return likeness > threshold
def find_different_scenarios(grouped_data, area):
    """List scenarios of one functional area that exist in only one of two environments.

    Args:
        grouped_data: DataFrame with 'Functional area', 'Environment' and
            'Scenario Name' columns.
        area: Value of 'Functional area' to restrict the comparison to.

    Returns:
        A list of ``(scenario, env_present, 'Present', env_missing, 'Missing')``
        tuples, one per differing scenario per pair of environments.
        Environment pairs follow the order in which environments first appear
        in the data; within a pair, scenarios are ordered by their string
        form so the result is the same on every run.
    """
    # Restrict to the requested functional area.
    area_data = grouped_data[grouped_data['Functional area'] == area]

    # Scenario names seen in each environment, keyed in order of first appearance.
    scenarios_by_env = {
        env: set(area_data.loc[area_data['Environment'] == env, 'Scenario Name'])
        for env in area_data['Environment'].unique()
    }

    diff_scenarios = []
    envs = list(scenarios_by_env)
    for position, env1 in enumerate(envs):
        # Pair env1 only with later environments so each pair is visited once.
        for env2 in envs[position + 1:]:
            only_in_one = scenarios_by_env[env1] ^ scenarios_by_env[env2]
            # Sets have no stable order; sort (by str, to tolerate mixed
            # value types) so the output is deterministic.
            for scenario in sorted(only_in_one, key=str):
                if scenario in scenarios_by_env[env1]:
                    diff_scenarios.append((scenario, env1, 'Present', env2, 'Missing'))
                else:
                    diff_scenarios.append((scenario, env2, 'Present', env1, 'Missing'))
    return diff_scenarios
def _summarize_scenarios(filtered_data):
    """Count PASSED/FAILED rows per environment, functional area and scenario."""
    status_counts = (
        filtered_data
        .groupby(['Environment', 'Functional area', 'Scenario Name', 'Status'])
        .size()
        .unstack(fill_value=0)
    )
    # A status that never occurs produces no column; add it so the
    # arithmetic and column selection below always work.
    for status in ('PASSED', 'FAILED'):
        if status not in status_counts.columns:
            status_counts[status] = 0
    status_counts['Total'] = status_counts['PASSED'] + status_counts['FAILED']
    ordered = ['Environment', 'Functional area', 'Scenario Name', 'Total', 'PASSED', 'FAILED']
    return status_counts.reset_index()[ordered]


def _scenario_presence_table(filtered_data, area, environments):
    """Build a Present/Missing table of the scenarios of *area* that are absent from some environment."""
    area_rows = filtered_data[filtered_data['Functional area'] == area]
    scenarios_by_env = {
        env: set(area_rows.loc[area_rows['Environment'] == env, 'Scenario Name'])
        for env in environments
    }
    all_scenarios = set().union(*scenarios_by_env.values())
    # Keep only scenarios missing from at least one environment. Sorted by
    # string form so the table and the exported CSV have a stable row order.
    differing = sorted(
        (
            scenario for scenario in all_scenarios
            if any(scenario not in names for names in scenarios_by_env.values())
        ),
        key=str,
    )
    rows = [
        [scenario] + [
            'Present' if scenario in scenarios_by_env[env] else 'Missing'
            for env in environments
        ]
        for scenario in differing
    ]
    return pd.DataFrame(rows, columns=['Scenario', *environments])


def perform_multi_env_analysis(uploaded_dataframes):
    """Render the multi-environment comparison UI for the uploaded results.

    The frames are concatenated, filtered by the environments and functional
    areas picked in the Streamlit widgets, summarised per environment, and,
    when more than one environment is selected, compared by the number of
    distinct scenarios per functional area. For a chosen inconsistent area a
    Present/Missing table is shown and offered as a CSV download.

    Args:
        uploaded_dataframes: Iterable of DataFrames. The code reads the
            'Environment', 'Functional area', 'Scenario Name' and 'Status'
            columns.

    Returns:
        None. All output goes through Streamlit calls.
    """
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    unique_environments = combined_data['Environment'].unique()
    all_areas = combined_data['Functional area'].unique()
    # "All" is a pseudo-area meaning "do not filter by functional area".
    unique_areas = np.append(all_areas, "All")

    selected_environments = st.multiselect(
        "Select environments to display",
        unique_environments,
        default=unique_environments,
    )

    # The confirmed selection survives reruns in session state.
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]
    # Seed the widget through its key only. Passing `default=` as well as a
    # Session State value for the same key makes Streamlit emit a warning.
    # Areas that are not among the current options are dropped so the widget
    # is never seeded with a value it cannot offer.
    if 'functional_areas_multiselect' not in st.session_state:
        available = list(unique_areas)
        st.session_state.functional_areas_multiselect = [
            area for area in st.session_state.selected_functional_areas
            if area in available
        ]

    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        key="functional_areas_multiselect",
    )

    if st.button("Confirm Functional Area Selection"):
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Leave the success message visible briefly before rerunning.
        st.rerun()

    if "All" in selected_functional_areas:
        selected_functional_areas = all_areas

    filtered_data = combined_data[
        combined_data['Environment'].isin(selected_environments)
        & combined_data['Functional area'].isin(selected_functional_areas)
    ]

    grouped_data = _summarize_scenarios(filtered_data)

    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum',
    }).reset_index()
    # The column names are repeated as the first data row of the displayed table.
    summary_with_headers = pd.concat(
        [pd.DataFrame([summary.columns], columns=summary.columns), summary],
        ignore_index=True,
    )
    st.dataframe(summary_with_headers)

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")
    if len(selected_environments) > 1:
        # Rows: environments; columns: functional areas; values: distinct scenario names.
        scenario_counts = (
            filtered_data
            .groupby(['Environment', 'Functional area'])['Scenario Name']
            .nunique()
            .unstack(fill_value=0)
        )
        # Spread between the largest and smallest count per area, largest first.
        count_diff = scenario_counts.max() - scenario_counts.min()
        inconsistent_areas = count_diff.sort_values(ascending=False)
        areas_with_gaps = inconsistent_areas[inconsistent_areas > 0]

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in areas_with_gaps.items():
            st.write(f"- {area}: Difference of {diff} scenarios")
            st.write(scenario_counts[area])
            st.write("\n")

        if st.checkbox("Show detailed scenario count breakdown", key="show_detailed_breakdown"):
            st.write(scenario_counts)

        selected_area = st.selectbox(
            "Select a functional area to analyze:",
            options=list(areas_with_gaps.index),
        )
        if selected_area:
            st.write(f"### Detailed Analysis of Different Scenarios for '{selected_area}'")
            diff_df = _scenario_presence_table(filtered_data, selected_area, selected_environments)
            st.write(f"Number of scenarios that differ between environments: {len(diff_df)}")
            st.dataframe(diff_df)

            csv = diff_df.to_csv(index=False)
            st.download_button(
                label="Download CSV",
                data=csv,
                file_name=f"{selected_area}_scenario_comparison.csv",
                mime="text/csv",
            )
    else:
        st.write("Please select at least two environments for comparison.")
def multi_env_compare_main():
    """Page entry point: collect result files per environment and run the analysis.

    Asks how many environments to compare, shows one multi-file uploader per
    environment, preprocesses every uploaded file with
    ``preprocess_uploaded_file`` and passes the resulting frames to
    ``perform_multi_env_analysis``. When nothing has been uploaded, a prompt
    is shown instead.
    """
    st.title("Multi-Environment Comparison")

    num_environments = st.number_input(
        "Enter the number of environments", min_value=1, value=1, step=1
    )

    # One uploader per environment; frames are gathered in upload order.
    uploaded_dataframes = []
    for i in range(num_environments):
        files_for_env = st.file_uploader(
            f"Upload CSV or XLSX files for Environment {i + 1}",
            type=["csv", "xlsx"],
            accept_multiple_files=True,
        )
        uploaded_dataframes.extend(
            preprocess_uploaded_file(one_file) for one_file in files_for_env
        )

    if not uploaded_dataframes:
        st.write("Please upload at least one file.")
        return

    perform_multi_env_analysis(uploaded_dataframes)
# Render the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    multi_env_compare_main()