Spaces:
Sleeping
Sleeping
Create multi_env_compare.py
Browse files- multi_env_compare.py +162 -0
multi_env_compare.py
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import numpy as np
|
4 |
+
from pre import preprocess_uploaded_file
|
5 |
+
from difflib import SequenceMatcher
|
6 |
+
|
7 |
+
def similar(a, b, threshold=0.9):
    """Return True when two sequences are more similar than ``threshold``.

    Similarity is the ``difflib.SequenceMatcher`` ratio of ``a`` and ``b``
    (a float in [0, 1]).

    Args:
        a: First sequence (typically a string) to compare.
        b: Second sequence to compare.
        threshold: Ratio that must be strictly exceeded for a match.

    Returns:
        bool: ``True`` if the ratio is strictly greater than ``threshold``.
    """
    # Strict comparison: a ratio exactly equal to the threshold is NOT a match.
    return SequenceMatcher(None, a, b).ratio() > threshold
|
9 |
+
|
10 |
+
def perform_multi_env_analysis(uploaded_dataframes):
    """Render the multi-environment comparison for the uploaded results.

    The dataframes are concatenated, filtered by the environments and
    functional areas chosen in the UI, and then summarised with Streamlit
    widgets: per-environment PASSED/FAILED totals, per-environment scenario
    counts, and functional areas whose distinct scenario count differs
    between environments.

    Args:
        uploaded_dataframes: Iterable of pandas DataFrames. The code reads
            the columns 'Environment', 'Functional area', 'Scenario name'
            and 'Status' from the concatenated frame.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Get unique environments and functional areas ("All" is a pseudo-option)
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    # Select environments to display
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)

    # Initialize session state for selected functional areas if it doesn't exist
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # The remembered selection may refer to functional areas that are absent
    # from the currently uploaded data (e.g. after a different set of files is
    # uploaded). Streamlit raises if a default is not one of the options, so
    # keep only the values that are still valid and fall back to "All" when
    # a non-empty remembered selection has no valid entries left.
    available_areas = set(unique_areas)
    remembered_areas = list(st.session_state.selected_functional_areas)
    default_areas = [area for area in remembered_areas if area in available_areas]
    if remembered_areas and not default_areas:
        default_areas = ["All"]

    # Select functional areas to display, using session state
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=default_areas
    )

    # Update session state with the new selection
    st.session_state.selected_functional_areas = selected_functional_areas

    # "All" expands to every functional area present in the data
    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    # Filter data based on selected environments and functional areas
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]

    # Count rows per (Environment, Functional area, Scenario name) and pivot
    # the Status values into columns
    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)

    # Ensure 'PASSED' and 'FAILED' columns exist even if a status never occurs
    if 'PASSED' not in grouped_data.columns:
        grouped_data['PASSED'] = 0
    if 'FAILED' not in grouped_data.columns:
        grouped_data['FAILED'] = 0

    # Total counts only PASSED and FAILED rows; any other status is excluded
    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']

    # Reset index to make Environment, Functional area, and Scenario name columns
    grouped_data = grouped_data.reset_index()

    # Reorder columns (this also drops any status column other than PASSED/FAILED)
    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]

    # Display the grouped data
    # NOTE(review): only the heading is written; the table call below was
    # already commented out in the original.
    st.write("### Scenario Counts by Environment and Functional Area")
    # st.dataframe(grouped_data.style.highlight_max(axis=0, subset=['Total', 'PASSED', 'FAILED']))

    # Display summary statistics
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()

    # Add column names as the first row
    # NOTE(review): this repeats the header inside the table body and mixes
    # strings with numbers in each column; kept to preserve the current output.
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)

    # Display the DataFrame
    st.dataframe(summary_with_headers)

    # Distinct scenario names per selected environment
    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}

    # Debug: Print the number of scenarios in each environment
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")

    # NOTE(review): nothing in this function ever appends to these lists, so
    # the "missing" / "name differences" sections below always report zero.
    missing_scenarios = []
    mismatched_scenarios = []

    # Inconsistency analysis based on per-area distinct scenario counts
    st.write("### Inconsistent Scenario Count Analysis by Functional Area")

    if len(selected_environments) > 1:
        # Rows: Environment, columns: Functional area, values: distinct scenarios
        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)

        # Difference between max and min counts for each functional area
        count_diff = scenario_counts.max() - scenario_counts.min()

        # Sort functional areas by count difference, descending
        inconsistent_areas = count_diff.sort_values(ascending=False)

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")

        # Option to show detailed breakdown
        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)

    else:
        st.write("Please select at least two environments for comparison.")

    # Debug: Print the number of missing and mismatched scenarios
    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")

    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        missing_df = pd.DataFrame(missing_scenarios)
        st.dataframe(missing_df)
    else:
        st.write("No truly missing scenarios found across environments.")

    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        mismatched_df = pd.DataFrame(mismatched_scenarios)
        st.dataframe(mismatched_df)
    else:
        st.write("No scenarios with name differences found across environments.")
|
133 |
+
|
134 |
+
def multi_env_compare_main():
    """Streamlit entry point for the multi-environment comparison page.

    Asks for the number of environments, shows one multi-file CSV uploader
    per environment, runs every uploaded file through
    ``preprocess_uploaded_file`` and hands the resulting dataframes to
    ``perform_multi_env_analysis``. If nothing was uploaded, a prompt is
    shown instead.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Multi-Environment Comparison")

    # Get the number of environments from the user
    num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)

    # Preprocessed dataframes from every uploader, in upload order
    uploaded_dataframes = []

    # One uploader per environment; each accepts several CSV files
    for i in range(num_environments):
        uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)

        # Preprocess each uploaded CSV file and collect the result
        uploaded_dataframes.extend(
            preprocess_uploaded_file(uploaded_file) for uploaded_file in uploaded_files
        )

    # Only analyse when at least one file was uploaded
    if uploaded_dataframes:
        perform_multi_env_analysis(uploaded_dataframes)
    else:
        st.write("Please upload at least one CSV file.")
|
160 |
+
|
161 |
+
# Run the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    multi_env_compare_main()
|