BananaSauce committed on
Commit
62b007e
·
verified ·
1 Parent(s): 853735a

Create multi_env_compare.py

Browse files
Files changed (1) hide show
  1. multi_env_compare.py +162 -0
multi_env_compare.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import numpy as np
4
+ from pre import preprocess_uploaded_file
5
+ from difflib import SequenceMatcher
6
+
7
def similar(a, b, threshold=0.9):
    """Return True when the similarity ratio of *a* and *b* exceeds *threshold*.

    The ratio comes from ``difflib.SequenceMatcher`` (no junk function).
    The comparison is strict, so a ratio exactly equal to *threshold*
    yields False.
    """
    matcher = SequenceMatcher(None, a, b)
    score = matcher.ratio()
    return score > threshold
9
+
10
def _find_scenario_discrepancies(scenarios_by_env):
    """Classify scenario names that are absent from at least one environment.

    Args:
        scenarios_by_env: Mapping of environment -> set of scenario names
            found in that environment.

    Returns:
        A ``(missing, mismatched)`` tuple of lists of dicts. ``missing``
        holds scenarios that have no similarly named counterpart in the
        environment they are absent from; ``mismatched`` holds scenarios
        for which ``similar`` finds a close (but not identical) name there.
    """
    missing = []
    mismatched = []
    if len(scenarios_by_env) < 2:
        return missing, mismatched

    # Names present in every environment can never be the renamed variant of
    # an absent scenario, so they are excluded up front. This keeps the
    # pairwise string comparison limited to the names that actually differ.
    common = set.intersection(*scenarios_by_env.values())
    extras_by_env = {env: names - common for env, names in scenarios_by_env.items()}
    reported_pairs = set()

    for env, extras in extras_by_env.items():
        for other_env, other_names in scenarios_by_env.items():
            if other_env == env:
                continue

            # Sorted for a deterministic report order; key=str tolerates
            # non-string scenario names.
            absent = sorted(extras - other_names, key=str)
            # A candidate that also exists in `env` is a distinct scenario
            # there, not a renamed copy of the absent one.
            candidates = sorted(extras_by_env[other_env] - scenarios_by_env[env], key=str)

            for name in absent:
                match = next(
                    (cand for cand in candidates if similar(str(name), str(cand))),
                    None,
                )
                if match is None:
                    missing.append({
                        'Scenario name': name,
                        'Present in': env,
                        'Missing from': other_env,
                    })
                    continue

                # The same pair is found from both directions; report it once.
                pair = frozenset({(env, name), (other_env, match)})
                if pair in reported_pairs:
                    continue
                reported_pairs.add(pair)
                mismatched.append({
                    'Environment': env,
                    'Scenario name': name,
                    'Other environment': other_env,
                    'Similar scenario name': match,
                })

    return missing, mismatched


def perform_multi_env_analysis(uploaded_dataframes):
    """Render the multi-environment comparison for the uploaded data.

    Concatenates the given dataframes, lets the user filter by environment
    and functional area, and writes to the Streamlit page: per-scenario
    PASSED/FAILED counts, a per-environment summary, functional areas whose
    distinct scenario counts differ between environments, and scenarios that
    are missing from (or named slightly differently in) some environment.

    Args:
        uploaded_dataframes: Iterable of DataFrames that contain the columns
            'Environment', 'Functional area', 'Scenario name' and 'Status'.

    Returns:
        None. All results are written to the Streamlit page.
    """
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    unique_environments = combined_data['Environment'].unique()
    all_areas = combined_data['Functional area'].unique()
    area_options = list(all_areas) + ["All"]

    selected_environments = st.multiselect(
        "Select environments to display",
        unique_environments,
        default=unique_environments,
    )

    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # The remembered selection may name areas that are not in the currently
    # uploaded data; only values that are valid options may be used as the
    # widget default.
    remembered_areas = st.session_state.selected_functional_areas
    default_areas = [area for area in remembered_areas if area in area_options]
    if len(remembered_areas) and not default_areas:
        default_areas = ["All"]

    selected_functional_areas = st.multiselect(
        "Select functional areas",
        area_options,
        default=default_areas,
    )
    st.session_state.selected_functional_areas = selected_functional_areas

    if "All" in selected_functional_areas:
        selected_functional_areas = all_areas

    filtered_data = combined_data[
        combined_data['Environment'].isin(selected_environments)
        & combined_data['Functional area'].isin(selected_functional_areas)
    ]

    # Nothing to pivot or compare when the selection filters everything out.
    if filtered_data.empty:
        st.write("No data matches the selected environments and functional areas.")
        return

    # One row per (environment, area, scenario), one column per status.
    grouped_data = (
        filtered_data
        .groupby(['Environment', 'Functional area', 'Scenario name', 'Status'])
        .size()
        .unstack(fill_value=0)
    )

    # A status that never occurs has no column after the pivot.
    for status in ('PASSED', 'FAILED'):
        if status not in grouped_data.columns:
            grouped_data[status] = 0

    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']
    grouped_data = grouped_data.reset_index()
    grouped_data = grouped_data[
        ['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']
    ]

    st.write("### Scenario Counts by Environment and Functional Area")
    st.dataframe(grouped_data)

    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()

    # The column names are repeated as the first data row (existing behavior).
    summary_with_headers = pd.concat(
        [pd.DataFrame([summary.columns], columns=summary.columns), summary],
        ignore_index=True,
    )
    st.dataframe(summary_with_headers)

    scenarios_by_env = {
        env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name'])
        for env in selected_environments
    }
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")

    missing_scenarios = []
    mismatched_scenarios = []

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")

    if len(selected_environments) > 1:
        # Distinct scenario names per environment (rows) and area (columns).
        scenario_counts = (
            filtered_data
            .groupby(['Environment', 'Functional area'])['Scenario name']
            .nunique()
            .unstack(fill_value=0)
        )

        # Spread between the largest and smallest count for each area.
        count_diff = scenario_counts.max() - scenario_counts.min()
        inconsistent_areas = count_diff.sort_values(ascending=False)

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")

        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)

        missing_scenarios, mismatched_scenarios = _find_scenario_discrepancies(scenarios_by_env)
    else:
        st.write("Please select at least two environments for comparison.")

    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")

    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        st.dataframe(pd.DataFrame(missing_scenarios))
    else:
        st.write("No truly missing scenarios found across environments.")

    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        st.dataframe(pd.DataFrame(mismatched_scenarios))
    else:
        st.write("No scenarios with name differences found across environments.")
133
+
134
def multi_env_compare_main():
    """Streamlit page: collect CSV uploads per environment and analyse them.

    Asks for the number of environments, shows one multi-file CSV uploader
    per environment, runs every uploaded file through
    ``preprocess_uploaded_file`` and passes the resulting list to
    ``perform_multi_env_analysis``. When nothing has been uploaded, a prompt
    is written instead.
    """
    st.title("Multi-Environment Comparison")

    env_count = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)

    frames = []
    for env_index in range(env_count):
        files = st.file_uploader(
            f"Upload CSV files for Environment {env_index + 1}",
            type="csv",
            accept_multiple_files=True,
        )
        # Preprocess each file of this environment in upload order.
        frames.extend(preprocess_uploaded_file(csv_file) for csv_file in files)

    if not frames:
        st.write("Please upload at least one CSV file.")
        return

    perform_multi_env_analysis(frames)
160
+
161
# Render the comparison page only when this file is executed as a script.
if __name__ == "__main__":
    multi_env_compare_main()