acmc committed on
Commit
fb238c8
·
verified ·
1 Parent(s): 414ab61

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +205 -16
streamlit_app.py CHANGED
@@ -12,17 +12,179 @@ import plotly.express as px
12
  import plotly.graph_objects as go
13
  from plotly.subplots import make_subplots
14
  import warnings
 
 
15
  warnings.filterwarnings('ignore')
16
 
17
- # Import visualization utilities
18
- from visualization.utils import (
19
- load_and_prepare_dataset,
20
- get_available_turn_metrics,
21
- get_human_friendly_metric_name,
22
- clean_metric_values,
23
- PLOT_PALETTE,
24
- setup_plot_style
25
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Setup page config
28
  st.set_page_config(
@@ -113,6 +275,11 @@ def main():
113
  if not data_loaded:
114
  st.stop()
115
 
 
 
 
 
 
116
  # Sidebar controls
117
  st.sidebar.header("πŸŽ›οΈ Controls")
118
 
@@ -127,13 +294,32 @@ def main():
127
 
128
  # Role filter
129
  if 'turn.role' in df_exploded.columns:
130
- roles = df_exploded['turn.role'].unique()
131
- selected_roles = st.sidebar.multiselect(
132
- "Select Roles",
133
- options=roles,
134
- default=roles,
135
- help="Filter by turn role"
136
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  else:
138
  selected_roles = None
139
 
@@ -303,6 +489,9 @@ def main():
303
 
304
  if selected_roles and 'turn.role' in filtered_df_exploded.columns:
305
  filtered_df_exploded = filtered_df_exploded[filtered_df_exploded['turn.role'].isin(selected_roles)]
 
 
 
306
 
307
  # Main content tabs
308
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“Š Distributions", "πŸ”— Correlations", "πŸ“ˆ Comparisons", "🎯 Details"])
 
12
  import plotly.graph_objects as go
13
  from plotly.subplots import make_subplots
14
  import warnings
15
+ import datasets
16
+ import logging
17
  warnings.filterwarnings('ignore')
18
 
19
# Configure logging
# Root logger emits INFO and above; this module logs through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# Hex colours keyed by label (presumably the conversation/dataset category
# used when plotting -- confirm against the plotting code).
PLOT_PALETTE = {
    "jailbreak": "#D000D8",  # Purple
    "benign": "#008393",  # Cyan
    "control": "#EF0000",  # Red
}
29
+
30
+ # Utility functions
31
def load_and_prepare_dataset(dataset_config):
    """Load a conversation dataset and flatten it to one row per turn.

    Args:
        dataset_config: Mapping with a ``"dataset_name"`` entry. The name is
            passed to ``datasets.load_dataset`` and only the ``"train"`` split
            is loaded.

    Returns:
        A ``(conversations, turns)`` tuple of DataFrames. ``conversations`` is
        the dataset exactly as returned by ``to_pandas()``. ``turns`` has one
        row per element of the ``"conversation"`` column: the element's fields
        are flattened into ``turn.*`` columns, the ``"conversation_metrics"``
        column (if present) is flattened into ``conversation_metrics.*``
        columns, and both source columns are dropped.

    Raises:
        KeyError: If ``dataset_config`` has no ``"dataset_name"`` entry or the
            dataset has no ``"conversation"`` column.
    """
    logger.info("Loading dataset...")

    dataset_name = dataset_config["dataset_name"]
    logger.info("Loading dataset: %s", dataset_name)

    dataset = datasets.load_dataset(dataset_name, split="train")
    logger.info("Dataset loaded with %d conversations", len(dataset))

    pandas_dataset = dataset.to_pandas()

    # One row per turn. The index is reset so that the frames built below
    # share the same 0..n-1 index and line up row-for-row in the concat.
    turns = pandas_dataset.explode("conversation").reset_index(drop=True)

    # Flatten each turn's (possibly nested) fields into "turn.<field>" columns.
    turn_fields = pd.json_normalize(turns["conversation"]).add_prefix("turn.")

    # Guarantee the column exists so the normalise/drop steps below always
    # work. Each row gets its own dict rather than one shared instance.
    if "conversation_metrics" not in turns.columns:
        turns["conversation_metrics"] = [{} for _ in range(len(turns))]

    metric_fields = pd.json_normalize(turns["conversation_metrics"]).add_prefix(
        "conversation_metrics."
    )

    turns = pd.concat(
        [
            turns.drop(columns=["conversation", "conversation_metrics"]),
            turn_fields,
            metric_fields,
        ],
        axis=1,
    )

    logger.info("Dataset prepared with %d turns", len(turns))
    return pandas_dataset, turns
83
+
84
def get_available_turn_metrics(dataset_exploded):
    """Return the sorted names of the turn metrics present in the dataset.

    A turn metric is any column whose label starts with
    ``"turn.turn_metrics."``; the returned name is the label with that
    leading prefix removed.

    Args:
        dataset_exploded: Object exposing a ``columns`` iterable of column
            labels (the per-turn DataFrame).

    Returns:
        Alphabetically sorted list of metric names without the prefix.
    """
    prefix = "turn.turn_metrics."

    # Slice the prefix off instead of str.replace(): replace() would also
    # delete any later occurrence of the prefix text inside the name.
    # Non-string labels cannot be turn metrics, so they are skipped.
    metrics = sorted(
        col[len(prefix):]
        for col in dataset_exploded.columns
        if isinstance(col, str) and col.startswith(prefix)
    )

    logger.info("Discovered %d available turn metrics", len(metrics))
    return metrics
99
+
100
# Display names for known metrics, keyed by bare metric name (no column prefix).
_METRIC_DISPLAY_NAMES = {
    # Length metrics
    "conversational_length_tokens": "Length in Tokens",
    "conversational_length_words": "Length in Words",
    "conversational_length_bytes": "Length in Bytes",
    # Readability metrics
    "readability_flesch_score": "Flesch Reading Ease",
    "readability_standard_score": "Reading Grade Level",
    # Compression metrics
    "lzw_compression": "LZW Compression Ratio",
    # Language model metrics
    "ll_mean_logprobs": "Mean Log Probability",
    "ll_max_logprobs": "Maximum Log Probability",
    "ll_min_logprobs": "Minimum Log Probability",
    "rll_mean_logprobs": "Reverse Mean Log Probability",
    "rll_max_logprobs": "Reverse Maximum Log Probability",
    "rll_min_logprobs": "Reverse Minimum Log Probability",
    # Lexical diversity
    "type_token_ratio": "Type-Token Ratio",
    # Language distribution
    "zipf_score": "Zipf Distribution Score",
    # Evaluation metrics
    "rubric_evaluation.Stealth": "Stealth Score",
    # Working memory metrics
    "wm_total_entity_references": "Total Entity References",
    "wm_unique_entities_count": "Unique Entities Count",
    "wm_named_entity_mentions": "Named Entity Mentions",
    "wm_concept_noun_mentions": "Concept Noun Mentions",
    "wm_pronoun_references": "Pronoun References",
    "wm_entity_density_per_word": "Entity Density per Word",
    "wm_entity_density_per_100_words": "Entity Density per 100 Words",
    "wm_entity_density_per_100_chars": "Entity Density per 100 Characters",
    "wm_entity_diversity_ratio": "Entity Diversity Ratio",
    "wm_entity_repetition_ratio": "Entity Repetition Ratio",
    "wm_cognitive_load_score": "Cognitive Load Score",
    "wm_high_cognitive_load": "High Cognitive Load",
    # Discourse coherence metrics
    "discourse_coherence_to_next_user": "Coherence to Next User Turn",
    "discourse_coherence_to_next_turn": "Coherence to Next Turn",
    "discourse_mean_user_coherence": "Mean User Coherence",
    "discourse_user_coherence_variance": "User Coherence Variance",
    "discourse_user_topic_drift": "User Topic Drift",
    "discourse_user_entity_continuity": "User Entity Continuity",
    "discourse_num_user_turns": "Number of User Turns",
    # Tokens per byte
    "tokens_per_byte": "Tokens per Byte",
}

# Column prefixes that may precede a metric name, in the order they are tried.
_METRIC_NAME_PREFIXES = (
    "turn.turn_metrics.",
    "conversation_metrics.",
    "turn_metrics.",
)

# Suffixes marking a conversation-level aggregate of a turn metric.
_CONVERSATION_AGG_SUFFIXES = (
    "_conversation_mean",
    "_conversation_min",
    "_conversation_max",
    "_conversation_std",
    "_conversation_count",
)


def get_human_friendly_metric_name(metric_name):
    """Convert a metric or column name to a human-friendly display name.

    The first matching column prefix (``turn.turn_metrics.``,
    ``conversation_metrics.`` or ``turn_metrics.``) is removed, then the
    remaining name is resolved in this order:

    1. a known metric -> its display name;
    2. a known metric followed by ``_conversation_<agg>`` (mean, min, max,
       std, count) -> ``"<display name> (<Agg>)"``;
    3. anything else -> underscores replaced by spaces, title-cased.

    Args:
        metric_name: Metric name, with or without a column prefix.

    Returns:
        The display name as a string.
    """
    # Strip the column prefix first so prefixed names reach the same lookups
    # as bare ones (e.g. "conversation_metrics.zipf_score_conversation_mean").
    bare_name = metric_name
    for prefix in _METRIC_NAME_PREFIXES:
        if bare_name.startswith(prefix):
            bare_name = bare_name[len(prefix):]
            break

    if bare_name in _METRIC_DISPLAY_NAMES:
        return _METRIC_DISPLAY_NAMES[bare_name]

    # Conversation-level aggregations of a known metric.
    for suffix in _CONVERSATION_AGG_SUFFIXES:
        if bare_name.endswith(suffix):
            base_metric = bare_name[:-len(suffix)]
            if base_metric in _METRIC_DISPLAY_NAMES:
                agg_type = suffix.rsplit("_", 1)[-1].title()
                return f"{_METRIC_DISPLAY_NAMES[base_metric]} ({agg_type})"

    # Fallback: convert underscores to spaces and title case.
    return bare_name.replace("_", " ").title()
188
 
189
  # Setup page config
190
  st.set_page_config(
 
275
  if not data_loaded:
276
  st.stop()
277
 
278
+ # Check if we have data after filtering
279
+ if len(filtered_df_exploded) == 0:
280
+ st.error("No data available with current filters. Please adjust your selection.")
281
+ st.stop()
282
+
283
  # Sidebar controls
284
  st.sidebar.header("πŸŽ›οΈ Controls")
285
 
 
294
 
295
  # Role filter
296
  if 'turn.role' in df_exploded.columns:
297
+ roles = df_exploded['turn.role'].dropna().unique()
298
+ # Assert only user and assistant roles exist
299
+ expected_roles = {'user', 'assistant'}
300
+ actual_roles = set(roles)
301
+ assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
302
+
303
+ st.sidebar.subheader("πŸ‘₯ Role Filter")
304
+ col1, col2 = st.sidebar.columns(2)
305
+
306
+ with col1:
307
+ include_user = st.checkbox("User", value=True, help="Include user turns")
308
+ with col2:
309
+ include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
310
+
311
+ # Build selected roles list
312
+ selected_roles = []
313
+ if include_user and 'user' in roles:
314
+ selected_roles.append('user')
315
+ if include_assistant and 'assistant' in roles:
316
+ selected_roles.append('assistant')
317
+
318
+ # Show selection info
319
+ if selected_roles:
320
+ st.sidebar.success(f"Including: {', '.join(selected_roles)}")
321
+ else:
322
+ st.sidebar.warning("No roles selected")
323
  else:
324
  selected_roles = None
325
 
 
489
 
490
  if selected_roles and 'turn.role' in filtered_df_exploded.columns:
491
  filtered_df_exploded = filtered_df_exploded[filtered_df_exploded['turn.role'].isin(selected_roles)]
492
+ elif selected_roles is not None and len(selected_roles) == 0:
493
+ # If roles exist but none are selected, show empty dataset
494
+ filtered_df_exploded = filtered_df_exploded.iloc[0:0] # Empty dataframe with same structure
495
 
496
  # Main content tabs
497
  tab1, tab2, tab3, tab4 = st.tabs(["πŸ“Š Distributions", "πŸ”— Correlations", "πŸ“ˆ Comparisons", "🎯 Details"])