acmc committed
Commit b442037 · verified · 1 Parent(s): 21a08b0

Update streamlit_app.py

Files changed (1):
  1. streamlit_app.py +311 -212
streamlit_app.py CHANGED
@@ -212,8 +212,8 @@ def main():
     st.title("🔍 Complexity Metrics Explorer")
     st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")

-    # Dataset selection at the top
-    st.header("🗂️ Dataset Selection")
+    # Dataset selection
+    st.sidebar.header("🗂️ Dataset Selection")

     # Available datasets
     available_datasets = [
@@ -223,36 +223,31 @@
         "Custom..."
     ]

-    col1, col2 = st.columns([3, 1])
-
-    with col1:
-        selected_option = st.selectbox(
-            "Select Dataset",
-            options=available_datasets,
-            index=0,  # Default to reduced dataset
-            help="Choose which dataset to analyze",
-            format_func=lambda x: x.split('/')[-1] if x != "Custom..." else x  # Show only the dataset name part
-        )
-
-    with col2:
-        # Add refresh button
-        if st.button("🔄 Refresh Data", help="Clear cache and reload dataset"):
-            st.cache_data.clear()
-            st.rerun()
+    selected_option = st.sidebar.selectbox(
+        "Select Dataset",
+        options=available_datasets,
+        index=0,  # Default to reduced dataset
+        help="Choose which dataset to analyze"
+    )

     # Handle custom dataset input
     if selected_option == "Custom...":
-        selected_dataset = st.text_input(
+        selected_dataset = st.sidebar.text_input(
             "Custom Dataset Name",
             value="risky-conversations/jailbreaks_dataset_with_results_reduced",
             help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
         )
         if not selected_dataset.strip():
-            st.warning("Please enter a dataset name")
+            st.sidebar.warning("Please enter a dataset name")
             st.stop()
     else:
         selected_dataset = selected_option

+    # Add refresh button
+    if st.sidebar.button("🔄 Refresh Data", help="Clear cache and reload dataset"):
+        st.cache_data.clear()
+        st.rerun()
+
     # Load data
     with st.spinner(f"Loading dataset: {selected_dataset}..."):
         try:
@@ -280,52 +275,48 @@
     if not data_loaded:
         st.stop()

-    # Controls at the top of the page
-    st.header("🎛️ Analysis Controls")
+    # Sidebar controls
+    st.sidebar.header("🎛️ Controls")

     # Dataset type filter
     dataset_types = df['type'].unique()
-    col1, col2 = st.columns(2)
-
-    with col1:
-        selected_types = st.multiselect(
-            "Select Dataset Types",
-            options=dataset_types,
-            default=dataset_types,
-            help="Filter by conversation type"
-        )
+    selected_types = st.sidebar.multiselect(
+        "Select Dataset Types",
+        options=dataset_types,
+        default=dataset_types,
+        help="Filter by conversation type"
+    )

     # Role filter
-    with col2:
-        if 'turn.role' in df_exploded.columns:
-            roles = df_exploded['turn.role'].dropna().unique()
-            # Assert only user and assistant roles exist
-            expected_roles = {'user', 'assistant'}
-            actual_roles = set(roles)
-            assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
-
-            st.subheader("👥 Role Filter")
-            col2_1, col2_2 = st.columns(2)
-
-            with col2_1:
-                include_user = st.checkbox("User", value=True, help="Include user turns")
-            with col2_2:
-                include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
-
-            # Build selected roles list
-            selected_roles = []
-            if include_user and 'user' in roles:
-                selected_roles.append('user')
-            if include_assistant and 'assistant' in roles:
-                selected_roles.append('assistant')
-
-            # Show selection info
-            if selected_roles:
-                st.success(f"Including: {', '.join(selected_roles)}")
-            else:
-                st.warning("No roles selected")
-        else:
-            selected_roles = None
+    if 'turn.role' in df_exploded.columns:
+        roles = df_exploded['turn.role'].dropna().unique()
+        # Assert only user and assistant roles exist
+        expected_roles = {'user', 'assistant'}
+        actual_roles = set(roles)
+        assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
+
+        st.sidebar.subheader("👥 Role Filter")
+        col1, col2 = st.sidebar.columns(2)
+
+        with col1:
+            include_user = st.checkbox("User", value=True, help="Include user turns")
+        with col2:
+            include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
+
+        # Build selected roles list
+        selected_roles = []
+        if include_user and 'user' in roles:
+            selected_roles.append('user')
+        if include_assistant and 'assistant' in roles:
+            selected_roles.append('assistant')
+
+        # Show selection info
+        if selected_roles:
+            st.sidebar.success(f"Including: {', '.join(selected_roles)}")
+        else:
+            st.sidebar.warning("No roles selected")
+    else:
+        selected_roles = None

     # Filter data based on selections
     filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
@@ -343,7 +334,7 @@
             st.stop()

     # Metric selection
-    st.header("📊 Metrics Selection")
+    st.sidebar.header("📊 Metrics")

     # Dynamic metric categorization based on common patterns
     def categorize_metrics(metrics):
@@ -386,28 +377,24 @@
     metric_categories = categorize_metrics(available_metrics)

     # Metric selection interface
-    selection_mode = st.radio(
+    selection_mode = st.sidebar.radio(
         "Selection Mode",
         ["By Category", "Search/Filter", "Select All"],
-        help="Choose how to select metrics",
-        horizontal=True
+        help="Choose how to select metrics"
     )

     if selection_mode == "By Category":
-        col1, col2 = st.columns([2, 1])
-
-        with col1:
-            selected_category = st.selectbox(
-                "Metric Category",
-                options=list(metric_categories.keys()),
-                help=f"Found {len(metric_categories)} categories"
-            )
+        selected_category = st.sidebar.selectbox(
+            "Metric Category",
+            options=list(metric_categories.keys()),
+            help=f"Found {len(metric_categories)} categories"
+        )

         available_in_category = metric_categories[selected_category]
         default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category

         # Add select all button for category
-        col1, col2 = st.columns(2)
+        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_category"):
                 st.session_state.selected_metrics_category = available_in_category
@@ -419,7 +406,7 @@
         if "selected_metrics_category" not in st.session_state:
             st.session_state.selected_metrics_category = default_selection

-        selected_metrics = st.multiselect(
+        selected_metrics = st.sidebar.multiselect(
             f"Select Metrics ({len(available_in_category)} available)",
             options=available_in_category,
             default=st.session_state.selected_metrics_category,
@@ -428,7 +415,7 @@
         )

     elif selection_mode == "Search/Filter":
-        search_term = st.text_input(
+        search_term = st.sidebar.text_input(
             "Search Metrics",
             placeholder="Enter keywords to filter metrics...",
             help="Search for metrics containing specific terms"
@@ -439,10 +426,10 @@
         else:
             filtered_metrics = available_metrics

-        st.write(f"Found {len(filtered_metrics)} metrics")
+        st.sidebar.write(f"Found {len(filtered_metrics)} metrics")

         # Add select all button for search results
-        col1, col2 = st.columns(2)
+        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_search"):
                 st.session_state.selected_metrics_search = filtered_metrics
@@ -454,7 +441,7 @@
         if "selected_metrics_search" not in st.session_state:
             st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]

-        selected_metrics = st.multiselect(
+        selected_metrics = st.sidebar.multiselect(
             "Select Metrics",
             options=filtered_metrics,
             default=st.session_state.selected_metrics_search,
@@ -464,7 +451,7 @@

     else:  # Select All
         # Add select all button for all metrics
-        col1, col2 = st.columns(2)
+        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_all"):
                 st.session_state.selected_metrics_all = available_metrics
@@ -476,7 +463,7 @@
         if "selected_metrics_all" not in st.session_state:
             st.session_state.selected_metrics_all = available_metrics[:10]  # Limit default to first 10 for performance

-        selected_metrics = st.multiselect(
+        selected_metrics = st.sidebar.multiselect(
             f"All Metrics ({len(available_metrics)} total)",
             options=available_metrics,
             default=st.session_state.selected_metrics_all,
@@ -486,18 +473,18 @@

     # Show selection summary
     if selected_metrics:
-        st.success(f"Selected {len(selected_metrics)} metrics")
+        st.sidebar.success(f"Selected {len(selected_metrics)} metrics")

         # Performance warning for large selections
         if len(selected_metrics) > 20:
-            st.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
+            st.sidebar.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
         elif len(selected_metrics) > 50:
-            st.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
+            st.sidebar.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
     else:
-        st.warning("No metrics selected")
+        st.sidebar.warning("No metrics selected")

     # Metric info expander
-    with st.expander("ℹ️ Metric Information", expanded=False):
+    with st.sidebar.expander("ℹ️ Metric Information", expanded=False):
         st.write(f"**Total Available Metrics:** {len(available_metrics)}")
         st.write(f"**Categories Found:** {len(metric_categories)}")

@@ -506,8 +493,6 @@
         for i, metric in enumerate(available_metrics, 1):
             st.write(f"{i}. `{metric}`")

-    st.divider()  # Visual separator before main content
-
     # Main content tabs
     tab1, tab2, tab3, tab4, tab5 = st.tabs(["📊 Distributions", "🔗 Correlations", "📈 Comparisons", "🔍 Conversation", "🎯 Details"])

@@ -704,6 +689,7 @@
             # Display conversation metadata
             st.subheader("📋 Conversation Overview")

+            # First row - basic info
             col1, col2, col3, col4 = st.columns(4)
             with col1:
                 st.metric("Type", selected_conversation['type'])
@@ -718,6 +704,68 @@
                 assistant_turns = roles.count('assistant')
                 st.metric("User/Assistant", f"{user_turns}/{assistant_turns}")

+            # Second row - additional metadata
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                provenance = selected_conversation.get('provenance_dataset', 'Unknown')
+                st.metric("Dataset Source", provenance)
+            with col2:
+                language = selected_conversation.get('language', 'Unknown')
+                st.metric("Language", language.upper() if language else 'Unknown')
+            with col3:
+                timestamp = selected_conversation.get('timestamp', None)
+                if timestamp:
+                    # Handle different timestamp formats
+                    if isinstance(timestamp, str):
+                        st.metric("Timestamp", timestamp)
+                    else:
+                        st.metric("Timestamp", str(timestamp))
+                else:
+                    st.metric("Timestamp", "Not Available")
+
+            # Add toxicity summary
+            conversation_turns_temp = selected_conversation.get('conversation', [])
+            if hasattr(conversation_turns_temp, 'tolist'):
+                conversation_turns_temp = conversation_turns_temp.tolist()
+            elif conversation_turns_temp is None:
+                conversation_turns_temp = []
+
+            if len(conversation_turns_temp) > 0:
+                # Calculate overall toxicity statistics
+                all_toxicities = []
+                for turn in conversation_turns_temp:
+                    toxicities = turn.get('toxicities', {})
+                    if toxicities and 'toxicity' in toxicities:
+                        all_toxicities.append(toxicities['toxicity'])
+
+                if all_toxicities:
+                    avg_toxicity = sum(all_toxicities) / len(all_toxicities)
+                    max_toxicity = max(all_toxicities)
+
+                    st.markdown("**🔍 Toxicity Summary:**")
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        # Color code average toxicity
+                        if avg_toxicity > 0.5:
+                            st.metric("Average Toxicity", f"{avg_toxicity:.4f}", delta="HIGH", delta_color="inverse")
+                        elif avg_toxicity > 0.1:
+                            st.metric("Average Toxicity", f"{avg_toxicity:.4f}", delta="MED", delta_color="off")
+                        else:
+                            st.metric("Average Toxicity", f"{avg_toxicity:.4f}", delta="LOW", delta_color="normal")
+
+                    with col2:
+                        # Color code max toxicity
+                        if max_toxicity > 0.5:
+                            st.metric("Max Toxicity", f"{max_toxicity:.4f}", delta="HIGH", delta_color="inverse")
+                        elif max_toxicity > 0.1:
+                            st.metric("Max Toxicity", f"{max_toxicity:.4f}", delta="MED", delta_color="off")
+                        else:
+                            st.metric("Max Toxicity", f"{max_toxicity:.4f}", delta="LOW", delta_color="normal")
+
+                    with col3:
+                        high_tox_turns = sum(1 for t in all_toxicities if t > 0.5)
+                        st.metric("High Toxicity Turns", high_tox_turns)
+
             # Get conversation turns with metrics
             conv_turns_data = filtered_df_exploded[filtered_df_exploded.index.isin(
                 filtered_df_exploded[filtered_df_exploded.index // len(filtered_df_exploded) * len(filtered_df) +
@@ -739,146 +787,197 @@
             # Simpler approach: get all turns from the conversation directly
             conversation_turns = selected_conversation.get('conversation', [])

-            if conversation_turns:
-                # Display conversation content
-                st.subheader("💬 Conversation Content")
-
-                # Show/hide content toggle
-                show_content = st.checkbox("Show conversation content", value=True)
-
-                if show_content:
-                    for i, turn in enumerate(conversation_turns):
-                        role = turn.get('role', 'unknown')
-                        content = turn.get('content', 'No content')
-
-                        # Style based on role
-                        if role == 'user':
-                            st.markdown(f"**👤 User (Turn {i+1}):**")
-                            st.info(content)
-                        elif role == 'assistant':
-                            st.markdown(f"**🤖 Assistant (Turn {i+1}):**")
-                            st.success(content)
-                        else:
-                            st.markdown(f"**❓ {role.title()} (Turn {i+1}):**")
-                            st.warning(content)
-
-                # Display turn-level metrics if available
-                st.subheader("📊 Turn-Level Metrics")
-
-                if selected_metrics:
-                    # Get actual turn-level data for this conversation
-                    # Find matching turns in the exploded dataframe
-                    conv_turn_metrics = []
-
-                    # Simple approach: try to match turns by content or position
-                    # This is a best-effort approach since exact matching is complex
-                    turn_metric_columns = [f"turn.turn_metrics.{m}" for m in selected_metrics]
-                    available_columns = [col for col in turn_metric_columns if col in filtered_df_exploded.columns]
-
-                    if available_columns:
-                        # Try to get metrics for turns from this conversation type
-                        type_turns = filtered_df_exploded[filtered_df_exploded['type'] == selected_conversation['type']]
-
-                        # Take a sample of turns for this conversation type (since exact matching is complex)
-                        sample_size = min(len(conversation_turns), len(type_turns))
-                        if sample_size > 0:
-                            sample_turns = type_turns.head(sample_size)
-
-                            # Create metrics table
-                            metrics_display_data = []
-                            for i, (_, turn_row) in enumerate(sample_turns.iterrows()):
-                                if i < len(conversation_turns):
-                                    turn_data = {
-                                        'Turn': i + 1,
-                                        'Role': conversation_turns[i].get('role', 'unknown')
-                                    }
-
-                                    # Add actual metric values
-                                    for col in available_columns:
-                                        metric_name = col.replace('turn.turn_metrics.', '')
-                                        friendly_name = get_human_friendly_metric_name(metric_name)
-                                        value = turn_row.get(col, 'N/A')
-                                        if pd.notna(value) and isinstance(value, (int, float)):
-                                            turn_data[friendly_name] = round(value, 3)
-                                        else:
-                                            turn_data[friendly_name] = 'N/A'
-
-                                    metrics_display_data.append(turn_data)
-
-                            if metrics_display_data:
-                                metrics_df = pd.DataFrame(metrics_display_data)
-                                st.dataframe(metrics_df, use_container_width=True)
-
-                                # Plot metrics over turns with real data
-                                st.subheader("📈 Metrics Over Turns")
-
-                                fig = go.Figure()
-
-                                # Add traces for each selected metric (real data)
-                                for col in available_columns[:5]:  # Limit to first 5 for readability
-                                    metric_name = col.replace('turn.turn_metrics.', '')
-                                    friendly_name = get_human_friendly_metric_name(metric_name)
-
-                                    # Get values for this metric
-                                    y_values = []
-                                    for _, turn_row in sample_turns.iterrows():
-                                        value = turn_row.get(col, None)
-                                        if pd.notna(value) and isinstance(value, (int, float)):
-                                            y_values.append(value)
-                                        else:
-                                            y_values.append(None)
-
-                                    if any(v is not None for v in y_values):
-                                        fig.add_trace(go.Scatter(
-                                            x=list(range(1, len(y_values) + 1)),
-                                            y=y_values,
-                                            mode='lines+markers',
-                                            name=friendly_name,
-                                            line=dict(width=2),
-                                            marker=dict(size=8),
-                                            connectgaps=False
-                                        ))
-
-                                if fig.data:  # Only show if we have data
-                                    fig.update_layout(
-                                        title="Complexity Metrics Across Conversation Turns",
-                                        xaxis_title="Turn Number",
-                                        yaxis_title="Metric Value",
-                                        height=400,
-                                        hovermode='x unified'
-                                    )
-
-                                    st.plotly_chart(fig, use_container_width=True)
-                                else:
-                                    st.info("No numeric metric data available to plot for this conversation type.")
-                            else:
-                                st.info("No matching turn-level metrics found for this conversation.")
-                        else:
-                            st.info("No turn-level data available for this conversation type.")
-                    else:
-                        st.warning("No turn-level metrics available in the dataset for the selected metrics.")
-
-                    # Show raw turn content with role highlighting
-                    with st.expander("🔍 Detailed Turn Analysis", expanded=False):
-                        for i, turn in enumerate(conversation_turns):
-                            role = turn.get('role', 'unknown')
-                            content = turn.get('content', 'No content')
-
-                            st.markdown(f"**Turn {i+1} ({role}):**")
-                            st.text_area(
-                                f"Content",
-                                content,
-                                height=100,
-                                key=f"turn_content_{i}",
-                                disabled=True
-                            )
-
-                            # Show turn statistics
-                            st.caption(f"Characters: {len(content)} | Words: {len(content.split())} | Role: {role}")
-                            st.divider()
-                else:
-                    st.warning("Select some metrics to see turn-level analysis.")
+            # Ensure conversation_turns is a list and handle different data types
+            if hasattr(conversation_turns, 'tolist'):
+                conversation_turns = conversation_turns.tolist()
+            elif conversation_turns is None:
+                conversation_turns = []
+
+            if len(conversation_turns) > 0:
+                # Display conversation content with metrics
+                st.subheader("💬 Conversation with Metrics")
+
+                # Get actual turn-level data for this conversation
+                turn_metric_columns = [f"turn.turn_metrics.{m}" for m in selected_metrics]
+                available_columns = [col for col in turn_metric_columns if col in filtered_df_exploded.columns]
+
+                # Get sample metrics for this conversation type (since exact matching is complex)
+                sample_metrics = None
+                if available_columns:
+                    type_turns = filtered_df_exploded[filtered_df_exploded['type'] == selected_conversation['type']]
+                    sample_size = min(len(conversation_turns), len(type_turns))
+                    if sample_size > 0:
+                        sample_metrics = type_turns.head(sample_size)
+
+                # Display each turn with its metrics
+                for i, turn in enumerate(conversation_turns):
+                    role = turn.get('role', 'unknown')
+                    content = turn.get('content', 'No content')
+
+                    # Display turn content with role styling
+                    if role == 'user':
+                        st.markdown(f"**👤 User (Turn {i+1}):**")
+                        st.info(content)
+                    elif role == 'assistant':
+                        st.markdown(f"**🤖 Assistant (Turn {i+1}):**")
+                        st.success(content)
+                    else:
+                        st.markdown(f"**❓ {role.title()} (Turn {i+1}):**")
+                        st.warning(content)
+
+                    # Display metrics for this turn
+                    if sample_metrics is not None and i < len(sample_metrics):
+                        turn_row = sample_metrics.iloc[i]
+
+                        # Create metrics display
+                        metrics_for_turn = {}
+                        for col in available_columns:
+                            metric_name = col.replace('turn.turn_metrics.', '')
+                            friendly_name = get_human_friendly_metric_name(metric_name)
+                            value = turn_row.get(col, 'N/A')
+                            if pd.notna(value) and isinstance(value, (int, float)):
+                                metrics_for_turn[friendly_name] = round(value, 3)
+                            else:
+                                metrics_for_turn[friendly_name] = 'N/A'
+
+                        # Add toxicity metrics if available
+                        toxicities = turn.get('toxicities', {})
+                        if toxicities:
+                            st.markdown("**🔍 Toxicity Scores:**")
+                            tox_cols = st.columns(4)
+                            tox_metrics = [
+                                ('toxicity', 'Overall Toxicity'),
+                                ('severe_toxicity', 'Severe Toxicity'),
+                                ('identity_attack', 'Identity Attack'),
+                                ('insult', 'Insult'),
+                                ('obscene', 'Obscene'),
+                                ('sexual_explicit', 'Sexual Explicit'),
+                                ('threat', 'Threat')
+                            ]
+
+                            for idx, (tox_key, tox_name) in enumerate(tox_metrics):
+                                if tox_key in toxicities:
+                                    col_idx = idx % 4
+                                    with tox_cols[col_idx]:
+                                        tox_value = toxicities[tox_key]
+                                        if isinstance(tox_value, (int, float)):
+                                            # Color code based on toxicity level
+                                            if tox_value > 0.5:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="HIGH", delta_color="inverse")
+                                            elif tox_value > 0.1:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="MED", delta_color="off")
+                                            else:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="LOW", delta_color="normal")
+                                        else:
+                                            st.metric(tox_name, str(tox_value))
+
+                        # Display complexity metrics
+                        if metrics_for_turn:
+                            st.markdown("**📊 Complexity Metrics:**")
+                            # Display metrics in columns
+                            num_cols = min(4, len(metrics_for_turn))
+                            if num_cols > 0:
+                                cols = st.columns(num_cols)
+                                for idx, (metric_name, value) in enumerate(metrics_for_turn.items()):
+                                    col_idx = idx % num_cols
+                                    with cols[col_idx]:
+                                        if isinstance(value, (int, float)) and value != 'N/A':
+                                            st.metric(metric_name, value)
+                                        else:
+                                            st.metric(metric_name, str(value))
+                    else:
+                        # Show toxicity even when no complexity metrics available
+                        toxicities = turn.get('toxicities', {})
+                        if toxicities:
+                            st.markdown("**🔍 Toxicity Scores:**")
+                            tox_cols = st.columns(4)
+                            tox_metrics = [
+                                ('toxicity', 'Overall Toxicity'),
+                                ('severe_toxicity', 'Severe Toxicity'),
+                                ('identity_attack', 'Identity Attack'),
+                                ('insult', 'Insult'),
+                                ('obscene', 'Obscene'),
+                                ('sexual_explicit', 'Sexual Explicit'),
+                                ('threat', 'Threat')
+                            ]
+
+                            for idx, (tox_key, tox_name) in enumerate(tox_metrics):
+                                if tox_key in toxicities:
+                                    col_idx = idx % 4
+                                    with tox_cols[col_idx]:
+                                        tox_value = toxicities[tox_key]
+                                        if isinstance(tox_value, (int, float)):
+                                            # Color code based on toxicity level
+                                            if tox_value > 0.5:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="HIGH", delta_color="inverse")
+                                            elif tox_value > 0.1:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="MED", delta_color="off")
+                                            else:
+                                                st.metric(tox_name, f"{tox_value:.4f}", delta="LOW", delta_color="normal")
+                                        else:
+                                            st.metric(tox_name, str(tox_value))
+
+                        # Show basic turn statistics when no complexity metrics available
+                        st.markdown("**📈 Basic Statistics:**")
+                        col1, col2, col3 = st.columns(3)
+                        with col1:
+                            st.metric("Characters", len(content))
+                        with col2:
+                            st.metric("Words", len(content.split()))
+                        with col3:
+                            st.metric("Role", role.title())
+
+                    # Add separator between turns
+                    st.divider()
+
+                # Plot metrics over turns with real data if available
+                if available_columns and sample_metrics is not None:
+                    st.subheader("📈 Metrics Over Turns")
+
+                    fig = go.Figure()
+
+                    # Add traces for each selected metric (real data)
+                    for col in available_columns[:5]:  # Limit to first 5 for readability
+                        metric_name = col.replace('turn.turn_metrics.', '')
+                        friendly_name = get_human_friendly_metric_name(metric_name)
+
+                        # Get values for this metric
+                        y_values = []
+                        for _, turn_row in sample_metrics.iterrows():
+                            value = turn_row.get(col, None)
+                            if pd.notna(value) and isinstance(value, (int, float)):
+                                y_values.append(value)
+                            else:
+                                y_values.append(None)
+
+                        if any(v is not None for v in y_values):
+                            fig.add_trace(go.Scatter(
+                                x=list(range(1, len(y_values) + 1)),
+                                y=y_values,
+                                mode='lines+markers',
+                                name=friendly_name,
+                                line=dict(width=2),
+                                marker=dict(size=8),
+                                connectgaps=False
+                            ))
+
+                    if fig.data:  # Only show if we have data
+                        fig.update_layout(
+                            title="Complexity Metrics Across Conversation Turns",
+                            xaxis_title="Turn Number",
+                            yaxis_title="Metric Value",
+                            height=400,
+                            hovermode='x unified'
+                        )
+
+                        st.plotly_chart(fig, use_container_width=True)
+                    else:
+                        st.info("No numeric metric data available to plot for this conversation type.")
+
+                elif selected_metrics:
+                    st.info("Select metrics that are available in the dataset to see turn-level analysis.")
+                else:
+                    st.warning("Select some metrics to see detailed turn-level analysis.")

             else:
                 st.warning("No conversation data available for the selected conversation.")
 