acmc commited on
Commit
21a08b0
Β·
verified Β·
1 Parent(s): 5a46117

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +312 -72
streamlit_app.py CHANGED
@@ -212,8 +212,8 @@ def main():
212
  st.title("πŸ” Complexity Metrics Explorer")
213
  st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
214
 
215
- # Dataset selection
216
- st.sidebar.header("πŸ—‚οΈ Dataset Selection")
217
 
218
  # Available datasets
219
  available_datasets = [
@@ -223,31 +223,36 @@ def main():
223
  "Custom..."
224
  ]
225
 
226
- selected_option = st.sidebar.selectbox(
227
- "Select Dataset",
228
- options=available_datasets,
229
- index=0, # Default to reduced dataset
230
- help="Choose which dataset to analyze"
231
- )
 
 
 
 
 
 
 
 
 
 
232
 
233
  # Handle custom dataset input
234
  if selected_option == "Custom...":
235
- selected_dataset = st.sidebar.text_input(
236
  "Custom Dataset Name",
237
  value="risky-conversations/jailbreaks_dataset_with_results_reduced",
238
  help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
239
  )
240
  if not selected_dataset.strip():
241
- st.sidebar.warning("Please enter a dataset name")
242
  st.stop()
243
  else:
244
  selected_dataset = selected_option
245
 
246
- # Add refresh button
247
- if st.sidebar.button("πŸ”„ Refresh Data", help="Clear cache and reload dataset"):
248
- st.cache_data.clear()
249
- st.rerun()
250
-
251
  # Load data
252
  with st.spinner(f"Loading dataset: {selected_dataset}..."):
253
  try:
@@ -275,48 +280,52 @@ def main():
275
  if not data_loaded:
276
  st.stop()
277
 
278
- # Sidebar controls
279
- st.sidebar.header("πŸŽ›οΈ Controls")
280
 
281
  # Dataset type filter
282
  dataset_types = df['type'].unique()
283
- selected_types = st.sidebar.multiselect(
284
- "Select Dataset Types",
285
- options=dataset_types,
286
- default=dataset_types,
287
- help="Filter by conversation type"
288
- )
 
 
 
289
 
290
  # Role filter
291
- if 'turn.role' in df_exploded.columns:
292
- roles = df_exploded['turn.role'].dropna().unique()
293
- # Assert only user and assistant roles exist
294
- expected_roles = {'user', 'assistant'}
295
- actual_roles = set(roles)
296
- assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
297
-
298
- st.sidebar.subheader("πŸ‘₯ Role Filter")
299
- col1, col2 = st.sidebar.columns(2)
300
-
301
- with col1:
302
- include_user = st.checkbox("User", value=True, help="Include user turns")
303
- with col2:
304
- include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
305
-
306
- # Build selected roles list
307
- selected_roles = []
308
- if include_user and 'user' in roles:
309
- selected_roles.append('user')
310
- if include_assistant and 'assistant' in roles:
311
- selected_roles.append('assistant')
312
 
313
- # Show selection info
314
- if selected_roles:
315
- st.sidebar.success(f"Including: {', '.join(selected_roles)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  else:
317
- st.sidebar.warning("No roles selected")
318
- else:
319
- selected_roles = None
320
 
321
  # Filter data based on selections
322
  filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
@@ -334,7 +343,7 @@ def main():
334
  st.stop()
335
 
336
  # Metric selection
337
- st.sidebar.header("πŸ“Š Metrics")
338
 
339
  # Dynamic metric categorization based on common patterns
340
  def categorize_metrics(metrics):
@@ -377,24 +386,28 @@ def main():
377
  metric_categories = categorize_metrics(available_metrics)
378
 
379
  # Metric selection interface
380
- selection_mode = st.sidebar.radio(
381
  "Selection Mode",
382
  ["By Category", "Search/Filter", "Select All"],
383
- help="Choose how to select metrics"
 
384
  )
385
 
386
  if selection_mode == "By Category":
387
- selected_category = st.sidebar.selectbox(
388
- "Metric Category",
389
- options=list(metric_categories.keys()),
390
- help=f"Found {len(metric_categories)} categories"
391
- )
 
 
 
392
 
393
  available_in_category = metric_categories[selected_category]
394
  default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
395
 
396
  # Add select all button for category
397
- col1, col2 = st.sidebar.columns(2)
398
  with col1:
399
  if st.button("Select All", key="select_all_category"):
400
  st.session_state.selected_metrics_category = available_in_category
@@ -406,7 +419,7 @@ def main():
406
  if "selected_metrics_category" not in st.session_state:
407
  st.session_state.selected_metrics_category = default_selection
408
 
409
- selected_metrics = st.sidebar.multiselect(
410
  f"Select Metrics ({len(available_in_category)} available)",
411
  options=available_in_category,
412
  default=st.session_state.selected_metrics_category,
@@ -415,7 +428,7 @@ def main():
415
  )
416
 
417
  elif selection_mode == "Search/Filter":
418
- search_term = st.sidebar.text_input(
419
  "Search Metrics",
420
  placeholder="Enter keywords to filter metrics...",
421
  help="Search for metrics containing specific terms"
@@ -426,10 +439,10 @@ def main():
426
  else:
427
  filtered_metrics = available_metrics
428
 
429
- st.sidebar.write(f"Found {len(filtered_metrics)} metrics")
430
 
431
  # Add select all button for search results
432
- col1, col2 = st.sidebar.columns(2)
433
  with col1:
434
  if st.button("Select All", key="select_all_search"):
435
  st.session_state.selected_metrics_search = filtered_metrics
@@ -441,7 +454,7 @@ def main():
441
  if "selected_metrics_search" not in st.session_state:
442
  st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
443
 
444
- selected_metrics = st.sidebar.multiselect(
445
  "Select Metrics",
446
  options=filtered_metrics,
447
  default=st.session_state.selected_metrics_search,
@@ -451,7 +464,7 @@ def main():
451
 
452
  else: # Select All
453
  # Add select all button for all metrics
454
- col1, col2 = st.sidebar.columns(2)
455
  with col1:
456
  if st.button("Select All", key="select_all_all"):
457
  st.session_state.selected_metrics_all = available_metrics
@@ -463,7 +476,7 @@ def main():
463
  if "selected_metrics_all" not in st.session_state:
464
  st.session_state.selected_metrics_all = available_metrics[:10] # Limit default to first 10 for performance
465
 
466
- selected_metrics = st.sidebar.multiselect(
467
  f"All Metrics ({len(available_metrics)} total)",
468
  options=available_metrics,
469
  default=st.session_state.selected_metrics_all,
@@ -473,18 +486,18 @@ def main():
473
 
474
  # Show selection summary
475
  if selected_metrics:
476
- st.sidebar.success(f"Selected {len(selected_metrics)} metrics")
477
 
478
  # Performance warning for large selections
479
  if len(selected_metrics) > 20:
480
- st.sidebar.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
481
  elif len(selected_metrics) > 50:
482
- st.sidebar.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
483
  else:
484
- st.sidebar.warning("No metrics selected")
485
 
486
  # Metric info expander
487
- with st.sidebar.expander("ℹ️ Metric Information", expanded=False):
488
  st.write(f"**Total Available Metrics:** {len(available_metrics)}")
489
  st.write(f"**Categories Found:** {len(metric_categories)}")
490
 
@@ -493,8 +506,10 @@ def main():
493
  for i, metric in enumerate(available_metrics, 1):
494
  st.write(f"{i}. `{metric}`")
495
 
 
 
496
  # Main content tabs
497
- tab1, tab2, tab3, tab4 = st.tabs(["πŸ“Š Distributions", "πŸ”— Correlations", "πŸ“ˆ Comparisons", "🎯 Details"])
498
 
499
  with tab1:
500
  st.header("Distribution Analysis")
@@ -644,6 +659,231 @@ def main():
644
  st.plotly_chart(fig, use_container_width=True)
645
 
646
  with tab4:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
647
  st.header("Detailed View")
648
 
649
  # Data overview
 
212
  st.title("πŸ” Complexity Metrics Explorer")
213
  st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
214
 
215
+ # Dataset selection at the top
216
+ st.header("πŸ—‚οΈ Dataset Selection")
217
 
218
  # Available datasets
219
  available_datasets = [
 
223
  "Custom..."
224
  ]
225
 
226
+ col1, col2 = st.columns([3, 1])
227
+
228
+ with col1:
229
+ selected_option = st.selectbox(
230
+ "Select Dataset",
231
+ options=available_datasets,
232
+ index=0, # Default to reduced dataset
233
+ help="Choose which dataset to analyze",
234
+ format_func=lambda x: x.split('/')[-1] if x != "Custom..." else x # Show only the dataset name part
235
+ )
236
+
237
+ with col2:
238
+ # Add refresh button
239
+ if st.button("πŸ”„ Refresh Data", help="Clear cache and reload dataset"):
240
+ st.cache_data.clear()
241
+ st.rerun()
242
 
243
  # Handle custom dataset input
244
  if selected_option == "Custom...":
245
+ selected_dataset = st.text_input(
246
  "Custom Dataset Name",
247
  value="risky-conversations/jailbreaks_dataset_with_results_reduced",
248
  help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
249
  )
250
  if not selected_dataset.strip():
251
+ st.warning("Please enter a dataset name")
252
  st.stop()
253
  else:
254
  selected_dataset = selected_option
255
 
 
 
 
 
 
256
  # Load data
257
  with st.spinner(f"Loading dataset: {selected_dataset}..."):
258
  try:
 
280
  if not data_loaded:
281
  st.stop()
282
 
283
+ # Controls at the top of the page
284
+ st.header("πŸŽ›οΈ Analysis Controls")
285
 
286
  # Dataset type filter
287
  dataset_types = df['type'].unique()
288
+ col1, col2 = st.columns(2)
289
+
290
+ with col1:
291
+ selected_types = st.multiselect(
292
+ "Select Dataset Types",
293
+ options=dataset_types,
294
+ default=dataset_types,
295
+ help="Filter by conversation type"
296
+ )
297
 
298
  # Role filter
299
+ with col2:
300
+ if 'turn.role' in df_exploded.columns:
301
+ roles = df_exploded['turn.role'].dropna().unique()
302
+ # Assert only user and assistant roles exist
303
+ expected_roles = {'user', 'assistant'}
304
+ actual_roles = set(roles)
305
+ assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
+ st.subheader("πŸ‘₯ Role Filter")
308
+ col2_1, col2_2 = st.columns(2)
309
+
310
+ with col2_1:
311
+ include_user = st.checkbox("User", value=True, help="Include user turns")
312
+ with col2_2:
313
+ include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
314
+
315
+ # Build selected roles list
316
+ selected_roles = []
317
+ if include_user and 'user' in roles:
318
+ selected_roles.append('user')
319
+ if include_assistant and 'assistant' in roles:
320
+ selected_roles.append('assistant')
321
+
322
+ # Show selection info
323
+ if selected_roles:
324
+ st.success(f"Including: {', '.join(selected_roles)}")
325
+ else:
326
+ st.warning("No roles selected")
327
  else:
328
+ selected_roles = None
 
 
329
 
330
  # Filter data based on selections
331
  filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
 
343
  st.stop()
344
 
345
  # Metric selection
346
+ st.header("πŸ“Š Metrics Selection")
347
 
348
  # Dynamic metric categorization based on common patterns
349
  def categorize_metrics(metrics):
 
386
  metric_categories = categorize_metrics(available_metrics)
387
 
388
  # Metric selection interface
389
+ selection_mode = st.radio(
390
  "Selection Mode",
391
  ["By Category", "Search/Filter", "Select All"],
392
+ help="Choose how to select metrics",
393
+ horizontal=True
394
  )
395
 
396
  if selection_mode == "By Category":
397
+ col1, col2 = st.columns([2, 1])
398
+
399
+ with col1:
400
+ selected_category = st.selectbox(
401
+ "Metric Category",
402
+ options=list(metric_categories.keys()),
403
+ help=f"Found {len(metric_categories)} categories"
404
+ )
405
 
406
  available_in_category = metric_categories[selected_category]
407
  default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
408
 
409
  # Add select all button for category
410
+ col1, col2 = st.columns(2)
411
  with col1:
412
  if st.button("Select All", key="select_all_category"):
413
  st.session_state.selected_metrics_category = available_in_category
 
419
  if "selected_metrics_category" not in st.session_state:
420
  st.session_state.selected_metrics_category = default_selection
421
 
422
+ selected_metrics = st.multiselect(
423
  f"Select Metrics ({len(available_in_category)} available)",
424
  options=available_in_category,
425
  default=st.session_state.selected_metrics_category,
 
428
  )
429
 
430
  elif selection_mode == "Search/Filter":
431
+ search_term = st.text_input(
432
  "Search Metrics",
433
  placeholder="Enter keywords to filter metrics...",
434
  help="Search for metrics containing specific terms"
 
439
  else:
440
  filtered_metrics = available_metrics
441
 
442
+ st.write(f"Found {len(filtered_metrics)} metrics")
443
 
444
  # Add select all button for search results
445
+ col1, col2 = st.columns(2)
446
  with col1:
447
  if st.button("Select All", key="select_all_search"):
448
  st.session_state.selected_metrics_search = filtered_metrics
 
454
  if "selected_metrics_search" not in st.session_state:
455
  st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
456
 
457
+ selected_metrics = st.multiselect(
458
  "Select Metrics",
459
  options=filtered_metrics,
460
  default=st.session_state.selected_metrics_search,
 
464
 
465
  else: # Select All
466
  # Add select all button for all metrics
467
+ col1, col2 = st.columns(2)
468
  with col1:
469
  if st.button("Select All", key="select_all_all"):
470
  st.session_state.selected_metrics_all = available_metrics
 
476
  if "selected_metrics_all" not in st.session_state:
477
  st.session_state.selected_metrics_all = available_metrics[:10] # Limit default to first 10 for performance
478
 
479
+ selected_metrics = st.multiselect(
480
  f"All Metrics ({len(available_metrics)} total)",
481
  options=available_metrics,
482
  default=st.session_state.selected_metrics_all,
 
486
 
487
  # Show selection summary
488
  if selected_metrics:
489
+ st.success(f"Selected {len(selected_metrics)} metrics")
490
 
491
  # Performance warning for large selections
492
  if len(selected_metrics) > 20:
493
+ st.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
494
  elif len(selected_metrics) > 50:
495
+ st.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
496
  else:
497
+ st.warning("No metrics selected")
498
 
499
  # Metric info expander
500
+ with st.expander("ℹ️ Metric Information", expanded=False):
501
  st.write(f"**Total Available Metrics:** {len(available_metrics)}")
502
  st.write(f"**Categories Found:** {len(metric_categories)}")
503
 
 
506
  for i, metric in enumerate(available_metrics, 1):
507
  st.write(f"{i}. `{metric}`")
508
 
509
+ st.divider() # Visual separator before main content
510
+
511
  # Main content tabs
512
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(["πŸ“Š Distributions", "πŸ”— Correlations", "πŸ“ˆ Comparisons", "πŸ” Conversation", "🎯 Details"])
513
 
514
  with tab1:
515
  st.header("Distribution Analysis")
 
659
  st.plotly_chart(fig, use_container_width=True)
660
 
661
  with tab4:
662
+ st.header("Individual Conversation Analysis")
663
+
664
+ # Conversation selector
665
+ st.subheader("πŸ” Select Conversation")
666
+
667
+ # Get unique conversations with some metadata
668
+ conversation_info = []
669
+ for idx, row in filtered_df.iterrows():
670
+ conv_type = row['type']
671
+ # Get basic info about the conversation
672
+ conv_turns = len(row.get('conversation', []))
673
+ conversation_info.append({
674
+ 'index': idx,
675
+ 'type': conv_type,
676
+ 'turns': conv_turns,
677
+ 'display': f"Conversation {idx} ({conv_type}) - {conv_turns} turns"
678
+ })
679
+
680
+ # Sort by type and number of turns for better organization
681
+ conversation_info = sorted(conversation_info, key=lambda x: (x['type'], -x['turns']))
682
+
683
+ # Conversation selection
684
+ col1, col2 = st.columns([3, 1])
685
+
686
+ with col1:
687
+ selected_conv_display = st.selectbox(
688
+ "Choose a conversation to analyze",
689
+ options=[conv['display'] for conv in conversation_info],
690
+ help="Select a conversation to view detailed metrics and content"
691
+ )
692
+
693
+ with col2:
694
+ if st.button("🎲 Random", help="Select a random conversation"):
695
+ import random
696
+ selected_conv_display = random.choice([conv['display'] for conv in conversation_info])
697
+ st.rerun()
698
+
699
+ # Get the selected conversation data
700
+ selected_conv_info = next(conv for conv in conversation_info if conv['display'] == selected_conv_display)
701
+ selected_idx = selected_conv_info['index']
702
+ selected_conversation = filtered_df.iloc[selected_idx]
703
+
704
+ # Display conversation metadata
705
+ st.subheader("πŸ“‹ Conversation Overview")
706
+
707
+ col1, col2, col3, col4 = st.columns(4)
708
+ with col1:
709
+ st.metric("Type", selected_conversation['type'])
710
+ with col2:
711
+ st.metric("Index", selected_idx)
712
+ with col3:
713
+ st.metric("Total Turns", len(selected_conversation.get('conversation', [])))
714
+ with col4:
715
+ # Count user vs assistant turns
716
+ roles = [turn.get('role', 'unknown') for turn in selected_conversation.get('conversation', [])]
717
+ user_turns = roles.count('user')
718
+ assistant_turns = roles.count('assistant')
719
+ st.metric("User/Assistant", f"{user_turns}/{assistant_turns}")
720
+
721
+ # Get conversation turns with metrics
722
+ conv_turns_data = filtered_df_exploded[filtered_df_exploded.index.isin(
723
+ filtered_df_exploded[filtered_df_exploded.index // len(filtered_df_exploded) * len(filtered_df) +
724
+ filtered_df_exploded.index % len(filtered_df) == selected_idx].index
725
+ )].copy()
726
+
727
+ # Alternative approach: filter by matching all conversation data
728
+ # This is more reliable but less efficient
729
+ conv_turns_data = []
730
+ start_idx = None
731
+ for idx, row in filtered_df_exploded.iterrows():
732
+ # Check if this row belongs to our selected conversation
733
+ if (row['type'] == selected_conversation['type'] and
734
+ hasattr(row, 'conversation') and
735
+ row.get('conversation') is not None):
736
+ # This is a simplified approach - in reality you'd need better conversation matching
737
+ pass
738
+
739
+ # Simpler approach: get all turns from the conversation directly
740
+ conversation_turns = selected_conversation.get('conversation', [])
741
+
742
+ if conversation_turns:
743
+ # Display conversation content
744
+ st.subheader("πŸ’¬ Conversation Content")
745
+
746
+ # Show/hide content toggle
747
+ show_content = st.checkbox("Show conversation content", value=True)
748
+
749
+ if show_content:
750
+ for i, turn in enumerate(conversation_turns):
751
+ role = turn.get('role', 'unknown')
752
+ content = turn.get('content', 'No content')
753
+
754
+ # Style based on role
755
+ if role == 'user':
756
+ st.markdown(f"**πŸ‘€ User (Turn {i+1}):**")
757
+ st.info(content)
758
+ elif role == 'assistant':
759
+ st.markdown(f"**πŸ€– Assistant (Turn {i+1}):**")
760
+ st.success(content)
761
+ else:
762
+ st.markdown(f"**❓ {role.title()} (Turn {i+1}):**")
763
+ st.warning(content)
764
+
765
+ # Display turn-level metrics if available
766
+ st.subheader("πŸ“Š Turn-Level Metrics")
767
+
768
+ if selected_metrics:
769
+ # Get actual turn-level data for this conversation
770
+ # Find matching turns in the exploded dataframe
771
+ conv_turn_metrics = []
772
+
773
+ # Simple approach: try to match turns by content or position
774
+ # This is a best-effort approach since exact matching is complex
775
+ turn_metric_columns = [f"turn.turn_metrics.{m}" for m in selected_metrics]
776
+ available_columns = [col for col in turn_metric_columns if col in filtered_df_exploded.columns]
777
+
778
+ if available_columns:
779
+ # Try to get metrics for turns from this conversation type
780
+ type_turns = filtered_df_exploded[filtered_df_exploded['type'] == selected_conversation['type']]
781
+
782
+ # Take a sample of turns for this conversation type (since exact matching is complex)
783
+ sample_size = min(len(conversation_turns), len(type_turns))
784
+ if sample_size > 0:
785
+ sample_turns = type_turns.head(sample_size)
786
+
787
+ # Create metrics table
788
+ metrics_display_data = []
789
+ for i, (_, turn_row) in enumerate(sample_turns.iterrows()):
790
+ if i < len(conversation_turns):
791
+ turn_data = {
792
+ 'Turn': i + 1,
793
+ 'Role': conversation_turns[i].get('role', 'unknown')
794
+ }
795
+
796
+ # Add actual metric values
797
+ for col in available_columns:
798
+ metric_name = col.replace('turn.turn_metrics.', '')
799
+ friendly_name = get_human_friendly_metric_name(metric_name)
800
+ value = turn_row.get(col, 'N/A')
801
+ if pd.notna(value) and isinstance(value, (int, float)):
802
+ turn_data[friendly_name] = round(value, 3)
803
+ else:
804
+ turn_data[friendly_name] = 'N/A'
805
+
806
+ metrics_display_data.append(turn_data)
807
+
808
+ if metrics_display_data:
809
+ metrics_df = pd.DataFrame(metrics_display_data)
810
+ st.dataframe(metrics_df, use_container_width=True)
811
+
812
+ # Plot metrics over turns with real data
813
+ st.subheader("πŸ“ˆ Metrics Over Turns")
814
+
815
+ fig = go.Figure()
816
+
817
+ # Add traces for each selected metric (real data)
818
+ for col in available_columns[:5]: # Limit to first 5 for readability
819
+ metric_name = col.replace('turn.turn_metrics.', '')
820
+ friendly_name = get_human_friendly_metric_name(metric_name)
821
+
822
+ # Get values for this metric
823
+ y_values = []
824
+ for _, turn_row in sample_turns.iterrows():
825
+ value = turn_row.get(col, None)
826
+ if pd.notna(value) and isinstance(value, (int, float)):
827
+ y_values.append(value)
828
+ else:
829
+ y_values.append(None)
830
+
831
+ if any(v is not None for v in y_values):
832
+ fig.add_trace(go.Scatter(
833
+ x=list(range(1, len(y_values) + 1)),
834
+ y=y_values,
835
+ mode='lines+markers',
836
+ name=friendly_name,
837
+ line=dict(width=2),
838
+ marker=dict(size=8),
839
+ connectgaps=False
840
+ ))
841
+
842
+ if fig.data: # Only show if we have data
843
+ fig.update_layout(
844
+ title="Complexity Metrics Across Conversation Turns",
845
+ xaxis_title="Turn Number",
846
+ yaxis_title="Metric Value",
847
+ height=400,
848
+ hovermode='x unified'
849
+ )
850
+
851
+ st.plotly_chart(fig, use_container_width=True)
852
+ else:
853
+ st.info("No numeric metric data available to plot for this conversation type.")
854
+ else:
855
+ st.info("No matching turn-level metrics found for this conversation.")
856
+ else:
857
+ st.info("No turn-level data available for this conversation type.")
858
+ else:
859
+ st.warning("No turn-level metrics available in the dataset for the selected metrics.")
860
+
861
+ # Show raw turn content with role highlighting
862
+ with st.expander("πŸ” Detailed Turn Analysis", expanded=False):
863
+ for i, turn in enumerate(conversation_turns):
864
+ role = turn.get('role', 'unknown')
865
+ content = turn.get('content', 'No content')
866
+
867
+ st.markdown(f"**Turn {i+1} ({role}):**")
868
+ st.text_area(
869
+ f"Content",
870
+ content,
871
+ height=100,
872
+ key=f"turn_content_{i}",
873
+ disabled=True
874
+ )
875
+
876
+ # Show turn statistics
877
+ st.caption(f"Characters: {len(content)} | Words: {len(content.split())} | Role: {role}")
878
+ st.divider()
879
+
880
+ else:
881
+ st.warning("Select some metrics to see turn-level analysis.")
882
+
883
+ else:
884
+ st.warning("No conversation data available for the selected conversation.")
885
+
886
+ with tab5:
887
  st.header("Detailed View")
888
 
889
  # Data overview