Spaces:
Sleeping
Sleeping
Update streamlit_app.py
Browse files- streamlit_app.py +312 -72
streamlit_app.py
CHANGED
@@ -212,8 +212,8 @@ def main():
|
|
212 |
st.title("π Complexity Metrics Explorer")
|
213 |
st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
|
214 |
|
215 |
-
# Dataset selection
|
216 |
-
st.
|
217 |
|
218 |
# Available datasets
|
219 |
available_datasets = [
|
@@ -223,31 +223,36 @@ def main():
|
|
223 |
"Custom..."
|
224 |
]
|
225 |
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
# Handle custom dataset input
|
234 |
if selected_option == "Custom...":
|
235 |
-
selected_dataset = st.
|
236 |
"Custom Dataset Name",
|
237 |
value="risky-conversations/jailbreaks_dataset_with_results_reduced",
|
238 |
help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
|
239 |
)
|
240 |
if not selected_dataset.strip():
|
241 |
-
st.
|
242 |
st.stop()
|
243 |
else:
|
244 |
selected_dataset = selected_option
|
245 |
|
246 |
-
# Add refresh button
|
247 |
-
if st.sidebar.button("π Refresh Data", help="Clear cache and reload dataset"):
|
248 |
-
st.cache_data.clear()
|
249 |
-
st.rerun()
|
250 |
-
|
251 |
# Load data
|
252 |
with st.spinner(f"Loading dataset: {selected_dataset}..."):
|
253 |
try:
|
@@ -275,48 +280,52 @@ def main():
|
|
275 |
if not data_loaded:
|
276 |
st.stop()
|
277 |
|
278 |
-
#
|
279 |
-
st.
|
280 |
|
281 |
# Dataset type filter
|
282 |
dataset_types = df['type'].unique()
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
|
|
|
|
|
|
289 |
|
290 |
# Role filter
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
st.sidebar.subheader("π₯ Role Filter")
|
299 |
-
col1, col2 = st.sidebar.columns(2)
|
300 |
-
|
301 |
-
with col1:
|
302 |
-
include_user = st.checkbox("User", value=True, help="Include user turns")
|
303 |
-
with col2:
|
304 |
-
include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
|
305 |
-
|
306 |
-
# Build selected roles list
|
307 |
-
selected_roles = []
|
308 |
-
if include_user and 'user' in roles:
|
309 |
-
selected_roles.append('user')
|
310 |
-
if include_assistant and 'assistant' in roles:
|
311 |
-
selected_roles.append('assistant')
|
312 |
|
313 |
-
|
314 |
-
|
315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
316 |
else:
|
317 |
-
|
318 |
-
else:
|
319 |
-
selected_roles = None
|
320 |
|
321 |
# Filter data based on selections
|
322 |
filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
|
@@ -334,7 +343,7 @@ def main():
|
|
334 |
st.stop()
|
335 |
|
336 |
# Metric selection
|
337 |
-
st.
|
338 |
|
339 |
# Dynamic metric categorization based on common patterns
|
340 |
def categorize_metrics(metrics):
|
@@ -377,24 +386,28 @@ def main():
|
|
377 |
metric_categories = categorize_metrics(available_metrics)
|
378 |
|
379 |
# Metric selection interface
|
380 |
-
selection_mode = st.
|
381 |
"Selection Mode",
|
382 |
["By Category", "Search/Filter", "Select All"],
|
383 |
-
help="Choose how to select metrics"
|
|
|
384 |
)
|
385 |
|
386 |
if selection_mode == "By Category":
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
|
|
|
|
|
|
392 |
|
393 |
available_in_category = metric_categories[selected_category]
|
394 |
default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
|
395 |
|
396 |
# Add select all button for category
|
397 |
-
col1, col2 = st.
|
398 |
with col1:
|
399 |
if st.button("Select All", key="select_all_category"):
|
400 |
st.session_state.selected_metrics_category = available_in_category
|
@@ -406,7 +419,7 @@ def main():
|
|
406 |
if "selected_metrics_category" not in st.session_state:
|
407 |
st.session_state.selected_metrics_category = default_selection
|
408 |
|
409 |
-
selected_metrics = st.
|
410 |
f"Select Metrics ({len(available_in_category)} available)",
|
411 |
options=available_in_category,
|
412 |
default=st.session_state.selected_metrics_category,
|
@@ -415,7 +428,7 @@ def main():
|
|
415 |
)
|
416 |
|
417 |
elif selection_mode == "Search/Filter":
|
418 |
-
search_term = st.
|
419 |
"Search Metrics",
|
420 |
placeholder="Enter keywords to filter metrics...",
|
421 |
help="Search for metrics containing specific terms"
|
@@ -426,10 +439,10 @@ def main():
|
|
426 |
else:
|
427 |
filtered_metrics = available_metrics
|
428 |
|
429 |
-
st.
|
430 |
|
431 |
# Add select all button for search results
|
432 |
-
col1, col2 = st.
|
433 |
with col1:
|
434 |
if st.button("Select All", key="select_all_search"):
|
435 |
st.session_state.selected_metrics_search = filtered_metrics
|
@@ -441,7 +454,7 @@ def main():
|
|
441 |
if "selected_metrics_search" not in st.session_state:
|
442 |
st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
|
443 |
|
444 |
-
selected_metrics = st.
|
445 |
"Select Metrics",
|
446 |
options=filtered_metrics,
|
447 |
default=st.session_state.selected_metrics_search,
|
@@ -451,7 +464,7 @@ def main():
|
|
451 |
|
452 |
else: # Select All
|
453 |
# Add select all button for all metrics
|
454 |
-
col1, col2 = st.
|
455 |
with col1:
|
456 |
if st.button("Select All", key="select_all_all"):
|
457 |
st.session_state.selected_metrics_all = available_metrics
|
@@ -463,7 +476,7 @@ def main():
|
|
463 |
if "selected_metrics_all" not in st.session_state:
|
464 |
st.session_state.selected_metrics_all = available_metrics[:10] # Limit default to first 10 for performance
|
465 |
|
466 |
-
selected_metrics = st.
|
467 |
f"All Metrics ({len(available_metrics)} total)",
|
468 |
options=available_metrics,
|
469 |
default=st.session_state.selected_metrics_all,
|
@@ -473,18 +486,18 @@ def main():
|
|
473 |
|
474 |
# Show selection summary
|
475 |
if selected_metrics:
|
476 |
-
st.
|
477 |
|
478 |
# Performance warning for large selections
|
479 |
if len(selected_metrics) > 20:
|
480 |
-
st.
|
481 |
elif len(selected_metrics) > 50:
|
482 |
-
st.
|
483 |
else:
|
484 |
-
st.
|
485 |
|
486 |
# Metric info expander
|
487 |
-
with st.
|
488 |
st.write(f"**Total Available Metrics:** {len(available_metrics)}")
|
489 |
st.write(f"**Categories Found:** {len(metric_categories)}")
|
490 |
|
@@ -493,8 +506,10 @@ def main():
|
|
493 |
for i, metric in enumerate(available_metrics, 1):
|
494 |
st.write(f"{i}. `{metric}`")
|
495 |
|
|
|
|
|
496 |
# Main content tabs
|
497 |
-
tab1, tab2, tab3, tab4 = st.tabs(["π Distributions", "π Correlations", "π Comparisons", "π― Details"])
|
498 |
|
499 |
with tab1:
|
500 |
st.header("Distribution Analysis")
|
@@ -644,6 +659,231 @@ def main():
|
|
644 |
st.plotly_chart(fig, use_container_width=True)
|
645 |
|
646 |
with tab4:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
647 |
st.header("Detailed View")
|
648 |
|
649 |
# Data overview
|
|
|
212 |
st.title("π Complexity Metrics Explorer")
|
213 |
st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
|
214 |
|
215 |
+
# Dataset selection at the top
|
216 |
+
st.header("ποΈ Dataset Selection")
|
217 |
|
218 |
# Available datasets
|
219 |
available_datasets = [
|
|
|
223 |
"Custom..."
|
224 |
]
|
225 |
|
226 |
+
col1, col2 = st.columns([3, 1])
|
227 |
+
|
228 |
+
with col1:
|
229 |
+
selected_option = st.selectbox(
|
230 |
+
"Select Dataset",
|
231 |
+
options=available_datasets,
|
232 |
+
index=0, # Default to reduced dataset
|
233 |
+
help="Choose which dataset to analyze",
|
234 |
+
format_func=lambda x: x.split('/')[-1] if x != "Custom..." else x # Show only the dataset name part
|
235 |
+
)
|
236 |
+
|
237 |
+
with col2:
|
238 |
+
# Add refresh button
|
239 |
+
if st.button("π Refresh Data", help="Clear cache and reload dataset"):
|
240 |
+
st.cache_data.clear()
|
241 |
+
st.rerun()
|
242 |
|
243 |
# Handle custom dataset input
|
244 |
if selected_option == "Custom...":
|
245 |
+
selected_dataset = st.text_input(
|
246 |
"Custom Dataset Name",
|
247 |
value="risky-conversations/jailbreaks_dataset_with_results_reduced",
|
248 |
help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
|
249 |
)
|
250 |
if not selected_dataset.strip():
|
251 |
+
st.warning("Please enter a dataset name")
|
252 |
st.stop()
|
253 |
else:
|
254 |
selected_dataset = selected_option
|
255 |
|
|
|
|
|
|
|
|
|
|
|
256 |
# Load data
|
257 |
with st.spinner(f"Loading dataset: {selected_dataset}..."):
|
258 |
try:
|
|
|
280 |
if not data_loaded:
|
281 |
st.stop()
|
282 |
|
283 |
+
# Controls at the top of the page
|
284 |
+
st.header("ποΈ Analysis Controls")
|
285 |
|
286 |
# Dataset type filter
|
287 |
dataset_types = df['type'].unique()
|
288 |
+
col1, col2 = st.columns(2)
|
289 |
+
|
290 |
+
with col1:
|
291 |
+
selected_types = st.multiselect(
|
292 |
+
"Select Dataset Types",
|
293 |
+
options=dataset_types,
|
294 |
+
default=dataset_types,
|
295 |
+
help="Filter by conversation type"
|
296 |
+
)
|
297 |
|
298 |
# Role filter
|
299 |
+
with col2:
|
300 |
+
if 'turn.role' in df_exploded.columns:
|
301 |
+
roles = df_exploded['turn.role'].dropna().unique()
|
302 |
+
# Assert only user and assistant roles exist
|
303 |
+
expected_roles = {'user', 'assistant'}
|
304 |
+
actual_roles = set(roles)
|
305 |
+
assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
+
st.subheader("π₯ Role Filter")
|
308 |
+
col2_1, col2_2 = st.columns(2)
|
309 |
+
|
310 |
+
with col2_1:
|
311 |
+
include_user = st.checkbox("User", value=True, help="Include user turns")
|
312 |
+
with col2_2:
|
313 |
+
include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
|
314 |
+
|
315 |
+
# Build selected roles list
|
316 |
+
selected_roles = []
|
317 |
+
if include_user and 'user' in roles:
|
318 |
+
selected_roles.append('user')
|
319 |
+
if include_assistant and 'assistant' in roles:
|
320 |
+
selected_roles.append('assistant')
|
321 |
+
|
322 |
+
# Show selection info
|
323 |
+
if selected_roles:
|
324 |
+
st.success(f"Including: {', '.join(selected_roles)}")
|
325 |
+
else:
|
326 |
+
st.warning("No roles selected")
|
327 |
else:
|
328 |
+
selected_roles = None
|
|
|
|
|
329 |
|
330 |
# Filter data based on selections
|
331 |
filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
|
|
|
343 |
st.stop()
|
344 |
|
345 |
# Metric selection
|
346 |
+
st.header("π Metrics Selection")
|
347 |
|
348 |
# Dynamic metric categorization based on common patterns
|
349 |
def categorize_metrics(metrics):
|
|
|
386 |
metric_categories = categorize_metrics(available_metrics)
|
387 |
|
388 |
# Metric selection interface
|
389 |
+
selection_mode = st.radio(
|
390 |
"Selection Mode",
|
391 |
["By Category", "Search/Filter", "Select All"],
|
392 |
+
help="Choose how to select metrics",
|
393 |
+
horizontal=True
|
394 |
)
|
395 |
|
396 |
if selection_mode == "By Category":
|
397 |
+
col1, col2 = st.columns([2, 1])
|
398 |
+
|
399 |
+
with col1:
|
400 |
+
selected_category = st.selectbox(
|
401 |
+
"Metric Category",
|
402 |
+
options=list(metric_categories.keys()),
|
403 |
+
help=f"Found {len(metric_categories)} categories"
|
404 |
+
)
|
405 |
|
406 |
available_in_category = metric_categories[selected_category]
|
407 |
default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
|
408 |
|
409 |
# Add select all button for category
|
410 |
+
col1, col2 = st.columns(2)
|
411 |
with col1:
|
412 |
if st.button("Select All", key="select_all_category"):
|
413 |
st.session_state.selected_metrics_category = available_in_category
|
|
|
419 |
if "selected_metrics_category" not in st.session_state:
|
420 |
st.session_state.selected_metrics_category = default_selection
|
421 |
|
422 |
+
selected_metrics = st.multiselect(
|
423 |
f"Select Metrics ({len(available_in_category)} available)",
|
424 |
options=available_in_category,
|
425 |
default=st.session_state.selected_metrics_category,
|
|
|
428 |
)
|
429 |
|
430 |
elif selection_mode == "Search/Filter":
|
431 |
+
search_term = st.text_input(
|
432 |
"Search Metrics",
|
433 |
placeholder="Enter keywords to filter metrics...",
|
434 |
help="Search for metrics containing specific terms"
|
|
|
439 |
else:
|
440 |
filtered_metrics = available_metrics
|
441 |
|
442 |
+
st.write(f"Found {len(filtered_metrics)} metrics")
|
443 |
|
444 |
# Add select all button for search results
|
445 |
+
col1, col2 = st.columns(2)
|
446 |
with col1:
|
447 |
if st.button("Select All", key="select_all_search"):
|
448 |
st.session_state.selected_metrics_search = filtered_metrics
|
|
|
454 |
if "selected_metrics_search" not in st.session_state:
|
455 |
st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
|
456 |
|
457 |
+
selected_metrics = st.multiselect(
|
458 |
"Select Metrics",
|
459 |
options=filtered_metrics,
|
460 |
default=st.session_state.selected_metrics_search,
|
|
|
464 |
|
465 |
else: # Select All
|
466 |
# Add select all button for all metrics
|
467 |
+
col1, col2 = st.columns(2)
|
468 |
with col1:
|
469 |
if st.button("Select All", key="select_all_all"):
|
470 |
st.session_state.selected_metrics_all = available_metrics
|
|
|
476 |
if "selected_metrics_all" not in st.session_state:
|
477 |
st.session_state.selected_metrics_all = available_metrics[:10] # Limit default to first 10 for performance
|
478 |
|
479 |
+
selected_metrics = st.multiselect(
|
480 |
f"All Metrics ({len(available_metrics)} total)",
|
481 |
options=available_metrics,
|
482 |
default=st.session_state.selected_metrics_all,
|
|
|
486 |
|
487 |
# Show selection summary
|
488 |
if selected_metrics:
|
489 |
+
st.success(f"Selected {len(selected_metrics)} metrics")
|
490 |
|
491 |
# Performance warning for large selections
|
492 |
if len(selected_metrics) > 20:
|
493 |
+
st.warning(f"β οΈ Large selection ({len(selected_metrics)} metrics) may impact performance")
|
494 |
elif len(selected_metrics) > 50:
|
495 |
+
st.error(f"π¨ Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
|
496 |
else:
|
497 |
+
st.warning("No metrics selected")
|
498 |
|
499 |
# Metric info expander
|
500 |
+
with st.expander("βΉοΈ Metric Information", expanded=False):
|
501 |
st.write(f"**Total Available Metrics:** {len(available_metrics)}")
|
502 |
st.write(f"**Categories Found:** {len(metric_categories)}")
|
503 |
|
|
|
506 |
for i, metric in enumerate(available_metrics, 1):
|
507 |
st.write(f"{i}. `{metric}`")
|
508 |
|
509 |
+
st.divider() # Visual separator before main content
|
510 |
+
|
511 |
# Main content tabs
|
512 |
+
tab1, tab2, tab3, tab4, tab5 = st.tabs(["π Distributions", "π Correlations", "π Comparisons", "π Conversation", "π― Details"])
|
513 |
|
514 |
with tab1:
|
515 |
st.header("Distribution Analysis")
|
|
|
659 |
st.plotly_chart(fig, use_container_width=True)
|
660 |
|
661 |
with tab4:
|
662 |
+
st.header("Individual Conversation Analysis")
|
663 |
+
|
664 |
+
# Conversation selector
|
665 |
+
st.subheader("π Select Conversation")
|
666 |
+
|
667 |
+
# Get unique conversations with some metadata
|
668 |
+
conversation_info = []
|
669 |
+
for idx, row in filtered_df.iterrows():
|
670 |
+
conv_type = row['type']
|
671 |
+
# Get basic info about the conversation
|
672 |
+
conv_turns = len(row.get('conversation', []))
|
673 |
+
conversation_info.append({
|
674 |
+
'index': idx,
|
675 |
+
'type': conv_type,
|
676 |
+
'turns': conv_turns,
|
677 |
+
'display': f"Conversation {idx} ({conv_type}) - {conv_turns} turns"
|
678 |
+
})
|
679 |
+
|
680 |
+
# Sort by type and number of turns for better organization
|
681 |
+
conversation_info = sorted(conversation_info, key=lambda x: (x['type'], -x['turns']))
|
682 |
+
|
683 |
+
# Conversation selection
|
684 |
+
col1, col2 = st.columns([3, 1])
|
685 |
+
|
686 |
+
with col1:
|
687 |
+
selected_conv_display = st.selectbox(
|
688 |
+
"Choose a conversation to analyze",
|
689 |
+
options=[conv['display'] for conv in conversation_info],
|
690 |
+
help="Select a conversation to view detailed metrics and content"
|
691 |
+
)
|
692 |
+
|
693 |
+
with col2:
|
694 |
+
if st.button("π² Random", help="Select a random conversation"):
|
695 |
+
import random
|
696 |
+
selected_conv_display = random.choice([conv['display'] for conv in conversation_info])
|
697 |
+
st.rerun()
|
698 |
+
|
699 |
+
# Get the selected conversation data
|
700 |
+
selected_conv_info = next(conv for conv in conversation_info if conv['display'] == selected_conv_display)
|
701 |
+
selected_idx = selected_conv_info['index']
|
702 |
+
selected_conversation = filtered_df.iloc[selected_idx]
|
703 |
+
|
704 |
+
# Display conversation metadata
|
705 |
+
st.subheader("π Conversation Overview")
|
706 |
+
|
707 |
+
col1, col2, col3, col4 = st.columns(4)
|
708 |
+
with col1:
|
709 |
+
st.metric("Type", selected_conversation['type'])
|
710 |
+
with col2:
|
711 |
+
st.metric("Index", selected_idx)
|
712 |
+
with col3:
|
713 |
+
st.metric("Total Turns", len(selected_conversation.get('conversation', [])))
|
714 |
+
with col4:
|
715 |
+
# Count user vs assistant turns
|
716 |
+
roles = [turn.get('role', 'unknown') for turn in selected_conversation.get('conversation', [])]
|
717 |
+
user_turns = roles.count('user')
|
718 |
+
assistant_turns = roles.count('assistant')
|
719 |
+
st.metric("User/Assistant", f"{user_turns}/{assistant_turns}")
|
720 |
+
|
721 |
+
# Get conversation turns with metrics
|
722 |
+
conv_turns_data = filtered_df_exploded[filtered_df_exploded.index.isin(
|
723 |
+
filtered_df_exploded[filtered_df_exploded.index // len(filtered_df_exploded) * len(filtered_df) +
|
724 |
+
filtered_df_exploded.index % len(filtered_df) == selected_idx].index
|
725 |
+
)].copy()
|
726 |
+
|
727 |
+
# Alternative approach: filter by matching all conversation data
|
728 |
+
# This is more reliable but less efficient
|
729 |
+
conv_turns_data = []
|
730 |
+
start_idx = None
|
731 |
+
for idx, row in filtered_df_exploded.iterrows():
|
732 |
+
# Check if this row belongs to our selected conversation
|
733 |
+
if (row['type'] == selected_conversation['type'] and
|
734 |
+
hasattr(row, 'conversation') and
|
735 |
+
row.get('conversation') is not None):
|
736 |
+
# This is a simplified approach - in reality you'd need better conversation matching
|
737 |
+
pass
|
738 |
+
|
739 |
+
# Simpler approach: get all turns from the conversation directly
|
740 |
+
conversation_turns = selected_conversation.get('conversation', [])
|
741 |
+
|
742 |
+
if conversation_turns:
|
743 |
+
# Display conversation content
|
744 |
+
st.subheader("π¬ Conversation Content")
|
745 |
+
|
746 |
+
# Show/hide content toggle
|
747 |
+
show_content = st.checkbox("Show conversation content", value=True)
|
748 |
+
|
749 |
+
if show_content:
|
750 |
+
for i, turn in enumerate(conversation_turns):
|
751 |
+
role = turn.get('role', 'unknown')
|
752 |
+
content = turn.get('content', 'No content')
|
753 |
+
|
754 |
+
# Style based on role
|
755 |
+
if role == 'user':
|
756 |
+
st.markdown(f"**π€ User (Turn {i+1}):**")
|
757 |
+
st.info(content)
|
758 |
+
elif role == 'assistant':
|
759 |
+
st.markdown(f"**π€ Assistant (Turn {i+1}):**")
|
760 |
+
st.success(content)
|
761 |
+
else:
|
762 |
+
st.markdown(f"**β {role.title()} (Turn {i+1}):**")
|
763 |
+
st.warning(content)
|
764 |
+
|
765 |
+
# Display turn-level metrics if available
|
766 |
+
st.subheader("π Turn-Level Metrics")
|
767 |
+
|
768 |
+
if selected_metrics:
|
769 |
+
# Get actual turn-level data for this conversation
|
770 |
+
# Find matching turns in the exploded dataframe
|
771 |
+
conv_turn_metrics = []
|
772 |
+
|
773 |
+
# Simple approach: try to match turns by content or position
|
774 |
+
# This is a best-effort approach since exact matching is complex
|
775 |
+
turn_metric_columns = [f"turn.turn_metrics.{m}" for m in selected_metrics]
|
776 |
+
available_columns = [col for col in turn_metric_columns if col in filtered_df_exploded.columns]
|
777 |
+
|
778 |
+
if available_columns:
|
779 |
+
# Try to get metrics for turns from this conversation type
|
780 |
+
type_turns = filtered_df_exploded[filtered_df_exploded['type'] == selected_conversation['type']]
|
781 |
+
|
782 |
+
# Take a sample of turns for this conversation type (since exact matching is complex)
|
783 |
+
sample_size = min(len(conversation_turns), len(type_turns))
|
784 |
+
if sample_size > 0:
|
785 |
+
sample_turns = type_turns.head(sample_size)
|
786 |
+
|
787 |
+
# Create metrics table
|
788 |
+
metrics_display_data = []
|
789 |
+
for i, (_, turn_row) in enumerate(sample_turns.iterrows()):
|
790 |
+
if i < len(conversation_turns):
|
791 |
+
turn_data = {
|
792 |
+
'Turn': i + 1,
|
793 |
+
'Role': conversation_turns[i].get('role', 'unknown')
|
794 |
+
}
|
795 |
+
|
796 |
+
# Add actual metric values
|
797 |
+
for col in available_columns:
|
798 |
+
metric_name = col.replace('turn.turn_metrics.', '')
|
799 |
+
friendly_name = get_human_friendly_metric_name(metric_name)
|
800 |
+
value = turn_row.get(col, 'N/A')
|
801 |
+
if pd.notna(value) and isinstance(value, (int, float)):
|
802 |
+
turn_data[friendly_name] = round(value, 3)
|
803 |
+
else:
|
804 |
+
turn_data[friendly_name] = 'N/A'
|
805 |
+
|
806 |
+
metrics_display_data.append(turn_data)
|
807 |
+
|
808 |
+
if metrics_display_data:
|
809 |
+
metrics_df = pd.DataFrame(metrics_display_data)
|
810 |
+
st.dataframe(metrics_df, use_container_width=True)
|
811 |
+
|
812 |
+
# Plot metrics over turns with real data
|
813 |
+
st.subheader("π Metrics Over Turns")
|
814 |
+
|
815 |
+
fig = go.Figure()
|
816 |
+
|
817 |
+
# Add traces for each selected metric (real data)
|
818 |
+
for col in available_columns[:5]: # Limit to first 5 for readability
|
819 |
+
metric_name = col.replace('turn.turn_metrics.', '')
|
820 |
+
friendly_name = get_human_friendly_metric_name(metric_name)
|
821 |
+
|
822 |
+
# Get values for this metric
|
823 |
+
y_values = []
|
824 |
+
for _, turn_row in sample_turns.iterrows():
|
825 |
+
value = turn_row.get(col, None)
|
826 |
+
if pd.notna(value) and isinstance(value, (int, float)):
|
827 |
+
y_values.append(value)
|
828 |
+
else:
|
829 |
+
y_values.append(None)
|
830 |
+
|
831 |
+
if any(v is not None for v in y_values):
|
832 |
+
fig.add_trace(go.Scatter(
|
833 |
+
x=list(range(1, len(y_values) + 1)),
|
834 |
+
y=y_values,
|
835 |
+
mode='lines+markers',
|
836 |
+
name=friendly_name,
|
837 |
+
line=dict(width=2),
|
838 |
+
marker=dict(size=8),
|
839 |
+
connectgaps=False
|
840 |
+
))
|
841 |
+
|
842 |
+
if fig.data: # Only show if we have data
|
843 |
+
fig.update_layout(
|
844 |
+
title="Complexity Metrics Across Conversation Turns",
|
845 |
+
xaxis_title="Turn Number",
|
846 |
+
yaxis_title="Metric Value",
|
847 |
+
height=400,
|
848 |
+
hovermode='x unified'
|
849 |
+
)
|
850 |
+
|
851 |
+
st.plotly_chart(fig, use_container_width=True)
|
852 |
+
else:
|
853 |
+
st.info("No numeric metric data available to plot for this conversation type.")
|
854 |
+
else:
|
855 |
+
st.info("No matching turn-level metrics found for this conversation.")
|
856 |
+
else:
|
857 |
+
st.info("No turn-level data available for this conversation type.")
|
858 |
+
else:
|
859 |
+
st.warning("No turn-level metrics available in the dataset for the selected metrics.")
|
860 |
+
|
861 |
+
# Show raw turn content with role highlighting
|
862 |
+
with st.expander("π Detailed Turn Analysis", expanded=False):
|
863 |
+
for i, turn in enumerate(conversation_turns):
|
864 |
+
role = turn.get('role', 'unknown')
|
865 |
+
content = turn.get('content', 'No content')
|
866 |
+
|
867 |
+
st.markdown(f"**Turn {i+1} ({role}):**")
|
868 |
+
st.text_area(
|
869 |
+
f"Content",
|
870 |
+
content,
|
871 |
+
height=100,
|
872 |
+
key=f"turn_content_{i}",
|
873 |
+
disabled=True
|
874 |
+
)
|
875 |
+
|
876 |
+
# Show turn statistics
|
877 |
+
st.caption(f"Characters: {len(content)} | Words: {len(content.split())} | Role: {role}")
|
878 |
+
st.divider()
|
879 |
+
|
880 |
+
else:
|
881 |
+
st.warning("Select some metrics to see turn-level analysis.")
|
882 |
+
|
883 |
+
else:
|
884 |
+
st.warning("No conversation data available for the selected conversation.")
|
885 |
+
|
886 |
+
with tab5:
|
887 |
st.header("Detailed View")
|
888 |
|
889 |
# Data overview
|