Spaces:

risky-conversations
/

Visualizer

Sleeping

App Files Files Community

acmc commited on Jun 15

Commit

21a08b0

verified ·

1 Parent(s): 5a46117

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +312 -72

streamlit_app.py CHANGED Viewed

@@ -212,8 +212,8 @@ def main():
     st.title("🔍 Complexity Metrics Explorer")
     st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
-    # Dataset selection
-    st.sidebar.header("🗂️ Dataset Selection")
     # Available datasets
     available_datasets = [
@@ -223,31 +223,36 @@ def main():
         "Custom..."
     ]
-    selected_option = st.sidebar.selectbox(
-        "Select Dataset",
-        options=available_datasets,
-        index=0,  # Default to reduced dataset
-        help="Choose which dataset to analyze"
-    )
     # Handle custom dataset input
     if selected_option == "Custom...":
-        selected_dataset = st.sidebar.text_input(
             "Custom Dataset Name",
             value="risky-conversations/jailbreaks_dataset_with_results_reduced",
             help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
         )
         if not selected_dataset.strip():
-            st.sidebar.warning("Please enter a dataset name")
             st.stop()
     else:
         selected_dataset = selected_option
-    # Add refresh button
-    if st.sidebar.button("🔄 Refresh Data", help="Clear cache and reload dataset"):
-        st.cache_data.clear()
-        st.rerun()
     # Load data
     with st.spinner(f"Loading dataset: {selected_dataset}..."):
         try:
@@ -275,48 +280,52 @@ def main():
     if not data_loaded:
         st.stop()
-    # Sidebar controls
-    st.sidebar.header("🎛️ Controls")
     # Dataset type filter
     dataset_types = df['type'].unique()
-    selected_types = st.sidebar.multiselect(
-        "Select Dataset Types",
-        options=dataset_types,
-        default=dataset_types,
-        help="Filter by conversation type"
-    )
     # Role filter
-    if 'turn.role' in df_exploded.columns:
-        roles = df_exploded['turn.role'].dropna().unique()
-        # Assert only user and assistant roles exist
-        expected_roles = {'user', 'assistant'}
-        actual_roles = set(roles)
-        assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
-        st.sidebar.subheader("👥 Role Filter")
-        col1, col2 = st.sidebar.columns(2)
-        with col1:
-            include_user = st.checkbox("User", value=True, help="Include user turns")
-        with col2:
-            include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
-        # Build selected roles list
-        selected_roles = []
-        if include_user and 'user' in roles:
-            selected_roles.append('user')
-        if include_assistant and 'assistant' in roles:
-            selected_roles.append('assistant')
-        # Show selection info
-        if selected_roles:
-            st.sidebar.success(f"Including: {', '.join(selected_roles)}")
         else:
-            st.sidebar.warning("No roles selected")
-    else:
-        selected_roles = None
     # Filter data based on selections
     filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
@@ -334,7 +343,7 @@ def main():
         st.stop()
     # Metric selection
-    st.sidebar.header("📊 Metrics")
     # Dynamic metric categorization based on common patterns
     def categorize_metrics(metrics):
@@ -377,24 +386,28 @@ def main():
     metric_categories = categorize_metrics(available_metrics)
     # Metric selection interface
-    selection_mode = st.sidebar.radio(
         "Selection Mode",
         ["By Category", "Search/Filter", "Select All"],
-        help="Choose how to select metrics"
     )
     if selection_mode == "By Category":
-        selected_category = st.sidebar.selectbox(
-            "Metric Category",
-            options=list(metric_categories.keys()),
-            help=f"Found {len(metric_categories)} categories"
-        )
         available_in_category = metric_categories[selected_category]
         default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
         # Add select all button for category
-        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_category"):
                 st.session_state.selected_metrics_category = available_in_category
@@ -406,7 +419,7 @@ def main():
         if "selected_metrics_category" not in st.session_state:
             st.session_state.selected_metrics_category = default_selection
-        selected_metrics = st.sidebar.multiselect(
             f"Select Metrics ({len(available_in_category)} available)",
             options=available_in_category,
             default=st.session_state.selected_metrics_category,
@@ -415,7 +428,7 @@ def main():
         )
     elif selection_mode == "Search/Filter":
-        search_term = st.sidebar.text_input(
             "Search Metrics",
             placeholder="Enter keywords to filter metrics...",
             help="Search for metrics containing specific terms"
@@ -426,10 +439,10 @@ def main():
         else:
             filtered_metrics = available_metrics
-        st.sidebar.write(f"Found {len(filtered_metrics)} metrics")
         # Add select all button for search results
-        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_search"):
                 st.session_state.selected_metrics_search = filtered_metrics
@@ -441,7 +454,7 @@ def main():
         if "selected_metrics_search" not in st.session_state:
             st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
-        selected_metrics = st.sidebar.multiselect(
             "Select Metrics",
             options=filtered_metrics,
             default=st.session_state.selected_metrics_search,
@@ -451,7 +464,7 @@ def main():
     else:  # Select All
         # Add select all button for all metrics
-        col1, col2 = st.sidebar.columns(2)
         with col1:
             if st.button("Select All", key="select_all_all"):
                 st.session_state.selected_metrics_all = available_metrics
@@ -463,7 +476,7 @@ def main():
         if "selected_metrics_all" not in st.session_state:
             st.session_state.selected_metrics_all = available_metrics[:10]  # Limit default to first 10 for performance
-        selected_metrics = st.sidebar.multiselect(
             f"All Metrics ({len(available_metrics)} total)",
             options=available_metrics,
             default=st.session_state.selected_metrics_all,
@@ -473,18 +486,18 @@ def main():
     # Show selection summary
     if selected_metrics:
-        st.sidebar.success(f"Selected {len(selected_metrics)} metrics")
         # Performance warning for large selections
         if len(selected_metrics) > 20:
-            st.sidebar.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
         elif len(selected_metrics) > 50:
-            st.sidebar.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
     else:
-        st.sidebar.warning("No metrics selected")
     # Metric info expander
-    with st.sidebar.expander("ℹ️ Metric Information", expanded=False):
         st.write(f"**Total Available Metrics:** {len(available_metrics)}")
         st.write(f"**Categories Found:** {len(metric_categories)}")
@@ -493,8 +506,10 @@ def main():
             for i, metric in enumerate(available_metrics, 1):
                 st.write(f"{i}. `{metric}`")
     # Main content tabs
-    tab1, tab2, tab3, tab4 = st.tabs(["📊 Distributions", "🔗 Correlations", "📈 Comparisons", "🎯 Details"])
     with tab1:
         st.header("Distribution Analysis")
@@ -644,6 +659,231 @@ def main():
                 st.plotly_chart(fig, use_container_width=True)
     with tab4:
         st.header("Detailed View")
         # Data overview

     st.title("🔍 Complexity Metrics Explorer")
     st.markdown("Interactive visualization of conversation complexity metrics across different dataset types.")
+    # Dataset selection at the top
+    st.header("🗂️ Dataset Selection")
     # Available datasets
     available_datasets = [
         "Custom..."
     ]
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        selected_option = st.selectbox(
+            "Select Dataset",
+            options=available_datasets,
+            index=0,  # Default to reduced dataset
+            help="Choose which dataset to analyze",
+            format_func=lambda x: x.split('/')[-1] if x != "Custom..." else x  # Show only the dataset name part
+        )
+    with col2:
+        # Add refresh button
+        if st.button("🔄 Refresh Data", help="Clear cache and reload dataset"):
+            st.cache_data.clear()
+            st.rerun()
     # Handle custom dataset input
     if selected_option == "Custom...":
+        selected_dataset = st.text_input(
             "Custom Dataset Name",
             value="risky-conversations/jailbreaks_dataset_with_results_reduced",
             help="Enter the full dataset name (e.g., 'risky-conversations/jailbreaks_dataset_with_results_reduced')"
         )
         if not selected_dataset.strip():
+            st.warning("Please enter a dataset name")
             st.stop()
     else:
         selected_dataset = selected_option
     # Load data
     with st.spinner(f"Loading dataset: {selected_dataset}..."):
         try:
     if not data_loaded:
         st.stop()
+    # Controls at the top of the page
+    st.header("🎛️ Analysis Controls")
     # Dataset type filter
     dataset_types = df['type'].unique()
+    col1, col2 = st.columns(2)
+    with col1:
+        selected_types = st.multiselect(
+            "Select Dataset Types",
+            options=dataset_types,
+            default=dataset_types,
+            help="Filter by conversation type"
+        )
     # Role filter
+    with col2:
+        if 'turn.role' in df_exploded.columns:
+            roles = df_exploded['turn.role'].dropna().unique()
+            # Assert only user and assistant roles exist
+            expected_roles = {'user', 'assistant'}
+            actual_roles = set(roles)
+            assert actual_roles.issubset(expected_roles), f"Unexpected roles found: {actual_roles - expected_roles}. Expected only 'user' and 'assistant'"
+            st.subheader("👥 Role Filter")
+            col2_1, col2_2 = st.columns(2)
+            with col2_1:
+                include_user = st.checkbox("User", value=True, help="Include user turns")
+            with col2_2:
+                include_assistant = st.checkbox("Assistant", value=True, help="Include assistant turns")
+            # Build selected roles list
+            selected_roles = []
+            if include_user and 'user' in roles:
+                selected_roles.append('user')
+            if include_assistant and 'assistant' in roles:
+                selected_roles.append('assistant')
+            # Show selection info
+            if selected_roles:
+                st.success(f"Including: {', '.join(selected_roles)}")
+            else:
+                st.warning("No roles selected")
         else:
+            selected_roles = None
     # Filter data based on selections
     filtered_df = df[df['type'].isin(selected_types)] if selected_types else df
         st.stop()
     # Metric selection
+    st.header("📊 Metrics Selection")
     # Dynamic metric categorization based on common patterns
     def categorize_metrics(metrics):
     metric_categories = categorize_metrics(available_metrics)
     # Metric selection interface
+    selection_mode = st.radio(
         "Selection Mode",
         ["By Category", "Search/Filter", "Select All"],
+        help="Choose how to select metrics",
+        horizontal=True
     )
     if selection_mode == "By Category":
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            selected_category = st.selectbox(
+                "Metric Category",
+                options=list(metric_categories.keys()),
+                help=f"Found {len(metric_categories)} categories"
+            )
         available_in_category = metric_categories[selected_category]
         default_selection = available_in_category[:5] if len(available_in_category) > 5 else available_in_category
         # Add select all button for category
+        col1, col2 = st.columns(2)
         with col1:
             if st.button("Select All", key="select_all_category"):
                 st.session_state.selected_metrics_category = available_in_category
         if "selected_metrics_category" not in st.session_state:
             st.session_state.selected_metrics_category = default_selection
+        selected_metrics = st.multiselect(
             f"Select Metrics ({len(available_in_category)} available)",
             options=available_in_category,
             default=st.session_state.selected_metrics_category,
         )
     elif selection_mode == "Search/Filter":
+        search_term = st.text_input(
             "Search Metrics",
             placeholder="Enter keywords to filter metrics...",
             help="Search for metrics containing specific terms"
         else:
             filtered_metrics = available_metrics
+        st.write(f"Found {len(filtered_metrics)} metrics")
         # Add select all button for search results
+        col1, col2 = st.columns(2)
         with col1:
             if st.button("Select All", key="select_all_search"):
                 st.session_state.selected_metrics_search = filtered_metrics
         if "selected_metrics_search" not in st.session_state:
             st.session_state.selected_metrics_search = filtered_metrics[:5] if len(filtered_metrics) > 5 else filtered_metrics[:3]
+        selected_metrics = st.multiselect(
             "Select Metrics",
             options=filtered_metrics,
             default=st.session_state.selected_metrics_search,
     else:  # Select All
         # Add select all button for all metrics
+        col1, col2 = st.columns(2)
         with col1:
             if st.button("Select All", key="select_all_all"):
                 st.session_state.selected_metrics_all = available_metrics
         if "selected_metrics_all" not in st.session_state:
             st.session_state.selected_metrics_all = available_metrics[:10]  # Limit default to first 10 for performance
+        selected_metrics = st.multiselect(
             f"All Metrics ({len(available_metrics)} total)",
             options=available_metrics,
             default=st.session_state.selected_metrics_all,
     # Show selection summary
     if selected_metrics:
+        st.success(f"Selected {len(selected_metrics)} metrics")
         # Performance warning for large selections
         if len(selected_metrics) > 20:
+            st.warning(f"⚠️ Large selection ({len(selected_metrics)} metrics) may impact performance")
         elif len(selected_metrics) > 50:
+            st.error(f"🚨 Very large selection ({len(selected_metrics)} metrics) - consider reducing for better performance")
     else:
+        st.warning("No metrics selected")
     # Metric info expander
+    with st.expander("ℹ️ Metric Information", expanded=False):
         st.write(f"**Total Available Metrics:** {len(available_metrics)}")
         st.write(f"**Categories Found:** {len(metric_categories)}")
             for i, metric in enumerate(available_metrics, 1):
                 st.write(f"{i}. `{metric}`")
+    st.divider()  # Visual separator before main content
     # Main content tabs
+    tab1, tab2, tab3, tab4, tab5 = st.tabs(["📊 Distributions", "🔗 Correlations", "📈 Comparisons", "🔍 Conversation", "🎯 Details"])
     with tab1:
         st.header("Distribution Analysis")
                 st.plotly_chart(fig, use_container_width=True)
     with tab4:
+        st.header("Individual Conversation Analysis")
+        # Conversation selector
+        st.subheader("🔍 Select Conversation")
+        # Get unique conversations with some metadata
+        conversation_info = []
+        for idx, row in filtered_df.iterrows():
+            conv_type = row['type']
+            # Get basic info about the conversation
+            conv_turns = len(row.get('conversation', []))
+            conversation_info.append({
+                'index': idx,
+                'type': conv_type,
+                'turns': conv_turns,
+                'display': f"Conversation {idx} ({conv_type}) - {conv_turns} turns"
+            })
+        # Sort by type and number of turns for better organization
+        conversation_info = sorted(conversation_info, key=lambda x: (x['type'], -x['turns']))
+        # Conversation selection
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            selected_conv_display = st.selectbox(
+                "Choose a conversation to analyze",
+                options=[conv['display'] for conv in conversation_info],
+                help="Select a conversation to view detailed metrics and content"
+            )
+        with col2:
+            if st.button("🎲 Random", help="Select a random conversation"):
+                import random
+                selected_conv_display = random.choice([conv['display'] for conv in conversation_info])
+                st.rerun()
+        # Get the selected conversation data
+        selected_conv_info = next(conv for conv in conversation_info if conv['display'] == selected_conv_display)
+        selected_idx = selected_conv_info['index']
+        selected_conversation = filtered_df.iloc[selected_idx]
+        # Display conversation metadata
+        st.subheader("📋 Conversation Overview")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Type", selected_conversation['type'])
+        with col2:
+            st.metric("Index", selected_idx)
+        with col3:
+            st.metric("Total Turns", len(selected_conversation.get('conversation', [])))
+        with col4:
+            # Count user vs assistant turns
+            roles = [turn.get('role', 'unknown') for turn in selected_conversation.get('conversation', [])]
+            user_turns = roles.count('user')
+            assistant_turns = roles.count('assistant')
+            st.metric("User/Assistant", f"{user_turns}/{assistant_turns}")
+        # Get conversation turns with metrics
+        conv_turns_data = filtered_df_exploded[filtered_df_exploded.index.isin(
+            filtered_df_exploded[filtered_df_exploded.index // len(filtered_df_exploded) * len(filtered_df) +
+                                filtered_df_exploded.index % len(filtered_df) == selected_idx].index
+        )].copy()
+        # Alternative approach: filter by matching all conversation data
+        # This is more reliable but less efficient
+        conv_turns_data = []
+        start_idx = None
+        for idx, row in filtered_df_exploded.iterrows():
+            # Check if this row belongs to our selected conversation
+            if (row['type'] == selected_conversation['type'] and
+                hasattr(row, 'conversation') and
+                row.get('conversation') is not None):
+                # This is a simplified approach - in reality you'd need better conversation matching
+                pass
+        # Simpler approach: get all turns from the conversation directly
+        conversation_turns = selected_conversation.get('conversation', [])
+        if conversation_turns:
+            # Display conversation content
+            st.subheader("💬 Conversation Content")
+            # Show/hide content toggle
+            show_content = st.checkbox("Show conversation content", value=True)
+            if show_content:
+                for i, turn in enumerate(conversation_turns):
+                    role = turn.get('role', 'unknown')
+                    content = turn.get('content', 'No content')
+                    # Style based on role
+                    if role == 'user':
+                        st.markdown(f"**👤 User (Turn {i+1}):**")
+                        st.info(content)
+                    elif role == 'assistant':
+                        st.markdown(f"**🤖 Assistant (Turn {i+1}):**")
+                        st.success(content)
+                    else:
+                        st.markdown(f"**❓ {role.title()} (Turn {i+1}):**")
+                        st.warning(content)
+            # Display turn-level metrics if available
+            st.subheader("📊 Turn-Level Metrics")
+            if selected_metrics:
+                # Get actual turn-level data for this conversation
+                # Find matching turns in the exploded dataframe
+                conv_turn_metrics = []
+                # Simple approach: try to match turns by content or position
+                # This is a best-effort approach since exact matching is complex
+                turn_metric_columns = [f"turn.turn_metrics.{m}" for m in selected_metrics]
+                available_columns = [col for col in turn_metric_columns if col in filtered_df_exploded.columns]
+                if available_columns:
+                    # Try to get metrics for turns from this conversation type
+                    type_turns = filtered_df_exploded[filtered_df_exploded['type'] == selected_conversation['type']]
+                    # Take a sample of turns for this conversation type (since exact matching is complex)
+                    sample_size = min(len(conversation_turns), len(type_turns))
+                    if sample_size > 0:
+                        sample_turns = type_turns.head(sample_size)
+                        # Create metrics table
+                        metrics_display_data = []
+                        for i, (_, turn_row) in enumerate(sample_turns.iterrows()):
+                            if i < len(conversation_turns):
+                                turn_data = {
+                                    'Turn': i + 1,
+                                    'Role': conversation_turns[i].get('role', 'unknown')
+                                }
+                                # Add actual metric values
+                                for col in available_columns:
+                                    metric_name = col.replace('turn.turn_metrics.', '')
+                                    friendly_name = get_human_friendly_metric_name(metric_name)
+                                    value = turn_row.get(col, 'N/A')
+                                    if pd.notna(value) and isinstance(value, (int, float)):
+                                        turn_data[friendly_name] = round(value, 3)
+                                    else:
+                                        turn_data[friendly_name] = 'N/A'
+                                metrics_display_data.append(turn_data)
+                        if metrics_display_data:
+                            metrics_df = pd.DataFrame(metrics_display_data)
+                            st.dataframe(metrics_df, use_container_width=True)
+                            # Plot metrics over turns with real data
+                            st.subheader("📈 Metrics Over Turns")
+                            fig = go.Figure()
+                            # Add traces for each selected metric (real data)
+                            for col in available_columns[:5]:  # Limit to first 5 for readability
+                                metric_name = col.replace('turn.turn_metrics.', '')
+                                friendly_name = get_human_friendly_metric_name(metric_name)
+                                # Get values for this metric
+                                y_values = []
+                                for _, turn_row in sample_turns.iterrows():
+                                    value = turn_row.get(col, None)
+                                    if pd.notna(value) and isinstance(value, (int, float)):
+                                        y_values.append(value)
+                                    else:
+                                        y_values.append(None)
+                                if any(v is not None for v in y_values):
+                                    fig.add_trace(go.Scatter(
+                                        x=list(range(1, len(y_values) + 1)),
+                                        y=y_values,
+                                        mode='lines+markers',
+                                        name=friendly_name,
+                                        line=dict(width=2),
+                                        marker=dict(size=8),
+                                        connectgaps=False
+                                    ))
+                            if fig.data:  # Only show if we have data
+                                fig.update_layout(
+                                    title="Complexity Metrics Across Conversation Turns",
+                                    xaxis_title="Turn Number",
+                                    yaxis_title="Metric Value",
+                                    height=400,
+                                    hovermode='x unified'
+                                )
+                                st.plotly_chart(fig, use_container_width=True)
+                            else:
+                                st.info("No numeric metric data available to plot for this conversation type.")
+                        else:
+                            st.info("No matching turn-level metrics found for this conversation.")
+                    else:
+                        st.info("No turn-level data available for this conversation type.")
+                else:
+                    st.warning("No turn-level metrics available in the dataset for the selected metrics.")
+                # Show raw turn content with role highlighting
+                with st.expander("🔍 Detailed Turn Analysis", expanded=False):
+                    for i, turn in enumerate(conversation_turns):
+                        role = turn.get('role', 'unknown')
+                        content = turn.get('content', 'No content')
+                        st.markdown(f"**Turn {i+1} ({role}):**")
+                        st.text_area(
+                            f"Content",
+                            content,
+                            height=100,
+                            key=f"turn_content_{i}",
+                            disabled=True
+                        )
+                        # Show turn statistics
+                        st.caption(f"Characters: {len(content)} | Words: {len(content.split())} | Role: {role}")
+                        st.divider()
+            else:
+                st.warning("Select some metrics to see turn-level analysis.")
+        else:
+            st.warning("No conversation data available for the selected conversation.")
+    with tab5:
         st.header("Detailed View")
         # Data overview