import base64
import io
import json
import random
from collections import Counter

import dash
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Input, Output, State, callback, dcc, html
# Initialize the Dash app.
# suppress_callback_exceptions=True is required because several callbacks in
# this file target components that are not part of the initial layout: they are
# created later by other callbacks (e.g. the "show-all-dialogs-btn" button and
# the pattern-matching "conversation-icon" / "root-cause-icon" ids).
app = dash.Dash(__name__, suppress_callback_exceptions=True)
# Define app layout
# Structure (top to bottom):
#   1. Header: title and a short description.
#   2. Upload area ("upload-data") plus a status line ("upload-status").
#   3. Main content ("main-content", hidden until a file has been processed):
#      bubble chart with the "color-metric" selector on the left and the
#      topic-details sidebar on the right.
#   4. Three modal overlays (full conversation, dialogs table, root-cause
#      dialogs), all rendered with display "none" initially.
#   5. dcc.Store components that carry state between callbacks.
app.layout = html.Div(
[
# Header
html.Div(
[
# NOTE(review): the title below contains mojibake ("ðŸ”") that looks like
# a mis-encoded emoji; confirm the intended glyph and re-save the file as UTF-8.
html.H1(
"Sessions Observatory by helvia.ai ðŸ”📊",
className="app-header",
),
html.P(
"Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
className="app-description",
),
],
className="header-container",
),
# File Upload Component
html.Div(
[
dcc.Upload(
id="upload-data",
children=html.Div(
[
html.Div("Drag and Drop", className="upload-text"),
html.Div("or", className="upload-divider"),
html.Div(
html.Button("Select a File", className="upload-button")
),
],
className="upload-content",
),
style={
"width": "100%",
"height": "120px",
"lineHeight": "60px",
"borderWidth": "1px",
"borderStyle": "dashed",
"borderRadius": "0.5rem",
"textAlign": "center",
"margin": "10px 0",
"backgroundColor": "hsl(210, 40%, 98%)",
"borderColor": "hsl(214.3, 31.8%, 91.4%)",
"cursor": "pointer",
},
# Only a single file is accepted per upload
multiple=False,
),
# Status message with more padding and emphasis
html.Div(
id="upload-status",
className="upload-status-message",
style={"display": "none"}, # Initially hidden
),
],
className="upload-container",
),
# Main Content Area (hidden until file is uploaded)
html.Div(
[
# Dashboard layout with flexible grid
html.Div(
[
# Left side: Bubble chart
html.Div(
[
# Header text is replaced by a callback once data is loaded
html.H3(
id="topic-distribution-header",
children="Sessions Observatory",
className="section-header",
),
# dcc.Graph(id="bubble-chart", style={"height": "80vh"}),
dcc.Graph(
id="bubble-chart",
style={"height": "calc(100% - 154px)"},
), # NOTE (original author): this calc() height does not work for some reason
html.Div(
[
# Only keep Color by
html.Div(
[
html.Div(
html.Label(
"Color by:",
className="control-label",
),
className="control-label-container",
),
],
className="control-labels-row",
),
# Only keep Color by options
# The option values are the names of the rate columns used to colour the bubbles
html.Div(
[
html.Div(
dcc.RadioItems(
id="color-metric",
options=[
{
"label": "Sentiment",
"value": "negative_rate",
},
{
"label": "Resolution",
"value": "unresolved_rate",
},
{
"label": "Urgency",
"value": "urgent_rate",
},
],
value="negative_rate",
inline=True,
className="radio-group",
inputClassName="radio-input",
labelClassName="radio-label",
),
className="radio-container",
),
],
className="control-options-row",
),
],
className="chart-controls",
),
],
className="chart-container",
),
# Right side: Interactive sidebar with topic details
html.Div(
[
html.Div(
[
html.H3(
"Topic Details", className="section-header"
),
html.Div(
id="topic-title", className="topic-title"
),
html.Div(
[
html.Div(
[
html.H4(
"Metadata",
className="subsection-header",
),
html.Div(
id="topic-metadata",
className="metadata-container",
),
],
className="metadata-section",
),
html.Div(
[
html.H4(
"Key Metrics",
className="subsection-header",
),
html.Div(
id="topic-metrics",
className="metrics-container",
),
],
className="metrics-section",
),
# Added Root Causes section
html.Div(
[
html.H4(
[
"Root Causes",
html.I(
className="fas fa-info-circle",
title="Root cause detection is experimental and may require manual review since it is generated by AI models. Root causes are only shown in clusters with identifiable root causes.",
# Added title for info icon
style={
"marginLeft": "0.2rem",
"color": "#6c757d", # General gray
"fontSize": "0.9rem",
"cursor": "pointer",
"verticalAlign": "middle",
},
),
],
className="subsection-header",
),
html.Div(
id="root-causes",
className="root-causes-container",
),
],
id="root-causes-section",
style={
"display": "none"
}, # Initially hidden
),
# Added Tags section
html.Div(
[
html.H4(
"Tags",
className="subsection-header",
),
html.Div(
id="important-tags",
className="tags-container",
),
],
id="tags-section",
style={
"display": "none"
}, # Initially hidden
),
],
className="details-section",
),
html.Div(
[
html.Div(
[
html.H4(
[
"Sample Dialogs (Summary)",
# Clicking this button re-runs the topic-details callback (new random sample)
html.Button(
html.I(
className="fas fa-sync-alt"
),
id="refresh-dialogs-btn",
className="refresh-button",
title="Refresh dialogs",
n_clicks=0,
),
],
className="subsection-header",
style={
"margin": "0",
"display": "flex",
"alignItems": "center",
},
),
],
),
html.Div(
id="sample-dialogs",
className="sample-dialogs-container",
),
],
className="samples-section",
),
],
className="topic-details-content",
),
# Placeholder displayed while no bubble has been hovered or clicked
html.Div(
id="no-topic-selected",
children=[
html.Div(
[
html.I(
className="fas fa-info-circle info-icon"
),
html.H3("No topic selected"),
html.P(
"Click or hover on a bubble to view topic details."
),
],
className="no-selection-message",
)
],
className="no-selection-container",
),
],
className="sidebar-container",
),
],
className="dashboard-container",
)
],
id="main-content",
style={"display": "none"},
),
# Conversation Modal
html.Div(
id="conversation-modal",
children=[
html.Div(
children=[
html.Div(
[
html.H3(
"Full Conversation",
style={"margin": "0", "flex": "1"},
),
html.Button(
html.I(className="fas fa-times"),
id="close-modal-btn",
className="close-modal-btn",
title="Close",
),
],
className="modal-header",
),
html.Div(
id="conversation-subheader",
className="conversation-subheader",
),
html.Div(
id="conversation-content", className="conversation-content"
),
],
className="modal-content",
),
],
className="modal-overlay-conversation",
style={"display": "none"},
),
# Dialogs Table Modal
html.Div(
id="dialogs-table-modal",
children=[
html.Div(
children=[
html.Div(
[
html.H3(
id="dialogs-modal-title",
style={"margin": "0", "flex": "1"},
),
html.Button(
html.I(className="fas fa-times"),
id="close-dialogs-modal-btn",
className="close-modal-btn",
title="Close",
),
],
className="modal-header",
),
html.Div(
id="dialogs-table-content",
className="dialogs-table-content",
),
],
className="modal-content-large",
),
],
className="modal-overlay",
style={"display": "none"},
),
# Root Cause Dialogs Modal
html.Div(
id="root-cause-modal",
children=[
html.Div(
children=[
html.Div(
[
html.H3(
id="root-cause-modal-title",
style={"margin": "0", "flex": "1"},
),
html.Button(
html.I(className="fas fa-times"),
id="close-root-cause-modal-btn",
className="close-modal-btn",
title="Close",
),
],
className="modal-header",
),
html.Div(
id="root-cause-table-content",
className="dialogs-table-content",
),
],
className="modal-content-large",
),
],
className="modal-overlay",
style={"display": "none"},
),
# Store the processed data
dcc.Store(id="stored-data"),
# Store the current selected topic for dialogs modal
dcc.Store(id="selected-topic-store"),
# Store the current selected root cause for root cause modal
dcc.Store(id="selected-root-cause-store"),
],
className="app-container",
)
# Define the HTML page template for the app.
# Dash validates this string when it is assigned: the {%app_entry%},
# {%config%} and {%scripts%} placeholders are mandatory, and {%renderer%} is
# needed for the front-end to start. The previous template only carried
# {%metas%}, {%favicon%}, {%css%} and {%app_entry%} with no surrounding HTML,
# so the assignment was rejected by Dash.
# NOTE(review): the CSS classes used by the layout (e.g. "app-container") and
# the Font Awesome "fas ..." icon classes are not defined in this template;
# presumably they are served from the assets/ folder or an external
# stylesheet -- confirm, or add a <style>/<link> element inside <head>.
# NOTE(review): the page title contains mojibake ("ðŸ”"), most likely a
# mis-encoded emoji; it is kept verbatim until the intended glyph is confirmed.
app.index_string = """
<!DOCTYPE html>
<html>
    <head>
        {%metas%}
        <title>Sessions Observatory by helvia.ai ðŸ”📊</title>
        {%favicon%}
        {%css%}
    </head>
    <body>
        {%app_entry%}
        <footer>
            {%config%}
            {%scripts%}
            {%renderer%}
        </footer>
    </body>
</html>
"""
@callback(
    Output("topic-distribution-header", "children"),
    Input("stored-data", "data"),
)
def update_topic_distribution_header(data):
    """Return the chart heading, including the dialog total once data exists.

    Args:
        data: Contents of the "stored-data" store (list of per-topic records),
            or a falsy value when nothing has been uploaded yet.

    Returns:
        The plain title when there is no data, otherwise the title followed by
        the sum of the "count" field over all topic records.
    """
    if data:
        # Each record carries the number of dialogs of one topic in "count"
        total_dialogs = pd.DataFrame(data)["count"].sum()
        return f"Sessions Observatory ({total_dialogs} dialogs)"
    # Default when no data is available
    return "Sessions Observatory"
# Define callback to process uploaded file
def _upload_status_message(icon_class, color, text):
    """Build the icon + text line shown below the upload box."""
    return html.Div(
        [
            html.I(
                className=icon_class,
                style={"color": color, "marginRight": "8px"},
            ),
            text,
        ],
        style={"color": color},
    )


@callback(
    [
        Output("stored-data", "data"),
        Output("upload-status", "children"),
        Output("upload-status", "style"),  # Add style output for visibility
        Output("main-content", "style"),
    ],
    [Input("upload-data", "contents")],
    [State("upload-data", "filename")],
)
def process_upload(contents, filename):
    """Parse an uploaded CSV/Excel file and publish per-topic statistics.

    Args:
        contents: The ``dcc.Upload`` data-URL ("<mime header>,<base64 payload>"),
            or None when nothing has been uploaded.
        filename: Name of the uploaded file; its extension selects the parser.

    Returns:
        A 4-tuple ``(stored_data, status_children, status_style, main_style)``.
        On success ``stored_data`` is the list of topic records produced by
        ``analyze_topics`` and the main content becomes visible; on any failure
        ``stored_data`` is None, an error message is shown and the main content
        stays hidden.
    """
    if contents is None:
        return None, "", {"display": "none"}, {"display": "none"}  # Keep hidden

    try:
        # Parse uploaded file. Split only on the first comma: everything after
        # it is the base64 payload.
        _, content_string = contents.split(",", 1)
        decoded = base64.b64decode(content_string)

        # Decide by extension rather than by substring, so that a name such as
        # "my_csv_export.xlsx" is not mistaken for a CSV file.
        lowered_name = (filename or "").lower()
        if lowered_name.endswith(".csv"):
            df = pd.read_csv(io.StringIO(decoded.decode("utf-8")))
        elif lowered_name.endswith((".xls", ".xlsx", ".xlsm", ".xlsb")):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return (
                None,
                _upload_status_message(
                    "fas fa-exclamation-circle",
                    "var(--destructive)",
                    "Please upload a CSV or Excel file.",
                ),
                {"display": "block"},  # Make visible after error
                {"display": "none"},
            )

        # DEBUG
        # --- Print unique root_cause_subcluster values for each deduplicated_topic_name ---
        if (
            "deduplicated_topic_name" in df.columns
            and "root_cause_subcluster" in df.columns
        ):
            print(
                "\n[INFO] Unique root_cause_subcluster values for each deduplicated_topic_name:"
            )
            for topic in df["deduplicated_topic_name"].unique():
                subclusters = (
                    df[df["deduplicated_topic_name"] == topic]["root_cause_subcluster"]
                    .dropna()
                    .unique()
                )
                print(f"- {topic}:")
                for sub in subclusters:
                    print(f" - {sub}")
            print()
        # --- End of DEBUG ---

        # Hardcoded flag to exclude 'Unclustered' topics. Applied to every
        # supported file type (previously it only ran for Excel uploads).
        EXCLUDE_UNCLUSTERED = True
        if EXCLUDE_UNCLUSTERED and "deduplicated_topic_name" in df.columns:
            df = df[df["deduplicated_topic_name"] != "Unclustered"].copy()
        # If we strip leading and trailing `"` or `'` from the topic name here, then
        # we will have a problem with the deduplicated names, as they will not match the
        # original topic names in the dataset.
        # Better do it in the first script.

        # Process the dataframe to get topic statistics
        topic_stats = analyze_topics(df)
        return (
            topic_stats.to_dict("records"),
            _upload_status_message(
                "fas fa-check-circle",
                "hsl(142.1, 76.2%, 36.3%)",
                f'Successfully uploaded "{filename}"',
            ),
            {"display": "block"},  # maybe add the above line here too #TODO
            {
                "display": "block",
                "height": "calc(100vh - 40px)",
            },  # Make visible after successful upload
        )
    except Exception as e:
        # UI boundary: any parsing/analysis failure is reported to the user
        # instead of surfacing as an unhandled callback error.
        return (
            None,
            _upload_status_message(
                "fas fa-exclamation-triangle",
                "var(--destructive)",
                f"Error processing file: {e}",
            ),
            {"display": "block"},  # Make visible after error
            {"display": "none"},
        )
# Function to analyze the topics and create statistics
def analyze_topics(df):
    """Aggregate dialogs per topic and attach the bubble-chart layout.

    Args:
        df: Dialog-level DataFrame. The code reads the columns
            "deduplicated_topic_name", "id", "Sentiment", "Resolution" and
            "Urgency".

    Returns:
        One row per topic with the dialog count, the negative / unresolved /
        urgent counts and rates (percent, one decimal), plus the layout columns
        added by ``apply_binned_layout``.
    """

    def _occurrences_of(label):
        # Aggregator counting how many values in a group equal `label`
        return lambda values: (values == label).sum()

    # IMPORTANT!
    # As deduplicated_topic_name, we have either the deduplicated names (if enabled by the process),
    # or the kmeans_reclustered name (where available) and the ClusterNames.
    per_topic = df.groupby("deduplicated_topic_name")
    topic_stats = per_topic.agg(
        count=("id", "count"),
        negative_count=("Sentiment", _occurrences_of("negative")),
        unresolved_count=("Resolution", _occurrences_of("unresolved")),
        urgent_count=("Urgency", _occurrences_of("urgent")),
    ).reset_index()

    # Calculate rates: share of the topic's dialogs, as a percentage
    for metric in ("negative", "unresolved", "urgent"):
        share = topic_stats[f"{metric}_count"] / topic_stats["count"] * 100
        topic_stats[f"{metric}_rate"] = share.round(1)

    # Apply binned layout
    return apply_binned_layout(topic_stats)
# New binned layout function
def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
    """
    Apply a binned layout where bubbles are grouped into rows based on dialog count.
    Bubbles in each row will be centered horizontally.

    Rows that would hold more than ``max_items_per_row`` bubbles are split into
    evenly sized sub-rows named ``"<bin>_<k>"`` and described as
    ``"<description> (k/n)"``. Rows are ordered by bin position and sub-row
    number (numerically, so ``"Bin 8_10"`` comes after ``"Bin 8_2"``), and by
    descending count inside a row.

    Args:
        df: DataFrame containing the topic data; must have a "count" column.
        padding: Padding from the top and bottom edges, in percent of the
            0-100 vertical scale.
        bin_config: List of tuples defining bin ranges and descriptions.
            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
            The first matching bin wins; counts matching no bin fall into the
            last bin of the configuration.
        max_items_per_row: Maximum number of items to display in a single row.

    Returns:
        A sorted copy of ``df`` with the columns "bin", "bin_description",
        "y", "x" and "size_rank" added (or overwritten). An empty input yields
        an empty frame carrying those columns.

    Raises:
        ValueError: If ``bin_config`` is empty or ``max_items_per_row`` < 1.
    """
    if max_items_per_row < 1:
        raise ValueError("max_items_per_row must be at least 1")

    # Default bin configuration if none is provided
    # 8 rows x 6 bubbles is usually good
    if bin_config is None:
        bin_config = [
            (100, None, "100+ dialogs"),
            (50, 99, "50-99 dialogs"),
            (25, 49, "25-49 dialogs"),
            (9, 24, "9-24 dialogs"),
            (7, 8, "7-8 dialogs"),
            # The upper bound overlaps the previous bin, but the first match
            # wins, so a count of 7 is always placed in "7-8 dialogs".
            (5, 7, "5-6 dialogs"),
            (4, 4, "4 dialogs"),
            (0, 3, "0-3 dialogs"),
        ]
    if len(bin_config) == 0:
        raise ValueError("bin_config must define at least one bin")

    # Create a copy of the dataframe to avoid modifying the original
    layout = df.copy()

    # Nothing to place: return the expected columns instead of dividing by zero
    if layout.empty:
        for column, dtype in (
            ("bin", "object"),
            ("bin_description", "object"),
            ("y", "float64"),
            ("x", "float64"),
            ("size_rank", "int64"),
        ):
            layout[column] = pd.Series(index=layout.index, dtype=dtype)
        return layout

    # Position (0-based) of the first bin each count falls into; counts that
    # match no bin go to the last configured bin.
    counts = layout["count"]
    conditions = [
        (counts >= lower).to_numpy()
        if upper is None  # No upper limit
        else ((counts >= lower) & (counts <= upper)).to_numpy()
        for lower, upper, _ in bin_config
    ]
    bin_index = np.select(
        conditions, list(range(len(bin_config))), default=len(bin_config) - 1
    )

    # Sort by bin position (numeric, not by label text) and by count
    # (descending) within each bin; ties keep their input order.
    layout["_bin_index"] = bin_index
    layout = layout.sort_values(by=["_bin_index", "count"], ascending=[True, False])
    sorted_bin_index = layout.pop("_bin_index").to_numpy()

    center_point = 50  # Middle of the chart (0-100 scale)
    bin_labels = []
    bin_texts = []
    row_numbers = []
    x_positions = []
    next_row = 0

    populated_bins, bin_sizes = np.unique(sorted_bin_index, return_counts=True)
    for position, bin_size in zip(populated_bins.tolist(), bin_sizes.tolist()):
        bin_name = f"Bin {position + 1}"
        description = bin_config[position][2]

        if bin_size > max_items_per_row:
            # Split into the fewest sub-rows that respect the limit, and
            # spread the items as evenly as possible (extras go first).
            num_sub_bins = -(-bin_size // max_items_per_row)
            base_size, extra = divmod(bin_size, num_sub_bins)
            rows = [
                (
                    f"{bin_name}_{i + 1}",
                    f"{description} ({i + 1}/{num_sub_bins})",
                    base_size + (1 if i < extra else 0),
                )
                for i in range(num_sub_bins)
            ]
        else:
            rows = [(bin_name, description, bin_size)]

        for row_name, row_description, row_size in rows:
            # Rows that are not full get slightly wider spacing
            spacing = 17.5 if row_size < max_items_per_row else 15
            # Center the group of bubbles around the middle of the chart
            start_pos = center_point - ((row_size - 1) * spacing) / 2
            x_positions.extend(start_pos + (i * spacing) for i in range(row_size))
            bin_labels.extend([row_name] * row_size)
            bin_texts.extend([row_description] * row_size)
            row_numbers.extend([next_row] * row_size)
            next_row += 1

    # Each row gets an equal slice of the available height and sits in the
    # middle of its slice.
    row_height = (100 - (2 * padding)) / next_row

    layout["bin"] = bin_labels
    layout["bin_description"] = bin_texts
    layout["y"] = [
        padding + row * row_height + (row_height / 2) for row in row_numbers
    ]
    layout["x"] = x_positions
    # Add original rank for reference
    layout["size_rank"] = range(1, len(layout) + 1)
    return layout
# Position helper used by the main bubble chart
def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with bubble positions computed by the binned layout.

    The main chart always uses ``apply_binned_layout`` with its default
    settings; this wrapper only forwards the frame to it.
    """
    positioned = apply_binned_layout(df)
    return positioned
# Callback to update the bubble chart
@callback(
    Output("bubble-chart", "figure"),
    [
        Input("stored-data", "data"),
        Input("color-metric", "value"),
    ],
)
def update_bubble_chart(data, color_metric):
    """Build the topic bubble chart from the stored per-topic statistics.

    Args:
        data: Contents of the "stored-data" store (list of topic records); a
            falsy value yields an empty figure.
        color_metric: Name of the rate column used for the bubble colour
            ("negative_rate" or "unresolved_rate"); any other value falls back
            to "urgent_rate".

    Returns:
        A Plotly figure with one bubble per topic (size driven by the dialog
        count, colour by the selected rate), a text label under each bubble,
        and one dotted separator line plus description label per layout row.
    """
    if not data:
        return go.Figure()

    # Update positions using binned layout
    df = update_bubble_positions(pd.DataFrame(data))

    # Always use count for sizing
    raw_sizes = df["count"]
    size_title = "Dialog Count"

    # Apply log scaling to the size values for better visualization.
    # To make the smallest bubble bigger, increase min_size.
    min_size = 1  # Minimum bubble size
    if raw_sizes.max() > raw_sizes.min():
        # Log-scale the sizes, then map them onto [min_size, min_size + 50].
        # To make the biggest bubble smaller, reduce the multiplier (50).
        log_sizes = np.log1p(raw_sizes)
        size_values = (
            min_size
            + (log_sizes - log_sizes.min()) / (log_sizes.max() - log_sizes.min()) * 50
        )
    else:
        # If all values are the same, use a default size
        size_values = np.ones(len(df)) * 12.5
    # Plain array in row order, so the label offsets below can be looked up
    # positionally regardless of the frame's index.
    marker_sizes = np.asarray(size_values, dtype=float)

    # Determine color based on selected metric
    color_titles = {
        "negative_rate": "Negativity (%)",
        "unresolved_rate": "Unresolved (%)",
        "urgent_rate": "Urgency (%)",
    }
    color_column = color_metric if color_metric in color_titles else "urgent_rate"
    color_values = df[color_column]
    color_title = color_titles[color_column]
    color_scale = "Teal"

    # Create enhanced hover text that includes bin information
    # (one field per line; Plotly renders <br> as a line break)
    hover_text = [
        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
        for topic, raw, color, bin_desc in zip(
            df["deduplicated_topic_name"],
            raw_sizes,
            color_values,
            df["bin_description"],
        )
    ]

    # Create bubble chart
    fig = px.scatter(
        df,
        x="x",
        y="y",
        size=size_values,
        color=color_values,
        hover_name="deduplicated_topic_name",
        hover_data={
            "x": False,
            "y": False,
            "bin_description": True,
        },
        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
        color_continuous_scale=color_scale,
        custom_data=[
            "deduplicated_topic_name",
            "count",
            "negative_rate",
            "unresolved_rate",
            "urgent_rate",
            "bin_description",
        ],
    )

    # Update traces: markers only, with the custom hover text
    fig.update_traces(
        mode="markers",
        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
        hovertemplate="%{hovertext}",
        hovertext=hover_text,
    )

    # Create annotations for the bubbles
    words_per_line = 4
    annotations = []
    for topic, x_pos, y_pos, marker_size in zip(
        df["deduplicated_topic_name"], df["x"], df["y"], marker_sizes
    ):
        # Wrap the label: start a new line every `words_per_line` words
        words = topic.split()
        wrapped_text = "<br>".join(
            " ".join(words[start : start + words_per_line])
            for start in range(0, len(words), words_per_line)
        )
        # Vertical offset grows with the bubble size so the text sits below
        # the bubble (the y axis is reversed); adjust the divisor for spacing.
        vertical_offset = marker_size / 20
        annotations.append(
            dict(
                x=x_pos,
                y=y_pos
                + 0.125  # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
                + vertical_offset,
                text=wrapped_text,
                showarrow=False,
                textangle=0,
                font=dict(
                    size=9,
                    color="var(--foreground)",
                    family="Arial, sans-serif",
                    weight="bold",
                ),
                xanchor="center",
                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
                bgcolor="rgba(255,255,255,0.7)",  # Semi-transparent background for readability
                bordercolor="rgba(0,0,0,0.1)",  # Subtle border color
                borderwidth=1,
                borderpad=1,
                # TODO: Radius for rounded corners
            )
        )

    # Add one subtle separator line and one label per layout row (bin).
    # Each line is added exactly once.
    bin_summary = df.groupby("bin").agg(
        y=("y", "mean"), description=("bin_description", "first")
    )
    for bin_y, description in zip(bin_summary["y"], bin_summary["description"]):
        # Add horizontal line
        fig.add_shape(
            type="line",
            x0=0,
            y0=bin_y,
            x1=100,
            y1=bin_y,
            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
            layer="below",
        )
        # Add bin label annotation
        annotations.append(
            dict(
                x=0,  # Position the label on the left side
                y=bin_y,
                xref="x",
                yref="y",
                text=description,
                showarrow=False,
                font=dict(size=8.25, color="var(--muted-foreground)"),
                align="left",
                xanchor="left",
                yanchor="middle",
                bgcolor="rgba(255,255,255,0.7)",
                borderpad=1,
            )
        )

    fig.update_layout(
        title=None,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            title=None,
            range=[0, 100],
            autorange="reversed",  # Keep largest at top
        ),
        hovermode="closest",
        margin=dict(l=0, r=0, t=10, b=10),
        coloraxis_colorbar=dict(
            title=color_title,
            title_font=dict(size=9),
            tickfont=dict(size=8),
            thickness=10,
            len=0.6,
            yanchor="middle",
            y=0.5,
            xpad=0,
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
        annotations=annotations,  # Bubble labels and bin labels
    )
    return fig
# Sidebar callback: fills the topic details for the hovered/clicked bubble
def _load_uploaded_frame(file_contents):
    """Decode a ``dcc.Upload`` data-URL into a DataFrame.

    Excel content is recognised by the xlsx MIME header or by the file
    signature (ZIP container for .xlsx, OLE2 container for .xls), so workbooks
    reported with another MIME type are not decoded as CSV. Everything else is
    read as UTF-8 CSV. "Root_Cause" is always read as text.
    """
    content_type, content_string = file_contents.split(",", 1)
    decoded = base64.b64decode(content_string)
    xlsx_header = (
        "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    )
    is_excel = content_type == xlsx_header or decoded.startswith(
        (b"PK\x03\x04", b"\xd0\xcf\x11\xe0")
    )
    if is_excel:
        return pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    return pd.read_csv(io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str})


def _empty_topic_details():
    """Return the callback outputs for the "no topic selected" state."""
    return (
        "",
        [],
        [],
        "",
        {"display": "none"},
        "",
        {"display": "none"},
        [],
        {"display": "flex"},
        None,
    )


def _build_root_causes(topic_name, topic_conversations):
    """Return ``(children, section_style)`` for the Root Causes section."""
    if "root_cause_subcluster" not in topic_conversations.columns:
        return "", {"display": "none"}

    # Get unique root causes for this specific cluster
    root_causes = topic_conversations["root_cause_subcluster"].dropna().unique()
    # Filter out common non-informative values including "Unclustered"
    non_informative = [
        "Sub-clustering disabled",
        "Not eligible for sub-clustering",
        "No valid root causes",
        "No Subcluster",
        "Unclustered",
        "",
    ]
    filtered_root_causes = [rc for rc in root_causes if rc not in non_informative]

    # Debug: Print the unique root causes for this cluster
    print(f"\n[DEBUG] Root causes for cluster '{topic_name}':")
    print(f" All root causes: {list(root_causes)}")
    print(f" Filtered root causes: {filtered_root_causes}")

    if not filtered_root_causes:
        return "", {"display": "none"}

    # One tag per root cause; the trailing icon is a pattern-matching
    # component whose id carries the root cause text.
    tags = html.Div(
        [
            html.Div(
                [
                    html.I(className="fas fa-exclamation-triangle root-cause-tag-icon"),
                    html.Span(root_cause, style={"marginRight": "6px"}),
                    html.I(
                        className="fas fa-external-link-alt root-cause-click-icon",
                        id={"type": "root-cause-icon", "index": root_cause},
                        title="Click to see specific chats assigned with this root cause.",
                        style={
                            "cursor": "pointer",
                            "fontSize": "0.55rem",
                            "opacity": "0.8",
                        },
                    ),
                ],
                className="root-cause-tag",
                style={"display": "inline-flex", "alignItems": "center"},
            )
            for root_cause in filtered_root_causes
        ],
        className="root-causes-container",
    )
    return tags, {"display": "block"}


def _build_tags(topic_conversations, top_k=15):
    """Return ``(children, section_style)`` for the Tags section."""
    # Count tag frequencies over the comma-separated "consolidated_tags" cells
    tag_counts = Counter()
    if "consolidated_tags" in topic_conversations.columns:
        for cell in topic_conversations["consolidated_tags"].dropna():
            # str() guards against non-text cells (e.g. a purely numeric tag)
            tag_counts.update(
                tag.strip() for tag in str(cell).split(",") if tag.strip()
            )

    # Sort by frequency (most common first) and then alphabetically for ties,
    # and keep only the top K tags
    sorted_tags = sorted(tag_counts.items(), key=lambda item: (-item[1], item[0]))
    sorted_tags = sorted_tags[:top_k]

    if not sorted_tags:
        message = html.Div(
            [
                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
                "No tags found for this topic",
            ],
            className="no-tags-message",
        )
        return message, {"display": "none"}

    tags = html.Div(
        [
            html.Div(
                [
                    html.I(className="fas fa-tag topic-tag-icon"),
                    html.Span(f"{tag} ({count})"),
                ],
                className="topic-tag",
            )
            for tag, count in sorted_tags
        ],
        className="tags-container",
    )
    return tags, {"display": "block"}


def _build_dialog_item(row):
    """Render one sampled dialog: its summary plus the metadata tags."""
    tags = [
        html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
        html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
        html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
    ]

    # Add Chat ID tag if 'id' column exists
    if "id" in row:
        tags.append(
            html.Span(
                [
                    f"Chat ID: {row['id']} ",
                    html.I(
                        className="fas fa-arrow-up-right-from-square conversation-icon",
                        id={"type": "conversation-icon", "index": row["id"]},
                        title="View full conversation",
                        style={"marginLeft": "0.25rem"},
                    ),
                ],
                className="dialog-tag tag-chat-id",
                style={"display": "inline-flex", "alignItems": "center"},
            )
        )

    # Add Root Cause tag if the 'Root_Cause' column exists and holds a value
    if (
        "Root_Cause" in row
        and pd.notna(row["Root_Cause"])
        and row["Root_Cause"] != "na"
    ):
        tags.append(
            html.Span(
                f"Root Cause: {row['Root_Cause']}",
                className="dialog-tag tag-root-cause",
            )
        )

    return html.Div(
        [
            html.Div(row["Summary"], className="dialog-summary"),
            html.Div(tags, className="dialog-metadata"),
        ],
        className="dialog-item",
    )


def _build_sample_dialogs(topic_conversations, max_samples=5):
    """Return up to ``max_samples`` randomly chosen dialogs as components."""
    sample_size = min(max_samples, len(topic_conversations))
    if sample_size == 0:
        return [
            html.Div(
                "No sample dialogs available for this topic.",
                style={"color": "var(--muted-foreground)"},
            )
        ]
    sample_indices = random.sample(range(len(topic_conversations)), sample_size)
    samples = topic_conversations.iloc[sample_indices]
    return [_build_dialog_item(row) for _, row in samples.iterrows()]


@callback(
    [
        Output("topic-title", "children"),
        Output("topic-metadata", "children"),
        Output("topic-metrics", "children"),
        Output("root-causes", "children"),
        Output("root-causes-section", "style"),
        Output("important-tags", "children"),
        Output("tags-section", "style"),
        Output("sample-dialogs", "children"),
        Output("no-topic-selected", "style"),
        Output("selected-topic-store", "data"),
    ],
    [
        Input("bubble-chart", "hoverData"),
        Input("bubble-chart", "clickData"),
        Input("refresh-dialogs-btn", "n_clicks"),
    ],
    [State("stored-data", "data"), State("upload-data", "contents")],
)
def update_topic_details(
    hover_data, click_data, refresh_clicks, stored_data, file_contents
):
    """Populate the sidebar for the topic under the cursor (or last clicked).

    Args:
        hover_data: ``hoverData`` of the bubble chart.
        click_data: ``clickData`` of the bubble chart.
        refresh_clicks: Click counter of the refresh button; only used as a
            trigger so that a new random sample of dialogs is drawn.
        stored_data: Per-topic records from the "stored-data" store.
        file_contents: Raw upload data-URL, re-parsed to access the dialogs.

    Returns:
        A 10-tuple matching the callback outputs: title, metadata, metrics,
        root causes and their section style, tags and their section style,
        sample dialogs, style of the "no topic selected" placeholder, and the
        data for "selected-topic-store". When no topic can be resolved, the
        empty "no topic selected" state is returned.
    """
    # Hover data takes precedence; click data is the fallback when nothing is
    # currently hovered.
    hover_info = hover_data or click_data
    if not hover_info or not stored_data or not file_contents:
        return _empty_topic_details()

    # Extract topic name from the hover data
    topic_name = hover_info["points"][0]["customdata"][0]

    # Get stored data for this topic
    df_stored = pd.DataFrame(stored_data)
    matching_topics = df_stored[df_stored["deduplicated_topic_name"] == topic_name]
    if matching_topics.empty:
        # Stale hover/click data, e.g. left over from a previously uploaded file
        return _empty_topic_details()
    topic_data = matching_topics.iloc[0]

    # Get original data to sample conversations.
    # NOTE: the upload is re-parsed on every trigger (including hover events).
    df_full = _load_uploaded_frame(file_contents)
    # Filter to this topic
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

    # Create the title
    title = html.Div([html.Span(topic_name)])

    # Create metadata items
    metadata_items = [
        html.Div(
            [
                html.I(className="fas fa-comments metadata-icon"),
                html.Span(f"{int(topic_data['count'])} dialogs"),
                html.Button(
                    [
                        html.I(
                            className="fas fa-table", style={"marginRight": "0.25rem"}
                        ),
                        "Show all dialogs inside",
                    ],
                    id="show-all-dialogs-btn",
                    className="show-dialogs-btn",
                    n_clicks=0,
                ),
            ],
            className="metadata-item",
            style={"display": "flex", "alignItems": "center", "width": "100%"},
        ),
    ]

    # Create metrics boxes
    metrics_boxes = [
        html.Div(
            [
                html.Div(f"{topic_data['negative_rate']}%", className="metric-value"),
                html.Div("Negative Sentiment", className="metric-label"),
            ],
            className="metric-box negative",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['unresolved_rate']}%", className="metric-value"),
                html.Div("Unresolved", className="metric-label"),
            ],
            className="metric-box unresolved",
        ),
        html.Div(
            [
                html.Div(f"{topic_data['urgent_rate']}%", className="metric-value"),
                html.Div("Urgent", className="metric-label"),
            ],
            className="metric-box urgent",
        ),
    ]

    root_causes_output, root_causes_section_style = _build_root_causes(
        topic_name, topic_conversations
    )
    tags_output, tags_section_style = _build_tags(topic_conversations)
    sample_dialogs = _build_sample_dialogs(topic_conversations)

    return (
        title,
        metadata_items,
        metrics_boxes,
        root_causes_output,
        root_causes_section_style,
        tags_output,
        tags_section_style,
        sample_dialogs,
        {"display": "none"},
        {"topic_name": topic_name, "file_contents": file_contents},
    )
# Callback to open modal when conversation icon is clicked
@callback(
    [
        Output("conversation-modal", "style"),
        Output("conversation-content", "children"),
        Output("conversation-subheader", "children"),
    ],
    [Input({"type": "conversation-icon", "index": dash.dependencies.ALL}, "n_clicks")],
    [State("upload-data", "contents")],
    prevent_initial_call=True,
)
def open_conversation_modal(n_clicks_list, file_contents):
    """Show the full conversation of the chat whose icon was clicked.

    Args:
        n_clicks_list: ``n_clicks`` of every rendered conversation icon.
        file_contents: Raw upload data-URL, re-parsed to look up the chat.

    Returns:
        ``(modal_style, conversation_children, subheader_children)``. The modal
        stays hidden when no icon was actually clicked or no file is loaded.
    """
    hidden = ({"display": "none"}, "", "")

    # Check if any icon was clicked
    if not any(n_clicks_list) or not file_contents:
        return hidden

    # Get which icon was clicked; ignore triggers without a click count
    # (freshly rendered icons fire with n_clicks=None).
    ctx = dash.callback_context
    if not ctx.triggered:
        return hidden
    clicked = next((item for item in ctx.triggered if item["value"]), None)
    if clicked is None:
        return hidden

    # prop_id is "<json id>.<property>": strip only the trailing property so
    # that chat ids containing a "." do not corrupt the JSON.
    component_id = clicked["prop_id"].rsplit(".", 1)[0]
    chat_id = json.loads(component_id)["index"]

    # Get the full conversation from the uploaded file
    content_type, content_string = file_contents.split(",", 1)
    decoded = base64.b64decode(content_string)
    # Excel is recognised by the xlsx MIME header or by the file signature
    # (ZIP container for .xlsx, OLE2 container for .xls); otherwise assume CSV.
    is_excel = (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ) or decoded.startswith((b"PK\x03\x04", b"\xd0\xcf\x11\xe0"))
    if is_excel:
        df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    else:
        df_full = pd.read_csv(
            io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
        )

    # Find the conversation with this chat ID
    conversation_row = df_full[df_full["id"] == chat_id]
    if len(conversation_row) == 0:
        conversation_text = "Conversation not found."
        subheader_content = f"Chat ID: {chat_id}"
    else:
        row = conversation_row.iloc[0]
        conversation_text = row.get("conversation")
        # A missing column or an empty cell both fall back to the placeholder
        if conversation_text is None or pd.isna(conversation_text):
            conversation_text = "No conversation data available."
        # Get cluster name if available
        cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
        # Create subheader with both Chat ID and cluster name
        subheader_content = html.Div(
            [
                html.Span(
                    f"Chat ID: {chat_id}",
                    style={"fontWeight": "600", "marginRight": "1rem"},
                ),
                html.Span(
                    f"Cluster: {cluster_name}",
                    style={"color": "hsl(215.4, 16.3%, 46.9%)"},
                ),
            ]
        )
    return {"display": "flex"}, conversation_text, subheader_content
# Callback to close modal
@callback(
    Output("conversation-modal", "style", allow_duplicate=True),
    [Input("close-modal-btn", "n_clicks")],
    prevent_initial_call=True,
)
def close_conversation_modal(n_clicks):
    """Hide the conversation modal.

    The returned style is the same whatever the value of ``n_clicks``.
    """
    hidden_style = {"display": "none"}
    return hidden_style
# Callback to open dialogs table modal when "Show all dialogs inside" button is clicked
@callback(
    [
        Output("dialogs-table-modal", "style"),
        Output("dialogs-modal-title", "children"),
        Output("dialogs-table-content", "children"),
    ],
    [Input("show-all-dialogs-btn", "n_clicks")],
    [State("selected-topic-store", "data")],
    prevent_initial_call=True,
)
def open_dialogs_table_modal(n_clicks, selected_topic_data):
    """Build and show a table of every dialog in the selected topic.

    Args:
        n_clicks: ``n_clicks`` of the ``show-all-dialogs-btn`` button.
        selected_topic_data: Stored dict with the keys ``"topic_name"`` and
            ``"file_contents"`` (``"<content type>,<base64 payload>"``).

    Returns:
        A ``(modal style, modal title, table)`` tuple. The modal is hidden
        when the button has not been clicked or no topic is stored.
    """
    if not n_clicks or not selected_topic_data:
        return {"display": "none"}, "", ""

    alert_color = "#dc3545"
    neutral_color = "#6c757d"

    def label(value):
        """Return a capitalized display label, or "Unknown" if missing."""
        # str() guards against NaN / non-string cells, on which a bare
        # .capitalize() call would raise AttributeError.
        return "Unknown" if pd.isna(value) else str(value).capitalize()

    def badge(text, background):
        """Return a small white-on-color tag."""
        return html.Span(
            text,
            className="dialog-tag-small",
            style={"backgroundColor": background, "color": "white"},
        )

    def status_cell(row, column, alert_value):
        """Return a cell for *column*, red when it equals *alert_value*."""
        value = row.get(column)
        background = alert_color if value == alert_value else neutral_color
        return html.Td(badge(label(value), background))

    def tags_display(row):
        """Return the first three tags plus a "+N" overflow marker."""
        tags_str = row.get("consolidated_tags", "")
        if pd.isna(tags_str):
            return html.Span(
                "No tags",
                style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
            )
        tags = [tag.strip() for tag in str(tags_str).split(",") if tag.strip()]
        chips = [badge(tag, neutral_color) for tag in tags[:3]]  # first 3 only
        if len(tags) > 3:
            chips.append(badge(f"+{len(tags) - 3}", neutral_color))
        return html.Div(chips, className="dialog-tags-cell")

    topic_name = selected_topic_data["topic_name"]
    file_contents = selected_topic_data["file_contents"]
    # Get the full data
    content_type, content_string = file_contents.split(",")
    decoded = base64.b64decode(content_string)
    if (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ):
        df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    else:  # Assume CSV
        df_full = pd.read_csv(
            io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
        )
    # Filter to this topic
    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]
    # Header row
    table_rows = [
        html.Tr(
            [
                html.Th("Chat ID"),
                html.Th("Summary"),
                html.Th("Root Cause"),
                html.Th("Sentiment"),
                html.Th("Resolution"),
                html.Th("Urgency"),
                html.Th("Tags"),
                html.Th("Action"),
            ]
        )
    ]
    # Data rows
    for _, row in topic_conversations.iterrows():
        table_rows.append(
            html.Tr(
                [
                    html.Td(
                        row["id"],
                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
                    ),
                    html.Td(
                        row.get("Summary", "No summary"),
                        className="dialog-summary-cell",
                    ),
                    # Brown color for root cause
                    html.Td(badge(label(row.get("Root_Cause")), "#8B4513")),
                    # Negative / unresolved / urgent values are shown in red,
                    # everything else in grey
                    status_cell(row, "Sentiment", "negative"),
                    status_cell(row, "Resolution", "unresolved"),
                    status_cell(row, "Urgency", "urgent"),
                    html.Td(tags_display(row)),
                    html.Td(
                        html.Button(
                            [
                                html.I(
                                    className="fas fa-eye",
                                    style={"marginRight": "0.25rem"},
                                ),
                                "View chat session",
                            ],
                            id={"type": "open-chat-btn", "index": row["id"]},
                            className="open-chat-btn",
                            n_clicks=0,
                        )
                    ),
                ]
            )
        )
    table = html.Table(table_rows, className="dialogs-table")
    modal_title = (
        f"All dialogs in Topic: {topic_name} ({len(topic_conversations)} dialogs)"
    )
    return {"display": "flex"}, modal_title, table
# Callback to close dialogs table modal
@callback(
    Output("dialogs-table-modal", "style", allow_duplicate=True),
    [Input("close-dialogs-modal-btn", "n_clicks")],
    prevent_initial_call=True,
)
def close_dialogs_table_modal(n_clicks):
    """Hide the dialogs table modal.

    The returned style is the same whatever the value of ``n_clicks``.
    """
    hidden_style = {"display": "none"}
    return hidden_style
# Callback to open conversation modal from dialogs table
@callback(
    [
        Output("conversation-modal", "style", allow_duplicate=True),
        Output("conversation-content", "children", allow_duplicate=True),
        Output("conversation-subheader", "children", allow_duplicate=True),
    ],
    [Input({"type": "open-chat-btn", "index": dash.dependencies.ALL}, "n_clicks")],
    [State("upload-data", "contents")],
    prevent_initial_call=True,
)
def open_conversation_from_table(n_clicks_list, file_contents):
    """Show the full conversation for a row clicked in the dialogs table.

    Args:
        n_clicks_list: ``n_clicks`` of every ``open-chat-btn`` button.
        file_contents: ``contents`` of the ``upload-data`` component, in the
            form ``"<content type>,<base64 payload>"``.

    Returns:
        A ``(modal style, conversation content, subheader)`` tuple. The modal
        is hidden when no button has been clicked or no file is loaded.
    """
    hidden = ({"display": "none"}, "", "")
    # Check if any button was clicked
    if not any(n_clicks_list) or not file_contents:
        return hidden
    # Get which button was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return hidden
    # Extract the chat ID from the triggered input. prop_id looks like
    # '<JSON id>.<property>'; split on the LAST dot only, because the JSON id
    # itself contains a dot whenever the chat ID does.
    triggered_id = ctx.triggered[0]["prop_id"]
    chat_id = json.loads(triggered_id.rsplit(".", 1)[0])["index"]
    # Get the full conversation from the uploaded file
    content_type, content_string = file_contents.split(",")
    decoded = base64.b64decode(content_string)
    if (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ):
        df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    else:  # Assume CSV
        df_full = pd.read_csv(
            io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
        )
    # Try to match with different data type conversions
    conversation_row = df_full[df_full["id"] == chat_id]
    # If not found, compare both sides as strings
    if conversation_row.empty:
        conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
    # If still not found, try the chat ID as an integer
    if conversation_row.empty:
        try:
            conversation_row = df_full[df_full["id"] == int(chat_id)]
        except (ValueError, TypeError):
            pass
    if conversation_row.empty:
        conversation_text = f"Conversation not found for Chat ID: {chat_id}. Available IDs: {df_full['id'].head(10).tolist()}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
    else:
        row = conversation_row.iloc[0]
        no_conversation = "No conversation available, oopsie."
        conversation_text = row.get("conversation", no_conversation)
        # An empty cell is read as NaN; show the placeholder in that case too.
        if pd.isna(conversation_text):
            conversation_text = no_conversation
        # Create subheader with metadata
        subheader_content = (
            f"Chat ID: {chat_id}"
            f" | Topic: {row.get('deduplicated_topic_name', 'Unknown')}"
            f" | Sentiment: {row.get('Sentiment', 'Unknown')}"
            f" | Resolution: {row.get('Resolution', 'Unknown')}"
        )
    return {"display": "flex"}, conversation_text, subheader_content
# Callback to open root cause modal when root cause icon is clicked
@callback(
    [
        Output("root-cause-modal", "style"),
        Output("root-cause-modal-title", "children"),
        Output("root-cause-table-content", "children"),
    ],
    [Input({"type": "root-cause-icon", "index": dash.dependencies.ALL}, "n_clicks")],
    [State("selected-topic-store", "data")],
    prevent_initial_call=True,
)
def open_root_cause_modal(n_clicks_list, selected_topic_data):
    """Build and show a table of the dialogs that share a root cause.

    Args:
        n_clicks_list: ``n_clicks`` of every ``root-cause-icon`` component.
        selected_topic_data: Stored dict with the keys ``"topic_name"`` and
            ``"file_contents"`` (``"<content type>,<base64 payload>"``).

    Returns:
        A ``(modal style, modal title, content)`` tuple. The modal is hidden
        when no icon has been clicked or no topic is stored.
    """
    hidden = ({"display": "none"}, "", "")
    # Check if any icon was clicked
    if not any(n_clicks_list) or not selected_topic_data:
        return hidden
    # Get which icon was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return hidden

    alert_color = "#dc3545"
    neutral_color = "#6c757d"

    def label(value):
        """Return a capitalized display label, or "Unknown" if missing."""
        # str() guards against NaN / non-string cells, on which a bare
        # .capitalize() call would raise AttributeError.
        return "Unknown" if pd.isna(value) else str(value).capitalize()

    def badge(text, background):
        """Return a small white-on-color tag."""
        return html.Span(
            text,
            className="dialog-tag-small",
            style={"backgroundColor": background, "color": "white"},
        )

    def status_cell(row, column, alert_value):
        """Return a cell for *column*, red when it equals *alert_value*."""
        value = row.get(column)
        background = alert_color if value == alert_value else neutral_color
        return html.Td(badge(label(value), background))

    def tags_display(row):
        """Return the first three tags plus a "+N" overflow marker."""
        tags_str = row.get("consolidated_tags", "")
        if pd.isna(tags_str):
            return html.Span(
                "No tags",
                style={"color": "var(--muted-foreground)", "fontStyle": "italic"},
            )
        tags = [tag.strip() for tag in str(tags_str).split(",") if tag.strip()]
        chips = [badge(tag, neutral_color) for tag in tags[:3]]  # first 3 only
        if len(tags) > 3:
            chips.append(badge(f"+{len(tags) - 3}", neutral_color))
        return html.Div(chips, className="dialog-tags-cell")

    # prop_id looks like '<JSON id>.<property>'; split on the LAST dot only,
    # because the JSON id itself contains a dot whenever the root cause does.
    triggered_id = ctx.triggered[0]["prop_id"]
    root_cause = json.loads(triggered_id.rsplit(".", 1)[0])["index"]
    topic_name = selected_topic_data["topic_name"]
    file_contents = selected_topic_data["file_contents"]
    # Get the full data
    content_type, content_string = file_contents.split(",")
    decoded = base64.b64decode(content_string)
    if (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ):
        df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    else:  # Assume CSV
        df_full = pd.read_csv(
            io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
        )
    # Filter to this topic and root cause
    filtered_conversations = df_full[
        (df_full["deduplicated_topic_name"] == topic_name)
        & (df_full["root_cause_subcluster"] == root_cause)
    ]
    # Header row
    table_rows = [
        html.Tr(
            [
                html.Th("Chat ID"),
                html.Th("Summary"),
                html.Th("Sentiment"),
                html.Th("Resolution"),
                html.Th("Urgency"),
                html.Th("Tags"),
                html.Th("Action"),
            ]
        )
    ]
    # Data rows
    for _, row in filtered_conversations.iterrows():
        table_rows.append(
            html.Tr(
                [
                    html.Td(
                        row["id"],
                        style={"fontFamily": "monospace", "fontSize": "0.8rem"},
                    ),
                    html.Td(
                        row.get("Summary", "No summary"),
                        className="dialog-summary-cell",
                    ),
                    # Negative / unresolved / urgent values are shown in red,
                    # everything else in grey
                    status_cell(row, "Sentiment", "negative"),
                    status_cell(row, "Resolution", "unresolved"),
                    status_cell(row, "Urgency", "urgent"),
                    html.Td(tags_display(row)),
                    html.Td(
                        html.Button(
                            [
                                html.I(
                                    className="fas fa-eye",
                                    style={"marginRight": "0.25rem"},
                                ),
                                "View chat",
                            ],
                            id={"type": "open-chat-btn-rc", "index": row["id"]},
                            className="open-chat-btn",
                            n_clicks=0,
                        )
                    ),
                ]
            )
        )
    table = html.Table(table_rows, className="dialogs-table")
    modal_title = f"Dialogs with Root Cause: {root_cause} (Topic: {topic_name})"
    count_info = html.P(
        f"Found {len(filtered_conversations)} dialogs with this root cause",
        style={
            "margin": "0 0 1rem 0",
            "color": "var(--muted-foreground)",
            "fontSize": "0.875rem",
        },
    )
    content = html.Div([count_info, table])
    return {"display": "flex"}, modal_title, content
# Callback to close root cause modal
@callback(
    Output("root-cause-modal", "style", allow_duplicate=True),
    [Input("close-root-cause-modal-btn", "n_clicks")],
    prevent_initial_call=True,
)
def close_root_cause_modal(n_clicks):
    """Hide the root cause modal.

    The returned style is the same whatever the value of ``n_clicks``.
    """
    hidden_style = {"display": "none"}
    return hidden_style
# Callback to open conversation modal from root cause table
@callback(
    [
        Output("conversation-modal", "style", allow_duplicate=True),
        Output("conversation-content", "children", allow_duplicate=True),
        Output("conversation-subheader", "children", allow_duplicate=True),
    ],
    [Input({"type": "open-chat-btn-rc", "index": dash.dependencies.ALL}, "n_clicks")],
    [State("upload-data", "contents")],
    prevent_initial_call=True,
)
def open_conversation_from_root_cause_table(n_clicks_list, file_contents):
    """Show the full conversation for a row clicked in the root cause table.

    Args:
        n_clicks_list: ``n_clicks`` of every ``open-chat-btn-rc`` button.
        file_contents: ``contents`` of the ``upload-data`` component, in the
            form ``"<content type>,<base64 payload>"``.

    Returns:
        A ``(modal style, conversation content, subheader)`` tuple. The modal
        is hidden when no button has been clicked or no file is loaded.
    """
    hidden = ({"display": "none"}, "", "")
    # Check if any button was clicked
    if not any(n_clicks_list) or not file_contents:
        return hidden
    # Get which button was clicked
    ctx = dash.callback_context
    if not ctx.triggered:
        return hidden
    # prop_id looks like '<JSON id>.<property>'; split on the LAST dot only,
    # because the JSON id itself contains a dot whenever the chat ID does.
    triggered_id = ctx.triggered[0]["prop_id"]
    chat_id = json.loads(triggered_id.rsplit(".", 1)[0])["index"]
    # Get the full conversation from the uploaded file
    content_type, content_string = file_contents.split(",")
    decoded = base64.b64decode(content_string)
    if (
        content_type
        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
    ):
        df_full = pd.read_excel(io.BytesIO(decoded), dtype={"Root_Cause": str})
    else:  # Assume CSV
        df_full = pd.read_csv(
            io.StringIO(decoded.decode("utf-8")), dtype={"Root_Cause": str}
        )
    # Find the conversation with this chat ID
    conversation_row = df_full[df_full["id"] == chat_id]
    # If not found, compare both sides as strings, then try an integer ID
    if conversation_row.empty:
        conversation_row = df_full[df_full["id"].astype(str) == str(chat_id)]
    if conversation_row.empty:
        try:
            conversation_row = df_full[df_full["id"] == int(chat_id)]
        except (ValueError, TypeError):
            pass
    if conversation_row.empty:
        conversation_text = f"Conversation not found for Chat ID: {chat_id}"
        subheader_content = f"Chat ID: {chat_id} (Not Found)"
    else:
        row = conversation_row.iloc[0]
        no_conversation = "No conversation data available."
        conversation_text = row.get("conversation", no_conversation)
        # An empty cell is read as NaN; show the placeholder in that case too.
        if pd.isna(conversation_text):
            conversation_text = no_conversation
        # Get additional metadata
        root_cause = row.get("root_cause_subcluster", "Unknown")
        cluster_name = row.get("deduplicated_topic_name", "Unknown cluster")
        # Create subheader with metadata including root cause
        subheader_content = html.Div(
            [
                html.Span(
                    f"Chat ID: {chat_id}",
                    style={"fontWeight": "600", "marginRight": "1rem"},
                ),
                html.Span(
                    f"Cluster: {cluster_name}",
                    style={"color": "hsl(215.4, 16.3%, 46.9%)", "marginRight": "1rem"},
                ),
                html.Span(
                    f"Root Cause: {root_cause}",
                    style={"color": "#8b6f47", "fontWeight": "500"},
                ),
            ]
        )
    return {"display": "flex"}, conversation_text, subheader_content
if __name__ == "__main__":
    # Start the Dash server (debug mode off) only when run as a script.
    app.run(debug=False)