Make it better?
- dashboard.py +42 -16
- opendashboards/assets/inspect.py +7 -4
- opendashboards/assets/io.py +21 -6
- opendashboards/assets/metric.py +43 -17
- opendashboards/assets/plot.py +14 -0
- opendashboards/utils/plotting.py +41 -5
dashboard.py CHANGED

```diff
@@ -40,13 +40,13 @@ with st.spinner(text=f'Checking wandb...'):
 
 
 ### Wandb Runs ###
-with st.sidebar:
+# with st.sidebar:
 
-    st.markdown('#')
-    st.sidebar.header(":violet[Select] Runs")
+#     st.markdown('#')
+#     st.sidebar.header(":violet[Select] Runs")
 
-    df_runs_subset = io.filter_dataframe(df_runs, demo_selection=df_runs.id.isin(DEFAULT_SELECTED_RUNS))
-    n_runs = len(df_runs_subset)
+#     df_runs_subset = io.filter_dataframe(df_runs, demo_selection=df_runs.id.isin(DEFAULT_SELECTED_RUNS))
+#     n_runs = len(df_runs_subset)
 
 metric.wandb(df_runs)
 
@@ -64,26 +64,28 @@ with tab1:
     st.subheader(":violet[Run] Data")
     with st.expander(f'Show :violet[raw] wandb data'):
 
-        filter_selected_checkbox = st.checkbox('Filter to selected runs', value=True)
-        df_to_show = df_runs_subset if filter_selected_checkbox else df_runs
+        # filter_selected_checkbox = st.checkbox('Filter to selected runs', value=True)
+        # df_to_show = df_runs_subset if filter_selected_checkbox else df_runs
 
-
-
-
-
-        ).set_index('Selected').sort_index(ascending=False),#.style.highlight_max(subset=df_runs_subset.index, color='lightgreen', axis=1),
+        edited_df = st.data_editor(
+            df_runs.assign(Select=False).set_index('Select'),
+            column_config={"Select": st.column_config.CheckboxColumn(required=True)},
+            disabled=df_runs.columns,
             use_container_width=True,
         )
+        df_runs_subset = df_runs[edited_df.index==True]
+        n_runs = len(df_runs_subset)
 
     if n_runs:
         df = io.load_data(df_runs_subset, load=True, save=True)
+        df = inspect.clean_data(df)
         df_long = inspect.explode_data(df)
        df_weights = inspect.weights(df)
     else:
         st.info(f'You must select at least one run to load data')
         st.stop()
 
-    metric.runs(df_long)
+    metric.runs(df_long, n_runs)
 
     st.markdown('#')
     st.subheader(":violet[Event] Data")
@@ -93,10 +95,12 @@ with tab1:
         num_rows = raw_data_col2.slider('Number of rows:', min_value=1, max_value=100, value=10, key='num_rows')
         st.dataframe(df_long.head(num_rows) if use_long_checkbox else df.head(num_rows),
                      use_container_width=True)
-
+
 
 
 ### UID Health ###
+# TODO: Live time - time elapsed since moving_averaged_score for selected UID was 0 (lower bound so use >Time)
+# TODO: Weight - Most recent weight for selected UID (Add warning if weight is 0 or most recent timestamp is not current)
 with tab2:
 
     st.markdown('#')
@@ -106,10 +110,31 @@ with tab2:
     uid_src = st.radio('Select one:', ['followup', 'answer'], horizontal=True, key='uid_src')
 
     metric.uids(df_long, uid_src)
+    uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
+    with st.expander(f'Show UID health data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
+        st.markdown('#')
+        st.subheader(f"UID {uid_src.title()} :violet[Health]")
+        agg_uid_checkbox = st.checkbox('Aggregate UIDs', value=True)
+        if agg_uid_checkbox:
+            metric.uids(df_long, uid_src, uids)
+        else:
+            for uid in uids:
+                st.caption(f'UID: {uid}')
+                metric.uids(df_long, uid_src, [uid])
+
+        st.subheader(f'Cumulative completion frequency')
+
+        freq_col1, freq_col2 = st.columns(2)
+        freq_ntop = freq_col1.slider('Number of Completions:', min_value=10, max_value=1000, value=100, key='freq_ntop')
+        freq_rm_empty = freq_col2.checkbox('Remove empty (failed)', value=True, key='freq_rm_empty')
+        freq_cumulative = freq_col2.checkbox('Cumulative', value=False, key='freq_cumulative')
+        freq_normalize = freq_col2.checkbox('Normalize', value=True, key='freq_normalize')
+
+        plot.uid_completion_counts(df_long, uids=uids, src=uid_src, ntop=freq_ntop, rm_empty=freq_rm_empty, cumulative=freq_cumulative, normalize=freq_normalize)
+
 
-    with st.expander(f'Show UID
+    with st.expander(f'Show UID weights data for **{n_runs} selected runs** and **{len(uids)} selected UIDs**'):
 
-        uids = st.multiselect('UID:', sorted(df_long[f'{uid_src}_uids'].unique()), key='uid')
         st.markdown('#')
         st.subheader(f"UID {uid_src.title()} :violet[Weights]")
 
@@ -189,6 +214,7 @@ with tab3:
         ntop=completion_ntop,
         completions=completion_select,
     )
+    # TODO: show the UIDs which have used the selected completions
 
 
     with st.expander(f'Show **{completion_src}** completion length data for **{n_runs} selected runs**'):
```
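The interesting change here is the run selector: the sidebar filter is commented out and replaced by an inline `st.data_editor` with a checkbox column. Below is a self-contained sketch of that selection pattern using a made-up three-row frame (the toy data is illustrative; the pattern itself is lifted from the hunk above, and needs roughly streamlit 1.23+, where `st.data_editor` and `st.column_config.CheckboxColumn` are stable API):

```python
import pandas as pd
import streamlit as st

# Stand-in for the wandb runs table produced by io.load_runs.
df_runs = pd.DataFrame({'id': ['a1', 'b2', 'c3'],
                        'state': ['running', 'finished', 'crashed']})

# A boolean 'Select' column becomes the index, so it renders as the
# leading checkbox column; all real columns stay read-only.
edited_df = st.data_editor(
    df_runs.assign(Select=False).set_index('Select'),
    column_config={"Select": st.column_config.CheckboxColumn(required=True)},
    disabled=df_runs.columns,
    use_container_width=True,
)

# The checkbox state lives in the edited index; since st.data_editor
# preserves row order, a positional boolean mask recovers the rows.
df_runs_subset = df_runs[edited_df.index == True]
n_runs = len(df_runs_subset)
```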
opendashboards/assets/inspect.py CHANGED

```diff
@@ -3,6 +3,9 @@ import streamlit as st
 import pandas as pd
 import opendashboards.utils.utils as utils
 
+def clean_data(df):
+    return df.dropna(subset=df.filter(regex='completions|rewards').columns, how='all')
+
 @st.cache_data
 def explode_data(df):
     list_cols = utils.get_list_col_lengths(df)
@@ -28,10 +31,10 @@ def weights(df, index='_timestamp'):
 
     # rename columns
     scores.rename({i: f'UID-{i}' for i in range(scores.shape[1])}, axis=1, inplace=True)
-    return scores
-
+    return scores
+
 def run_event_data(df_runs, df, selected_runs):
-
+
     st.markdown('#')
 
     show_col1, show_col2 = st.columns(2)
@@ -52,6 +55,6 @@ def run_event_data(df_runs, df, selected_runs):
             "url": st.column_config.LinkColumn("URL"),
         }
     )
-
+
 def highlight_row(row, expr, color='lightgrey', bg_color='white'):
     return [f'background-color:{color}' if expr else f'background-color:{bg_color}'] * len(row)
```
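The new `clean_data` drops events that produced neither completions nor rewards before anything is exploded or plotted. A toy illustration of the `dropna(subset=..., how='all')` behaviour it relies on (cell values invented; the column names follow the `{src}_completions`/`{src}_rewards` scheme used elsewhere in this commit):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({
    '_timestamp': [1, 2, 3],
    'answer_completions': [['ok'], np.nan, np.nan],
    'answer_rewards': [[1.0], np.nan, [0.5]],
})

# filter(regex='completions|rewards') picks out the event columns;
# how='all' drops a row only when every one of them is missing.
cleaned = df.dropna(subset=df.filter(regex='completions|rewards').columns, how='all')
print(cleaned.index.tolist())  # [0, 2] -- row 1 had neither completions nor rewards
```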
opendashboards/assets/io.py CHANGED

```diff
@@ -16,15 +16,25 @@ from pandas.api.types import (
 @st.cache_data
 def load_runs(project, filters, min_steps=10):
     runs = []
+    n_events = 0
+    successful = 0
+    progress = st.progress(0, 'Fetching runs from wandb')
     msg = st.empty()
-
-
+
+    all_runs = utils.get_runs(project, filters, api_key=st.secrets['WANDB_API_KEY'])
+    for i, run in enumerate(all_runs):
+
+        summary = run.summary
+        step = summary.get('_step',0)
         if step < min_steps:
             msg.warning(f'Skipped run `{run.name}` because it contains {step} events (<{min_steps})')
             continue
-
-
-
+
+        prog_msg = f'Loading data {i/len(all_runs)*100:.0f}% ({successful}/{len(all_runs)} runs, {n_events} events)'
+        progress.progress(i/len(all_runs),f'{prog_msg}... **fetching** `{run.name}`')
+
+        duration = summary.get('_runtime')
+        end_time = summary.get('_timestamp')
         # extract values for selected tags
         rules = {'hotkey': re.compile('^[0-9a-z]{48}$',re.IGNORECASE), 'version': re.compile('^\\d\.\\d+\.\\d+$'), 'spec_version': re.compile('\\d{4}$')}
         tags = {k: tag for k, rule in rules.items() for tag in run.tags if rule.match(tag)}
@@ -34,6 +44,7 @@ def load_runs(project, filters, min_steps=10):
         runs.append({
             'state': run.state,
             'num_steps': step,
+            'num_completions': step*sum(len(v) for k, v in run.summary.items() if k.endswith('completions') and isinstance(v, list)),
             'entity': run.entity,
             'id': run.id,
             'name': run.name,
@@ -42,9 +53,13 @@ def load_runs(project, filters, min_steps=10):
             'path': os.path.join(run.entity, run.project, run.id),
             'start_time': pd.to_datetime(end_time-duration, unit="s"),
             'end_time': pd.to_datetime(end_time, unit="s"),
-            'duration': pd.
+            'duration': pd.to_timedelta(duration, unit="s").round('s'),
             **tags
         })
+        n_events += step
+        successful += 1
+
+    progress.empty()
     msg.empty()
     return pd.DataFrame(runs).astype({'state': 'category', 'hotkey': 'category', 'version': 'category', 'spec_version': 'category'})
 
```
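The loader now reports incremental progress while it iterates runs, then clears the bar. The shape of that pattern, reduced to a runnable sketch (the run names and sleep are placeholders for the per-run download work):

```python
import time
import streamlit as st

items = ['run-a', 'run-b', 'run-c']  # stand-ins for the wandb runs
progress = st.progress(0, 'Fetching runs from wandb')
for i, name in enumerate(items):
    progress.progress(i / len(items), f'Loading data {i / len(items) * 100:.0f}%... fetching `{name}`')
    time.sleep(0.5)  # stand-in for fetching one run
progress.empty()  # remove the bar once everything has loaded
```

The new `duration` field is also worth a note: `pd.to_timedelta(3725.65, unit='s').round('s')` gives `Timedelta('0 days 01:02:06')`, i.e. the raw wandb `_runtime` float rounded to whole seconds.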
opendashboards/assets/metric.py CHANGED

```diff
@@ -1,7 +1,20 @@
 import time
+import numerize
 import pandas as pd
 import streamlit as st
 
+def fmt(number):
+    units = ['', 'k', 'M', 'B']
+    magnitude = 0
+    while abs(number) >= 1000 and magnitude < len(units) - 1:
+        magnitude += 1
+        number /= 1000
+
+    if units[magnitude]:
+        return f'{number:.2f}{units[magnitude]}'
+    else:
+        return f'{number:.0f}{units[magnitude]}'
+
 
 @st.cache_data
 def wandb(df_runs):
@@ -9,50 +22,63 @@ def wandb(df_runs):
     # get rows where start time is older than 24h ago
     df_runs_old = df_runs.loc[df_runs.start_time < pd.to_datetime(time.time()-24*60*60, unit='s')]
 
-    col1, col2, col3 = st.columns(
+    col1, col2, col3, col4 = st.columns(4)
 
-
-
-
+    # Convert to appropriate units e.g. 1.2k instead of 1200.
+    col1.metric('Runs', fmt(df_runs.shape[0]), delta=fmt(df_runs.shape[0]-df_runs_old.shape[0])+' (24h)')
+    col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
+    col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
+    col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
+
     st.markdown('----')
 
 
 @st.cache_data
-def runs(df_long):
+def runs(df_long, n_runs):
 
     col1, col2, col3 = st.columns(3)
-    col1.metric(label="Runs", value=
-    col1.metric(label="Events", value=df_long.shape[0])
+    col1.metric(label="Runs", value=n_runs)
+    col1.metric(label="Events", value=df_long.shape[0])
     col2.metric(label="Followup UIDs", value=df_long.followup_uids.nunique())
     col2.metric(label="Answer UIDs", value=df_long.answer_uids.nunique())
-    col3.metric(label="
-    col3.metric(label="
+    col3.metric(label="Unique Followups", value=df_long.followup_completions.nunique())
+    col3.metric(label="Unique Answers", value=df_long.answer_completions.nunique())
     st.markdown('----')
 
 
-
+
 @st.cache_data
-def uids(df_long, src,
+def uids(df_long, src, uids=None):
 
     uid_col = f'{src}_uids'
     completion_col = f'{src}_completions'
     nsfw_col = f'{src}_nsfw_scores'
     reward_col = f'{src}_rewards'
 
-    if
-    df_long = df_long.loc[df_long[uid_col]
+    if uids:
+        df_long = df_long.loc[df_long[uid_col].isin(uids)]
 
-    col1, col2, col3 = st.columns(
+    col1, col2, col3, col4 = st.columns(4)
     col1.metric(
         label="Success %",
-        value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}'
+        value=f'{df_long.loc[df_long[completion_col].str.len() > 0].shape[0]/df_long.shape[0] * 100:.1f}',
+        help='Number of successful completions divided by total number of events'
     )
     col2.metric(
         label="Diversity %",
-        value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}'
+        value=f'{df_long[completion_col].nunique()/df_long.shape[0] * 100:.1f}',
+        help='Number of unique completions divided by total number of events'
     )
+    # uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
+
     col3.metric(
+        label="Uniqueness %",
+        value=f'{df_long.groupby(uid_col)[completion_col].nunique().mean()/df_long[completion_col].nunique() * 100:.1f}',
+        help='Average number of unique completions per uid divided by all unique completions'
+    )
+    col4.metric(
         label="Toxicity %",
-        value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else '
+        value=f'{df_long[nsfw_col].mean() * 100:.1f}' if nsfw_col in df_long.columns else '--',
+        help='Average toxicity score of all events'
     )
     st.markdown('----')
```
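Note that the newly added `import numerize` is never referenced in this file; the hand-rolled `fmt` beneath it does the human-readable formatting instead. For reference, its behaviour on a few values:

```python
def fmt(number):
    units = ['', 'k', 'M', 'B']
    magnitude = 0
    while abs(number) >= 1000 and magnitude < len(units) - 1:
        magnitude += 1
        number /= 1000

    if units[magnitude]:
        return f'{number:.2f}{units[magnitude]}'
    else:
        return f'{number:.0f}{units[magnitude]}'

print(fmt(987))            # 987
print(fmt(1234))           # 1.23k
print(fmt(5_600_000))      # 5.60M
print(fmt(2_500_000_000))  # 2.50B
```

Values below 1000 are printed without decimals; anything past the last unit simply stays expressed in billions rather than overflowing the unit list.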
opendashboards/assets/plot.py CHANGED

```diff
@@ -65,4 +65,18 @@ def completion_length_time(df, completion_col, uid_col, time_col, words=False):
             words=words
         ),
         use_container_width=True
     )
+
+def uid_completion_counts(df, uids, src, rm_empty, ntop=100, cumulative=False, normalize=True):
+    return st.plotly_chart(
+        plotting.plot_uid_completion_counts(
+            df,
+            uids=uids,
+            src=src,
+            rm_empty=rm_empty,
+            ntop=ntop,
+            cumulative=cumulative,
+            normalize=normalize
+        ),
+        use_container_width=True
+    )
```
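This wrapper is consumed by the new UID Health expander in dashboard.py (line 133 of the new file), which passes everything by keyword:

```python
plot.uid_completion_counts(
    df_long,
    uids=uids,
    src=uid_src,
    ntop=freq_ntop,
    rm_empty=freq_rm_empty,
    cumulative=freq_cumulative,
    normalize=freq_normalize,
)
```

One small wrinkle: `rm_empty` has no default in the wrapper's signature even though the underlying `plotting.plot_uid_completion_counts` defaults it to `True`, so callers must always supply it.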
opendashboards/utils/plotting.py CHANGED

```diff
@@ -249,7 +249,6 @@ def plot_leaderboard(
     else:
         index = rankings.index.astype(str)
 
-    print(f"Using top {ntop} {group_on} by {agg_col}: \n{rankings}")
     return px.bar(
         x=rankings,
         y=index,
@@ -307,16 +306,16 @@ def plot_completion_length_time(
     uid_col: str = "answer_uids",
     completion_col: str = "answer_completions",
     time_col: str = "answer_times",
-    words: bool = False,
+    words: bool = False,
 ) -> go.Figure:
-
+
     df = df[[uid_col, completion_col, time_col]].explode(column=[uid_col, completion_col, time_col])
     df["time"] = df[time_col].astype(float)
     if words:
         df["completion_length"] = df[completion_col].str.split().str.len()
     else:
         df["completion_length"] = df[completion_col].str.len()
-
+
     return px.scatter(
         df,
         x='completion_length',
@@ -329,7 +328,44 @@ def plot_completion_length_time(
         opacity=0.35,
         **plotly_config,
     )
-
+
+def plot_uid_completion_counts(
+    df: pd.DataFrame,
+    uids: List[int],
+    src: str = 'answer',
+    rm_empty: bool = True,
+    ntop: int = 100,
+    cumulative: bool = False,
+    normalize: bool = True,
+) -> go.Figure:
+
+    completion_col = f'{src}_completions'
+    uid_col = f'{src}_uids'
+    if rm_empty:
+        df = df.loc[df[completion_col].str.len()>0]
+
+    df = df.loc[df[uid_col].isin(uids)]
+
+    g = df.groupby(uid_col)[completion_col].value_counts(normalize=normalize).reset_index(level=1)
+    y_col = g.columns[-1]
+
+    # rescale each group to have a max of 1 if normalize is True
+    if cumulative:
+        g[y_col] = g.groupby(level=0)[y_col].cumsum().transform(lambda x: x/x.max() if normalize else x)
+
+    # get top n completions
+    g = g.groupby(level=0).head(ntop)
+
+    # # create a rank column which increments by one and resets when the uid changes
+    g['rank'] = g.groupby(level=0).cumcount()+1
+
+    return px.line(g.sort_index().reset_index(),
+                   x='rank',y=y_col,color=uid_col,
+                   labels={'rank':'Top Completions',uid_col:'UID',y_col:y_col.replace('_',' ').title()},
+                   title=f'{src.title()} Completion {y_col.replace("_"," ").title()}s by Rank',
+                   **plotly_config,
+                   ).update_traces(opacity=0.7)
+
 
 def plot_network_embedding(
     df: pd.DataFrame,
```