File size: 2,019 Bytes
fc41be9
 
 
 
 
 
 
 
 
0930dbf
fc41be9
 
 
598e822
fc41be9
 
 
 
 
 
 
 
 
0930dbf
fc41be9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
import json
import pandas as pd
import plotly.express as px

# Benchmark categories, in display order. The position in this list is also
# used to derive a record's category from its question_id (blocks of ten ids
# starting at 81), so the order must not be changed casually.
CATEGORIES = [
    "Writing",
    "Roleplay",
    "Reasoning",
    "Math",
    "Coding",
    "Extraction",
    "STEM",
    "Humanities",
]

# Load and process the single model data
# NOTE(review): Streamlit's caching docs recommend st.cache_data for
# DataFrames; st.cache_resource hands out the same cached object each time, so
# callers should not mutate the returned frame in place -- confirm before
# switching decorators.
@st.cache_resource
def get_model_df():
    """Load the single-answer judgment records into a DataFrame.

    Reads ``data/gpt-4_single.jsonl`` (one JSON object per line) and adds a
    ``category`` field to every record, derived from its ``question_id``:
    ids 81-90 map to ``CATEGORIES[0]``, ids 91-100 to ``CATEGORIES[1]``, and
    so on in blocks of ten.

    Returns:
        pandas.DataFrame: one row per JSON record, with the extra
        ``category`` column.

    Raises:
        IndexError: if a ``question_id`` falls outside the id range covered
            by ``CATEGORIES``.
        KeyError: if a record has no ``question_id`` field.
    """
    records = []
    # Replace "gpt-4_single.jsonl" with the actual path to your JSONL file
    # Explicit UTF-8 so decoding does not depend on the platform default.
    with open("data/gpt-4_single.jsonl", "r", encoding="utf-8") as fin:
        for line in fin:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            obj = json.loads(line)
            cat_index = (obj["question_id"] - 81) // 10
            # Guard both ends: a negative index would otherwise wrap around
            # and silently pick a category from the end of the list.
            if not 0 <= cat_index < len(CATEGORIES):
                raise IndexError(
                    f"question_id {obj['question_id']} is outside the range "
                    f"81-{80 + 10 * len(CATEGORIES)} covered by CATEGORIES"
                )
            obj["category"] = CATEGORIES[cat_index]
            records.append(obj)
    return pd.DataFrame(records)

# Placeholder loader for the pairwise-comparison data
# Adapt this function based on how your "gpt-4_pair.jsonl" is structured
@st.cache_resource
def get_model_df_pair():
    """Return the pairwise judgment data.

    This is a stub: it currently returns an empty DataFrame and reads no
    file.
    """
    # Implement similar to get_model_df if you have pair data
    empty_frame = pd.DataFrame([])  # Placeholder
    return empty_frame

# Load the (cached) data frames once per script run.
df = get_model_df()
df_pair = get_model_df_pair()

# Streamlit app starts here
st.title('Model Performance Visualization')

# Select models to display; the first three models are pre-selected.
all_models = df["model"].unique()
selected_models = st.multiselect('Select Models', all_models, default=all_models[:3])

# Main app logic: nothing is drawn until at least one model is selected.
if selected_models:
    # Rows with a negative score are excluded from the averages
    # (presumably a sentinel for a failed judgment -- TODO confirm).
    # The filter does not depend on model or category, so apply it once.
    valid = df[df["score"] >= 0]

    # One mean score per (model, category) pair. A pair with no valid rows
    # yields NaN, keeping every model/category combination in the frame.
    scores_all = [
        {
            "model": model,
            "category": cat,
            "score": valid.loc[
                (valid["category"] == cat) & (valid["model"] == model), "score"
            ].mean(),
        }
        for model in selected_models
        for cat in CATEGORIES
    ]

    df_score = pd.DataFrame(scores_all)

    # Renaming models for better visualization
    rename_map = {
        # Define your renaming map here, if needed
    }
    # Apply the map to the "model" column only: a frame-wide replace would
    # also rewrite any category name or score that happens to equal a key.
    if rename_map:
        df_score["model"] = df_score["model"].replace(rename_map)

    # Generate the radial graph: one closed trace per model, with the
    # angular axis ordered as in CATEGORIES.
    fig = px.line_polar(
        df_score,
        r='score',
        theta='category',
        line_close=True,
        category_orders={"category": CATEGORIES},
        color='model',
        markers=True,
    )

    # Display the Plotly figure in Streamlit
    st.plotly_chart(fig)