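"""Streamlit app for evaluating Darija language-identification (LID) models
on the Darija-LID benchmark.

Supports three input modes: a locally uploaded fastText model, a fastText
model hosted on the HuggingFace Hub, or a CSV file of precomputed
predictions. Results can be submitted to a simple JSON-file leaderboard.
"""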
import os
import json
import streamlit as st
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import fasttext
from huggingface_hub import hf_hub_download

# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')

HF_TOKEN = os.getenv('HF_TOKEN')  # read token for the private benchmark repo

def load_leaderboard():
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []

def save_leaderboard(leaderboard):
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)

def load_test_data() -> list[str]:
    """Download the benchmark file from the Hub and return its lines.

    Each line follows the fastText training format: '__label__<label> <text>'.
    """
    # Create cache directory if it doesn't exist
    os.makedirs(CACHE_DIR, exist_ok=True)

    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset')

    with open(path, "r") as f:
        lines = f.readlines()

    return [line.rstrip('\n') for line in lines]
    
    
def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    """Compute accuracy and weighted-average precision, recall, and F1."""
    accuracy = accuracy_score(y_true, y_pred)
    # Note: sklearn ignores pos_label whenever average != 'binary', so it is
    # not passed here; metrics are weighted over both classes ('ary', 'other').
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted")
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1)
    }

def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    """Predict a label for each text and strip the '__label__' prefix."""
    # model.predict on a list returns (labels, probabilities), where labels
    # holds one tuple of predicted labels per input text.
    labels, _ = model.predict(texts)
    return [x[0].split('__label__')[1] for x in labels]

def load_hf_fasttext_model(model_id):
    """Download 'model.bin' from a HuggingFace model repo and load it."""
    model_path = hf_hub_download(repo_id=model_id, filename="model.bin", cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    # fastText loads the model fully into memory, so the cached file can be removed.
    os.remove(model_path)
    return model

def load_local_fasttext_model(model_path):
    model = fasttext.load_model(model_path)
    return model
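
# Quick sanity check (hypothetical usage; the sample input and expected
# output are illustrative only):
#   model = load_local_fasttext_model('model.bin')
#   predict_with_fasttext(model, ['some darija sentence'])  # -> ['ary']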

def load_predictions(uploaded_file):
    """Read an uploaded CSV of predictions and validate its labels."""
    predictions_df = pd.read_csv(uploaded_file)
    assert 'prediction' in predictions_df.columns, "Predictions file must contain a 'prediction' column"
    y_pred = list(predictions_df['prediction'].values)
    # Subset check: a file predicting only one of the two classes is still valid.
    assert set(y_pred) <= {'ary', 'other'}, "Predictions must contain only 'ary' or 'other'"
    return y_pred
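
# Expected CSV layout (hypothetical example):
#   prediction
#   ary
#   other
#   ...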


def main():
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set atlasia/Darija-LID-benchmark.")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions.")

    # Load test data and split each fastText-format line into text and label
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio("Select model type:", ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])

    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            model = load_local_fasttext_model("temp_model.bin")
            y_pred = predict_with_fasttext(model, texts)
            os.remove("temp_model.bin")

    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)

    else:  
        uploaded_file = st.file_uploader("Upload predictions file (CSV with 'prediction' column containing either 'ary' or 'other')", type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            assert len(y_pred) == len(labels), "Predictions and labels must have the same length. Make sure the predictions are for the test set."


    # Evaluation section: runs once any of the branches above has set y_pred
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)
        
        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    # Use the exact radio-button label when checking the model type.
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")

    # Display leaderboard
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")

if __name__ == "__main__":
    main()
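
# Local usage (assuming this file is saved as app.py):
#   HF_TOKEN=<your-read-token> streamlit run app.py
# HF_TOKEN must have read access to the private dataset 'atlasia/Darija-LID-private'.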