import os
import json

import streamlit as st
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import fasttext
from huggingface_hub import hf_hub_download

# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
HF_TOKEN = os.getenv('HF_TOKEN')
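

# Leaderboard persistence: submissions are stored as a list of dicts in a local JSON file.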
def load_leaderboard():
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []


def save_leaderboard(leaderboard):
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)
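

# The benchmark lives in the private atlasia/Darija-LID-private dataset repo, so HF_TOKEN needs read access.
# Each line of benchmark.txt follows the fastText format: '__label__<label> <text>'.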
def load_test_data() -> list[str]:
    # Create cache directory if it doesn't exist
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset')
    with open(path, "r") as f:
        lines = f.readlines()
    samples = [x.replace('\n', '') for x in lines]
    return samples
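

# Compute accuracy plus weighted-average precision, recall and F1 over the 'ary'/'other' labels.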
def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    accuracy = accuracy_score(y_true, y_pred)
    # pos_label is ignored by scikit-learn when average != 'binary', so it is not passed here
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1)
    }
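

# fastText's predict() returns (labels, probabilities); each label comes back as a '__label__<label>' string.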
def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    preds = model.predict(texts)
    y_hat = [x[0].split('__label__')[1] for x in preds[0]]
    return y_hat
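

# Download a fastText model stored as 'model.bin' in a Hugging Face model repo and load it into memory.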
def load_hf_fasttext_model(model_id):
    model_path = hf_hub_download(repo_id=model_id, filename="model.bin", cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    os.remove(model_path)
    return model


def load_local_fasttext_model(model_path):
    model = fasttext.load_model(model_path)
    return model
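

# Parse an uploaded predictions CSV. Expected layout (illustrative example):
#
#     prediction
#     ary
#     other
#     ary
#
# One row per benchmark sample, in the same order as the test set.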
def load_predictions(uploaded_file):
    predictions_df = pd.read_csv(uploaded_file)
    assert 'prediction' in predictions_df.columns, "Predictions file must contain a 'prediction' column"
    y_pred = list(predictions_df['prediction'].values)
    assert set(y_pred).issubset({'ary', 'other'}), "Predictions must contain only 'ary' or 'other'"
    return y_pred
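

# Streamlit app: pick an input mode (local fastText model, Hub fastText model, or a predictions CSV),
# score it against the benchmark, and optionally submit the results to the leaderboard.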
def main():
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set atlasia/Darija-LID-benchmark.")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions.")

    # Load test data
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio("Select model type:", ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])

    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            model = load_local_fasttext_model("temp_model.bin")
            y_pred = predict_with_fasttext(model, texts)
            os.remove("temp_model.bin")
    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)
    else:
        uploaded_file = st.file_uploader("Upload predictions file (CSV with 'prediction' column containing either 'ary' or 'other')", type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            assert len(y_pred) == len(labels), "Predictions and labels must have the same length. Make sure the predictions are for the test set."

    # Evaluation section
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)

        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")

    # Display leaderboard
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")


if __name__ == "__main__":
    main()
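
# To run the app locally (assuming this file is saved as app.py and streamlit is installed):
#     streamlit run app.py
# HF_TOKEN must be set in the environment so the private benchmark can be downloaded.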