"""Streamlit app to evaluate language-identification models on the
Darija-LID benchmark and maintain a simple JSON leaderboard.

Supports three submission paths: a local fastText ``.bin`` model, a
fastText model hosted on the HuggingFace Hub, or a CSV of precomputed
predictions.
"""

import json
import os

import fasttext
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
HF_TOKEN = os.getenv('HF_TOKEN')


def load_leaderboard() -> list:
    """Return the leaderboard entries from disk, or [] when no file exists."""
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []


def save_leaderboard(leaderboard: list) -> None:
    """Persist the leaderboard entries as pretty-printed JSON."""
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)


def load_test_data() -> list[str]:
    """Download the private benchmark file and return its lines.

    Each line is in fastText format: ``__label__<lang> <text>``.
    """
    # Create cache directory if it doesn't exist.
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset',
    )
    with open(path, "r") as f:
        return [line.rstrip('\n') for line in f]


def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    """Compute accuracy plus weighted precision/recall/F1.

    Returns a dict with float values under keys
    ``accuracy``/``precision``/``recall``/``f1``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # NOTE: pos_label is ignored when average='weighted' (sklearn emits a
    # warning for it), so it is deliberately not passed; results unchanged.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted")
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1),
    }


def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    """Run the fastText model on *texts* and strip the '__label__' prefix."""
    label_tuples, _probs = model.predict(texts)
    return [labels[0].split('__label__')[1] for labels in label_tuples]


def load_hf_fasttext_model(model_id: str):
    """Download ``model.bin`` from a HuggingFace repo and load it."""
    model_path = hf_hub_download(
        repo_id=model_id, filename="model.bin", cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    # The model is fully loaded in memory; the on-disk copy is deleted to
    # save space. NOTE(review): this also defeats CACHE_DIR caching across
    # runs — confirm that is intended.
    os.remove(model_path)
    return model


def load_local_fasttext_model(model_path: str):
    """Load a fastText model from a local path."""
    return fasttext.load_model(model_path)


def load_predictions(uploaded_file) -> list[str]:
    """Read an uploaded CSV of predictions and validate its labels.

    Raises ValueError when the 'prediction' column is missing or contains
    labels other than 'ary'/'other'.
    """
    predictions_df = pd.read_csv(uploaded_file)
    # Explicit raises instead of assert: asserts are stripped under -O.
    if 'prediction' not in predictions_df.columns:
        raise ValueError("Predictions file must contain a 'prediction' column")
    y_pred = list(predictions_df['prediction'].values)
    # Subset check (was strict equality, which wrongly rejected a valid
    # prediction file containing only one of the two classes).
    if not set(y_pred) <= {'ary', 'other'}:
        raise ValueError("Predictions must contain only 'ary' or 'other'")
    return y_pred


def main():
    """Streamlit entry point: gather a submission, score it, show results."""
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set atlasia/Darija-LID-benchmark.")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions.")

    # Load test data: each line is '__label__<lang> <text>'.
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio(
        "Select model type:",
        ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])

    # Only set on the HuggingFace path; referenced when building the
    # leaderboard entry below (was previously undefined on other paths).
    model_id = None

    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            try:
                model = load_local_fasttext_model("temp_model.bin")
                y_pred = predict_with_fasttext(model, texts)
            finally:
                # Clean up the temp file even if loading/prediction fails
                # (previously it was leaked on error).
                os.remove("temp_model.bin")
    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)
    else:
        uploaded_file = st.file_uploader(
            "Upload predictions file (CSV with 'prediction' column containing either 'ary' or 'other')",
            type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            if len(y_pred) != len(labels):
                raise ValueError(
                    "Predictions and labels must have the same length. Make sure the predictions are for the test set.")

    # Evaluation section — y_pred exists only when a submission was provided.
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)

        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    # BUG FIX: the comparison was against "HuggingFace Model",
                    # which never matched the radio label, so HF model ids were
                    # always recorded as "uploaded_file".
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results,
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")

    # Display leaderboard
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")


if __name__ == "__main__":
    main()