import os
import json
import streamlit as st
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import fasttext
from huggingface_hub import hf_hub_download
# Constants
LEADERBOARD_FILE = 'leaderboard.json'
TEST_SET = 'atlasia/Darija-LID-benchmark'
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
HF_TOKEN = os.getenv('HF_TOKEN')
def load_leaderboard():
    if os.path.exists(LEADERBOARD_FILE):
        with open(LEADERBOARD_FILE, 'r') as f:
            return json.load(f)
    return []
def save_leaderboard(leaderboard):
    with open(LEADERBOARD_FILE, 'w') as f:
        json.dump(leaderboard, f, indent=2)
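# Sketch of what one leaderboard entry looks like, as assembled in main() below
# (illustrative values only):
#   {"name": "...", "model_type": "HuggingFace FastText Model", "model_id": "...",
#    "accuracy": 0.99, "precision": 0.99, "recall": 0.99, "f1": 0.99}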
def load_test_data() -> list[str]:
    # Create cache directory if it doesn't exist
    os.makedirs(CACHE_DIR, exist_ok=True)
    path = hf_hub_download(
        repo_id='atlasia/Darija-LID-private',
        filename='benchmark.txt',
        cache_dir=CACHE_DIR,
        token=HF_TOKEN,
        repo_type='dataset')
    with open(path, "r") as f:
        lines = f.readlines()
    # Strip the trailing newline from each benchmark line
    samples = [line.rstrip('\n') for line in lines]
    return samples
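# Each line of benchmark.txt is assumed to follow the fastText training format,
# e.g. '__label__ary <sample text>'; main() below splits each line into its gold
# label and its raw text.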
def evaluate_predictions(y_true: list[str], y_pred: list[str]) -> dict:
    accuracy = accuracy_score(y_true, y_pred)
    # pos_label only applies when average='binary', so it is omitted here
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    return {
        'accuracy': float(accuracy),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1)
    }
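# Usage sketch with hypothetical labels (not the real benchmark):
#   evaluate_predictions(['ary', 'other', 'ary'], ['ary', 'other', 'other'])
# yields accuracy 2/3 alongside support-weighted precision, recall, and F1.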
def predict_with_fasttext(model, texts: list[str]) -> list[str]:
    # model.predict on a list returns (labels, probabilities), where each
    # labels entry is a tuple such as ('__label__ary',)
    pred_labels, _ = model.predict(texts)
    y_hat = [x[0].split('__label__')[1] for x in pred_labels]
    return y_hat
def load_hf_fasttext_model(model_id):
    model_path = hf_hub_download(repo_id=model_id, filename="model.bin", cache_dir=CACHE_DIR)
    model = fasttext.load_model(model_path)
    # Delete the downloaded file once the model is loaded into memory
    os.remove(model_path)
    return model
def load_local_fasttext_model(model_path):
    model = fasttext.load_model(model_path)
    return model
def load_predictions(uploaded_file):
    predictions_df = pd.read_csv(uploaded_file)
    assert 'prediction' in predictions_df.columns, "Predictions file must contain a 'prediction' column"
    y_pred = list(predictions_df['prediction'].values)
    # Subset check, so that a file predicting only a single class is still accepted
    assert set(y_pred) <= {'ary', 'other'}, "Predictions must contain only 'ary' or 'other'"
    return y_pred
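# A minimal valid predictions file (hypothetical contents) looks like:
#   prediction
#   ary
#   other
#   ary
# with exactly one row per benchmark sample, in test-set order.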
def main():
    st.title("Darija-LID Model Evaluation")
    st.write("Upload your model or provide a HuggingFace model ID to evaluate it on the Darija-LID test set atlasia/Darija-LID-benchmark.")
    st.write("Currently supports FastText models only. If you're using a different model, you can upload your predictions instead.")

    # Load the test data, then split each line into its raw text and its gold label
    test_data = load_test_data()
    texts = [' '.join(x.split()[1:]) for x in test_data]
    labels = [x.split('__label__')[1].split()[0] for x in test_data]

    # Model input section
    st.header("Model Input")
    model_type = st.radio("Select model type:", ["Local FastText Model", "HuggingFace FastText Model", "Predictions File"])
    if model_type == "Local FastText Model":
        uploaded_file = st.file_uploader("Upload FastText model (.bin)", type=['bin'])
        if uploaded_file:
            # fasttext.load_model expects a path on disk, so write the upload to a temporary file
            with open("temp_model.bin", "wb") as f:
                f.write(uploaded_file.getvalue())
            model = load_local_fasttext_model("temp_model.bin")
            y_pred = predict_with_fasttext(model, texts)
            os.remove("temp_model.bin")
    elif model_type == "HuggingFace FastText Model":
        model_id = st.text_input("Enter HuggingFace model ID:")
        if model_id:
            model = load_hf_fasttext_model(model_id)
            y_pred = predict_with_fasttext(model, texts)
    else:
        uploaded_file = st.file_uploader("Upload predictions file (CSV with a 'prediction' column containing either 'ary' or 'other')", type=['csv'])
        if uploaded_file:
            y_pred = load_predictions(uploaded_file)
            assert len(y_pred) == len(labels), "Predictions and labels must have the same length. Make sure the predictions are for the test set."
    # Evaluation section
    if 'y_pred' in locals():
        st.header("Evaluation Results")
        results = evaluate_predictions(labels, y_pred)

        # Display metrics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Accuracy", f"{results['accuracy']:.4f}")
        with col2:
            st.metric("Precision", f"{results['precision']:.4f}")
        with col3:
            st.metric("Recall", f"{results['recall']:.4f}")
        with col4:
            st.metric("F1 Score", f"{results['f1']:.4f}")

        # Leaderboard submission
        st.header("Submit to Leaderboard")
        submitter_name = st.text_input("Your Name:")
        if st.button("Submit to Leaderboard"):
            if submitter_name:
                leaderboard = load_leaderboard()
                entry = {
                    'name': submitter_name,
                    'model_type': model_type,
                    # Record the Hub model ID when one was provided, otherwise mark as an upload
                    'model_id': model_id if model_type == "HuggingFace FastText Model" else "uploaded_file",
                    **results
                }
                leaderboard.append(entry)
                save_leaderboard(leaderboard)
                st.success("Successfully submitted to leaderboard!")
            else:
                st.error("Please enter your name to submit to the leaderboard.")
    # Display leaderboard
    st.header("Leaderboard")
    leaderboard = load_leaderboard()
    if leaderboard:
        df = pd.DataFrame(leaderboard)
        df = df.sort_values('f1', ascending=False)
        st.dataframe(df)
    else:
        st.write("No submissions yet.")


if __name__ == "__main__":
    main()