Spaces:

MBZUAI-LLM
/

Mobile-MMLU-Challenge

Running

App Files Community

SondosMB committed on Dec 20, 2024

Commit

514663d

verified ·

1 Parent(s): 45d118c

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -194

app.py CHANGED Viewed

@@ -1,168 +1,3 @@
-# # demo.launch()
-# import gradio as gr
-# import pandas as pd
-# import os
-# import re
-# from datetime import datetime
-# LEADERBOARD_FILE = "leaderboard.csv"  # File to store all submissions persistently
-# LAST_UPDATED = datetime.now().strftime("%B %d, %Y")
-# def initialize_leaderboard_file():
-#     """
-#     Ensure the leaderboard file exists and has the correct headers.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE):
-#         # Create the file with headers
-#         pd.DataFrame(columns=[
-#             "Model Name", "Overall Accuracy", "Valid Accuracy",
-#             "Correct Predictions", "Total Questions", "Timestamp"
-#         ]).to_csv(LEADERBOARD_FILE, index=False)
-#     else:
-#         # Check if the file is empty and write headers if needed
-#         if os.stat(LEADERBOARD_FILE).st_size == 0:
-#             pd.DataFrame(columns=[
-#                 "Model Name", "Overall Accuracy", "Valid Accuracy",
-#                 "Correct Predictions", "Total Questions", "Timestamp"
-#             ]).to_csv(LEADERBOARD_FILE, index=False)
-# def clean_answer(answer):
-#     """
-#     Clean and normalize the predicted answers.
-#     """
-#     if pd.isna(answer):
-#         return None
-#     answer = str(answer)
-#     clean = re.sub(r'[^A-Da-d]', '', answer)
-#     if clean:
-#         return clean[0].upper()
-#     return None
-# def update_leaderboard(results):
-#     """
-#     Append new submission results to the leaderboard file.
-#     """
-#     new_entry = {
-#         "Model Name": results['model_name'],
-#         "Overall Accuracy": round(results['overall_accuracy'] * 100, 2),
-#         "Valid Accuracy": round(results['valid_accuracy'] * 100, 2),
-#         "Correct Predictions": results['correct_predictions'],
-#         "Total Questions": results['total_questions'],
-#         "Timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-#     }
-#     new_entry_df = pd.DataFrame([new_entry])
-#     new_entry_df.to_csv(LEADERBOARD_FILE, mode='a', index=False, header=False)
-# def load_leaderboard():
-#     """
-#     Load all submissions from the leaderboard file.
-#     """
-#     if not os.path.exists(LEADERBOARD_FILE) or os.stat(LEADERBOARD_FILE).st_size == 0:
-#         return pd.DataFrame({
-#             "Model Name": [],
-#             "Overall Accuracy": [],
-#             "Valid Accuracy": [],
-#             "Correct Predictions": [],
-#             "Total Questions": [],
-#             "Timestamp": [],
-#         })
-#     return pd.read_csv(LEADERBOARD_FILE)
-# def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
-#     """
-#     Evaluate predictions and optionally add results to the leaderboard.
-#     """
-#     ground_truth_file = "ground_truth.csv"
-#     if not os.path.exists(ground_truth_file):
-#         return "Ground truth file not found.", load_leaderboard()
-#     if not prediction_file:
-#         return "Prediction file not uploaded.", load_leaderboard()
-#     try:
-#         # Load predictions and ground truth
-#         predictions_df = pd.read_csv(prediction_file.name)
-#         ground_truth_df = pd.read_csv(ground_truth_file)
-#         # Merge predictions with ground truth
-#         merged_df = pd.merge(predictions_df, ground_truth_df, on='question_id', how='inner')
-#         merged_df['pred_answer'] = merged_df['predicted_answer'].apply(clean_answer)
-#         # Evaluate predictions
-#         valid_predictions = merged_df.dropna(subset=['pred_answer'])
-#         correct_predictions = (valid_predictions['pred_answer'] == valid_predictions['Answer']).sum()
-#         total_predictions = len(merged_df)
-#         total_valid_predictions = len(valid_predictions)
-#         # Calculate accuracy
-#         overall_accuracy = correct_predictions / total_predictions if total_predictions > 0 else 0
-#         valid_accuracy = correct_predictions / total_valid_predictions if total_valid_predictions > 0 else 0
-#         results = {
-#             'model_name': model_name if model_name else "Unknown Model",
-#             'overall_accuracy': overall_accuracy,
-#             'valid_accuracy': valid_accuracy,
-#             'correct_predictions': correct_predictions,
-#             'total_questions': total_predictions,
-#         }
-#         # Update leaderboard only if opted in
-#         if add_to_leaderboard:
-#             update_leaderboard(results)
-#             return "Evaluation completed and added to leaderboard.", load_leaderboard()
-#         else:
-#             return "Evaluation completed but not added to leaderboard.", load_leaderboard()
-#     except Exception as e:
-#         return f"Error during evaluation: {str(e)}", load_leaderboard()
-# # Initialize leaderboard file
-# initialize_leaderboard_file()
-# # Gradio Interface
-# with gr.Blocks() as demo:
-#     gr.Markdown("# Prediction Evaluation Tool with Leaderboard")
-#     with gr.Tabs():
-#         # Submission Tab
-#         with gr.TabItem("🏅 Submission"):
-#             file_input = gr.File(label="Upload Prediction CSV")
-#             model_name_input = gr.Textbox(label="Model Name", placeholder="Enter your model name")
-#             add_to_leaderboard_checkbox = gr.Checkbox(label="Add to Leaderboard?", value=True)
-#             eval_status = gr.Textbox(label="Evaluation Status", interactive=False)
-#             leaderboard_table_preview = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard (Preview)",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             eval_button = gr.Button("Evaluate and Update Leaderboard")
-#             eval_button.click(
-#                 evaluate_predictions,
-#                 inputs=[file_input, model_name_input, add_to_leaderboard_checkbox],
-#                 outputs=[eval_status, leaderboard_table_preview],
-#             )
-#         # Leaderboard Tab
-#         with gr.TabItem("🏅 Leaderboard"):
-#             leaderboard_table = gr.Dataframe(
-#                 value=load_leaderboard(),
-#                 label="Leaderboard",
-#                 interactive=False,
-#                 wrap=True,
-#             )
-#             refresh_button = gr.Button("Refresh Leaderboard")
-#             refresh_button.click(
-#                 lambda: load_leaderboard(),
-#                 inputs=[],
-#                 outputs=[leaderboard_table],
-#             )
-#     gr.Markdown(f"Last updated on **{LAST_UPDATED}**")
-# demo.launch()
 import gradio as gr
 import pandas as pd
 import os
@@ -309,70 +144,77 @@ def evaluate_predictions(prediction_file, model_name, add_to_leaderboard):
 initialize_leaderboard_file()
-import gradio as gr
 # Function to set default mode
 css_tech_theme = """
 body {
-    background-color: #f4f4f9;
-    color: #2e2e2e;
-    font-family: Arial, sans-serif;
 }
 a {
-    color: #4a90e2;
 }
 a:hover {
-    color: #7d56c5;
     text-decoration: underline;
 }
 button {
-    background-color: #4a90e2;
     color: #ffffff;
-    border-radius: 5px;
-    padding: 10px;
 }
 button:hover {
-    background-color: #7d56c5;
 }
 .input-row, .tab-content {
-    background-color: #e9eef5;
     border-radius: 8px;
-    padding: 15px;
 }
 .dataframe {
-    color: #2e2e2e;
-    background-color: #f4f4f9;
-    border: 1px solid #4a90e2;
 }
 """
 with gr.Blocks(css=css_tech_theme) as demo:
     gr.Markdown("""
-# 🏆 **Mobile-MMLU Benchmark Competition**
-### 🌟 **Welcome to the Competition Overview**
 ![Competition Logo](mobile_mmlu_sd.jpeg)
 ---
-Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance!
 ---
     """)
     with gr.Tabs():
         with gr.TabItem("📖 Overview"):
             gr.Markdown("""
-## 📘 Overview
 Welcome to the **Mobile-MMLU Benchmark Competition**! Evaluate mobile-compatible Large Language Models (LLMs) on **16,186 scenario-based and factual questions** across **80 fields**.
 ---
-### 🌐 **What is Mobile-MMLU?**
 Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
-### 🔍 **How It Works**
 1. **Download the Dataset**
    Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
 2. **Generate Predictions**
@@ -385,17 +227,17 @@ Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized f
    View real-time rankings on the leaderboard.
 ---
-### 🏆 **Competition Tasks**
 Participants must:
 - Optimize their models for **accuracy**.
 - Answer diverse field questions effectively.
 ---
-### 🚀 **Get Started**
 1. Prepare your model using resources on our [GitHub page](https://github.com/your-github-repo).
 2. Submit predictions in the required format.
 3. Track your progress on the leaderboard.
-### 📧 **Contact Us**
 For support, email: [Insert Email Address]
 ---
             """)
@@ -421,18 +263,18 @@ For support, email: [Insert Email Address]
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
-                label="🏆 Leaderboard",
                 interactive=False,
                 wrap=True,
             )
-            refresh_button = gr.Button("🔄 Refresh Leaderboard")
             refresh_button.click(
                 lambda: load_leaderboard(),
                 inputs=[],
                 outputs=[leaderboard_table],
             )
-    gr.Markdown(f"**📅 Last updated:** {LAST_UPDATED}")
 demo.launch()

 import gradio as gr
 import pandas as pd
 import os
 initialize_leaderboard_file()
 # Function to set default mode
 css_tech_theme = """
 body {
+    background-color: #ffffff;
+    color: #333333;
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    line-height: 1.6;
 }
 a {
+    color: #007acc;
+    font-weight: 500;
 }
 a:hover {
+    color: #005bb5;
     text-decoration: underline;
 }
 button {
+    background-color: #007acc;
     color: #ffffff;
+    border: none;
+    border-radius: 6px;
+    padding: 10px 15px;
+    font-size: 14px;
+    cursor: pointer;
+    transition: background-color 0.3s ease;
 }
 button:hover {
+    background-color: #005bb5;
 }
 .input-row, .tab-content {
+    background-color: #f9f9fc;
     border-radius: 8px;
+    padding: 20px;
+    box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 }
 .dataframe {
+    color: #333333;
+    background-color: #ffffff;
+    border: 1px solid #d1d5db;
+    border-radius: 6px;
+    padding: 10px;
+    font-size: 14px;
 }
 """
 with gr.Blocks(css=css_tech_theme) as demo:
     gr.Markdown("""
+# 🏆 Mobile-MMLU Benchmark Competition
+### 🌟 Welcome to the Competition Overview
 ![Competition Logo](mobile_mmlu_sd.jpeg)
 ---
+Welcome to the **Mobile-MMLU Benchmark Competition**. Here you can submit your predictions, view the leaderboard, and track your performance.
 ---
     """)
     with gr.Tabs():
         with gr.TabItem("📖 Overview"):
             gr.Markdown("""
+## Overview
 Welcome to the **Mobile-MMLU Benchmark Competition**! Evaluate mobile-compatible Large Language Models (LLMs) on **16,186 scenario-based and factual questions** across **80 fields**.
 ---
+### What is Mobile-MMLU?
 Mobile-MMLU is a benchmark designed to test the capabilities of LLMs optimized for mobile use. Contribute to advancing mobile AI systems by competing to achieve the highest accuracy.
+### How It Works
 1. **Download the Dataset**
    Access the dataset and instructions on our [GitHub page](https://github.com/your-github-repo).
 2. **Generate Predictions**
    View real-time rankings on the leaderboard.
 ---
+### Competition Tasks
 Participants must:
 - Optimize their models for **accuracy**.
 - Answer diverse field questions effectively.
 ---
+### Get Started
 1. Prepare your model using resources on our [GitHub page](https://github.com/your-github-repo).
 2. Submit predictions in the required format.
 3. Track your progress on the leaderboard.
+### Contact Us
 For support, email: [Insert Email Address]
 ---
             """)
         with gr.TabItem("🏅 Leaderboard"):
             leaderboard_table = gr.Dataframe(
                 value=load_leaderboard(),
+                label="Leaderboard",
                 interactive=False,
                 wrap=True,
             )
+            refresh_button = gr.Button("Refresh Leaderboard")
             refresh_button.click(
                 lambda: load_leaderboard(),
                 inputs=[],
                 outputs=[leaderboard_table],
             )
+    gr.Markdown(f"**Last updated:** {LAST_UPDATED}")
 demo.launch()