File size: 3,041 Bytes
fe79a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0babe14
fe79a14
 
0babe14
 
 
 
fe79a14
 
 
0babe14
fe79a14
 
0babe14
fe79a14
 
 
0babe14
 
fe79a14
0babe14
 
fe79a14
0babe14
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
from pathlib import Path
import logging

from leaderboard.leaderboard import LeaderboardApp

from about import render_about
from submission import render_submit

# Configure process-wide logging once at import time: INFO and above, with
# each record prefixed by timestamp, level name and originating module.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(module)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)

def create_app() -> "gr.Blocks":
    """
    Build the Gradio application for MIZAN: A Persian LLM Leaderboard.

    Constructs a ``LeaderboardApp`` from ``leaderboard/leaderboard_config.yaml``
    (a relative path, so it is resolved against the current working
    directory), runs its data pipeline (``load_data``,
    ``handle_nulls_in_averages``, ``generate_model_rankings``) and then lays
    out a ``gr.Blocks`` page with a heading, an introductory paragraph and
    three tabs: "LLM Benchmark", "About MIZAN" and "Request New Model".

    Returns:
        The assembled ``gr.Blocks`` instance. It is not launched here; the
        caller decides how to serve it.
    """
    logger.info("Initializing MIZAN: A Persian LLM Leaderboard application...")

    config_file_path = Path("leaderboard/leaderboard_config.yaml")

    # Best-effort: a missing config is reported but does not abort start-up;
    # the path is still handed to LeaderboardApp below.
    if not config_file_path.exists():
        logger.error(
            "CRITICAL: Leaderboard configuration file not found at %s. "
            "The application may not function correctly.",
            config_file_path,
        )

    leaderboard_processor = LeaderboardApp(config_path=config_file_path)

    # Prepare the leaderboard data before any UI component is created.
    logger.info("Loading and processing leaderboard data...")
    leaderboard_processor.load_data()
    leaderboard_processor.handle_nulls_in_averages()
    leaderboard_processor.generate_model_rankings()
    logger.info("Leaderboard data processing complete.")

    with gr.Blocks(title="MIZAN: A Persian LLM Leaderboard") as demo:
        gr.Markdown("<h1 style='text-align: center; width: 100%; margin-bottom: 10px;'>🇮🇷 MIZAN: A Persian LLM Leaderboard</h1>")
        gr.Markdown("""<p style='font-size: 1.1em; text-align: center; max-width: 800px; margin: 0 auto 20px auto;'>
        MIZAN: A Persian LLM Leaderboard is a comprehensive benchmark for evaluating Large Language Models (LLMs) in Persian.
        It combines existing datasets, translated benchmarks, and new Persian-specific data to assess LLM capabilities in understanding,
        generation, reasoning, and knowledge relevant to the Persian language and culture.
        MIZAN provides a standardized tool for researchers and developers to measure Persian LLM performance.
        </p>""")

        with gr.Tabs():
            with gr.TabItem("LLM Benchmark"):
                logger.info("Creating 'LLM Benchmark' tab content...")
                leaderboard_processor.create_gradio_interface()
                logger.info("'LLM Benchmark' tab content created.")

            with gr.TabItem("About MIZAN"):
                logger.info("Creating 'About MIZAN' tab content...")
                render_about()
                logger.info("'About MIZAN' tab content created.")

            with gr.TabItem("Request New Model"):
                # Log messages use the tab's actual label so the logs can be
                # matched against what is shown in the UI.
                logger.info("Creating 'Request New Model' tab content...")
                render_submit()
                logger.info("'Request New Model' tab content created.")

    logger.info("MIZAN: A Persian LLM Leaderboard application interface created.")
    return demo

if __name__ == "__main__":
    # Script entry point: build the interface and serve it.
    logger.info("Launching MIZAN: A Persian LLM Leaderboard application...")
    demo = create_app()
    # With debug=True, launch() blocks the main thread while the server is
    # running, so the statement after it is reached only once the server
    # has stopped.
    demo.launch(
        debug=True,
        share=True,
    )
    logger.info("MIZAN: A Persian LLM Leaderboard application has shut down.")