File size: 6,854 Bytes
af86b7d
 
 
 
 
 
 
 
 
e4e854a
af86b7d
 
 
 
 
 
 
 
e4e854a
af86b7d
 
e4e854a
af86b7d
 
 
 
 
e4e854a
af86b7d
 
e4e854a
 
 
 
 
 
af86b7d
 
e4e854a
af86b7d
 
 
 
e4e854a
af86b7d
 
 
e4e854a
af86b7d
 
 
 
 
 
 
e4e854a
af86b7d
 
 
 
 
 
 
 
e4e854a
af86b7d
e4e854a
 
 
 
 
 
 
 
af86b7d
e4e854a
 
 
 
af86b7d
e4e854a
af86b7d
 
e4e854a
af86b7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4e854a
af86b7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""
Hugging Face Leaderboard Service
Manages leaderboard data persistence using HF Hub
"""

import json
import os
from datetime import datetime
from typing import List, Dict, Optional
from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
import logging

# Module-level logger named after this module; no handlers or levels are configured here.
logger = logging.getLogger(__name__)


class HFLeaderboardService:
    """
    Service for managing leaderboard data on Hugging Face Hub.

    The leaderboard is stored as a single JSON file (``leaderboard.json``) in
    a Space repository. The file holds an object with the keys
    ``leaderboard`` (list of entries), ``last_updated`` and ``version``.
    Write operations need an HF token; reads are attempted without one.
    """

    # Maximum number of entries kept whenever the leaderboard is written.
    MAX_ENTRIES = 10

    def __init__(self, repo_id: Optional[str] = None, token: Optional[str] = None):
        """
        Initialize HF Leaderboard Service

        Args:
            repo_id: HF Hub repository ID (format: username/repo-name)
                    If not provided, uses SPACE_ID env var (auto-set in HF Spaces)
            token: HF API token (if not provided, uses HF_TOKEN env var)

        Raises:
            ValueError: If neither ``repo_id`` nor the SPACE_ID env var is set.
        """
        # Use SPACE_ID if available (automatically set in HF Spaces)
        self.repo_id = repo_id or os.getenv("SPACE_ID")
        if not self.repo_id:
            raise ValueError("No repo_id provided and SPACE_ID env var not set. Cannot determine target repository.")

        self.token = token or os.getenv("HF_TOKEN")
        self.api = HfApi()
        self.leaderboard_file = "leaderboard.json"
        self.repo_type = "space"  # Store in Space repo, not separate dataset

        if not self.token:
            logger.warning("No HF token provided. Read-only mode (if repo is public)")

        logger.info("HF Leaderboard Service initialized for Space: %s", self.repo_id)

    def _save_to_hub(self, data: List[Dict]):
        """
        Save leaderboard data to HF Hub as a single commit

        The JSON document is built in memory and handed to the commit as
        bytes. No temporary file is used, so concurrent saves cannot clobber
        each other's staging file and nothing is left behind on failure.

        Args:
            data: List of leaderboard entries

        Raises:
            ValueError: If no token is available.
            Exception: Any error raised while serializing or committing is
                logged (for commit errors) and re-raised.
        """
        if not self.token:
            raise ValueError("No HF token available for writing")

        # Naive UTC timestamp, kept so the stored "last_updated" format is
        # unchanged; taken once so file content and commit message agree.
        now = datetime.utcnow()
        payload = json.dumps({
            "leaderboard": data,
            "last_updated": now.isoformat(),
            "version": "1.0"
        }, indent=2).encode("utf-8")

        try:
            operations = [
                CommitOperationAdd(
                    path_or_fileobj=payload,
                    path_in_repo=self.leaderboard_file
                )
            ]

            self.api.create_commit(
                repo_id=self.repo_id,
                operations=operations,
                commit_message=f"update leaderboard - {now.strftime('%Y-%m-%d %H:%M:%S')} utc",
                repo_type=self.repo_type,
                token=self.token
            )

            logger.info("Leaderboard saved to HF Hub: %s", self.repo_id)
        except Exception as e:
            logger.error("Failed to commit to HF Hub: %s", e)
            raise

    def _read_leaderboard_file(self) -> List[Dict]:
        """
        Download and parse the leaderboard file, raising on any failure

        Returns:
            The list stored under the "leaderboard" key ([] if the key is absent)
        """
        file_path = hf_hub_download(
            repo_id=self.repo_id,
            filename=self.leaderboard_file,
            repo_type=self.repo_type,
            token=self.token
        )

        with open(file_path, "r") as f:
            data = json.load(f)
        return data.get("leaderboard", [])

    def _load_from_hub(self) -> List[Dict]:
        """
        Load leaderboard data from HF Hub (best effort)

        Any failure (missing file, network error, malformed JSON) is logged
        and reported as an empty leaderboard. Use this for read-only access
        only; it cannot distinguish "no data" from "could not read".

        Returns:
            List of leaderboard entries
        """
        try:
            return self._read_leaderboard_file()
        except Exception as e:
            logger.warning("Failed to load from HF Hub: %s. Returning empty leaderboard.", e)
            return []

    def _load_for_update(self) -> List[Dict]:
        """
        Load leaderboard data for a read-modify-write cycle

        Unlike ``_load_from_hub``, a read failure is only treated as an empty
        leaderboard when the Hub confirms the file does not exist yet.
        Otherwise the error propagates, so a transient failure can never
        cause the stored leaderboard to be overwritten with a near-empty one.

        Returns:
            List of leaderboard entries ([] when the file does not exist yet)

        Raises:
            Exception: The original read error if the file exists, or the
                error raised by the existence check itself.
        """
        try:
            return self._read_leaderboard_file()
        except Exception:
            file_present = self.api.file_exists(
                self.repo_id,
                self.leaderboard_file,
                repo_type=self.repo_type,
                token=self.token
            )
            if file_present:
                raise
            logger.info("No leaderboard file found on HF Hub yet. Starting with an empty leaderboard.")
            return []

    def get_leaderboard(self) -> List[Dict]:
        """
        Get current leaderboard data

        Returns:
            List of leaderboard entries in the order they are stored (this
            service sorts entries before every write). Empty list if the
            data could not be loaded.
        """
        return self._load_from_hub()

    def add_entry(self, entry: Dict) -> bool:
        """
        Add new entry to leaderboard

        The current leaderboard is loaded, the entry appended, the result
        sorted and truncated to ``MAX_ENTRIES``, then saved back.

        Args:
            entry: Leaderboard entry with keys: initials, level, round, passagesPassed, date

        Returns:
            True if successful, False otherwise

        Raises:
            ValueError: If no token is available.
        """
        if not self.token:
            raise ValueError("No HF token available for writing")

        try:
            # Strict load: refuse to continue if the existing data cannot be read
            leaderboard = self._load_for_update()

            leaderboard.append(entry)

            # Sort leaderboard (highest level first, then round, then passages)
            leaderboard = self._sort_leaderboard(leaderboard)[:self.MAX_ENTRIES]

            self._save_to_hub(leaderboard)
        except Exception as e:
            logger.error("Failed to add entry: %s", e)
            return False

        # Logged outside the try block with .get(): a missing key must not
        # turn an already successful save into a reported failure.
        logger.info(
            "Added entry to leaderboard: %s - Level %s",
            entry.get('initials', '?'),
            entry.get('level', '?'),
        )
        return True

    def update_leaderboard(self, leaderboard: List[Dict]) -> bool:
        """
        Replace entire leaderboard with new data

        Args:
            leaderboard: Complete leaderboard data

        Returns:
            True if successful, False otherwise

        Raises:
            ValueError: If no token is available.
        """
        if not self.token:
            raise ValueError("No HF token available for writing")

        try:
            # Sort and limit to the top MAX_ENTRIES
            sorted_board = self._sort_leaderboard(leaderboard)[:self.MAX_ENTRIES]
            self._save_to_hub(sorted_board)
            return True
        except Exception as e:
            logger.error("Failed to update leaderboard: %s", e)
            return False

    @staticmethod
    def _numeric(value) -> float:
        """
        Coerce a score field to a number usable as a sort key

        Ints are returned unchanged, other values go through ``float()``;
        anything that cannot be converted (None, non-numeric text, NaN)
        counts as 0 so one malformed entry cannot break sorting.
        """
        if isinstance(value, int):
            return value
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0
        # NaN is the only value not equal to itself; it would break ordering
        return 0 if number != number else number

    def _sort_leaderboard(self, entries: List[Dict]) -> List[Dict]:
        """
        Sort leaderboard entries by performance

        Order: higher level, then higher round, then more passages passed.
        Remaining ties are broken by ascending ``date`` string, i.e. the
        earlier date comes first when dates are ISO-like strings.
        NOTE(review): an earlier comment here said "newer is better", which
        contradicts the ascending sort; behaviour is kept, confirm intent.

        Args:
            entries: List of leaderboard entries

        Returns:
            Sorted list (best first)
        """
        def sort_key(item: Dict):
            return (
                -self._numeric(item.get('level', 0)),  # Higher level is better
                -self._numeric(item.get('round', 0)),  # Higher round is better
                -self._numeric(item.get('passagesPassed', 0)),  # More passages is better
                str(item.get('date') or ''),  # Compared as text so mixed/missing dates cannot raise
            )

        return sorted(entries, key=sort_key)

    def clear_leaderboard(self) -> bool:
        """
        Clear all leaderboard data (admin function)

        Returns:
            True if successful, False otherwise

        Raises:
            ValueError: If no token is available.
        """
        if not self.token:
            raise ValueError("No HF token available for writing")

        try:
            self._save_to_hub([])
            logger.info("Leaderboard cleared")
            return True
        except Exception as e:
            logger.error("Failed to clear leaderboard: %s", e)
            return False