Spaces:
Restarting
Restarting
Commit
·
995dcf8
1
Parent(s):
95ba712
add
Browse files- README.md +4 -2
- requirements.txt +0 -1
- src/envs.py +0 -4
- src/leaderboard/github_data.py +0 -131
- src/leaderboard/read_evals.py +0 -10
README.md
CHANGED
|
@@ -16,6 +16,7 @@ sdk_version: 5.19.0
|
|
| 16 |
Most of the variables to change for a default leaderboard are in `src/envs.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
|
| 17 |
|
| 18 |
Results files should have the following format and be stored as json files:
|
|
|
|
| 19 |
```json
|
| 20 |
{
|
| 21 |
"config": {
|
|
@@ -40,7 +41,8 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
|
|
| 40 |
|
| 41 |
# Code logic for more complex edits
|
| 42 |
|
| 43 |
-
You'll find
|
|
|
|
| 44 |
- the main table's column names and properties in `src/display/utils.py`
|
| 45 |
- the logic to read all results and request files, then convert them into dataframe rows, in `src/leaderboard/read_evals.py` and `src/populate.py`
|
| 46 |
-
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
|
|
|
| 16 |
Most of the variables to change for a default leaderboard are in `src/envs.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
|
| 17 |
|
| 18 |
Results files should have the following format and be stored as json files:
|
| 19 |
+
|
| 20 |
```json
|
| 21 |
{
|
| 22 |
"config": {
|
|
|
|
| 41 |
|
| 42 |
# Code logic for more complex edits
|
| 43 |
|
| 44 |
+
You'll find
|
| 45 |
+
|
| 46 |
- the main table's column names and properties in `src/display/utils.py`
|
| 47 |
- the logic to read all results and request files, then convert them into dataframe rows, in `src/leaderboard/read_evals.py` and `src/populate.py`
|
| 48 |
+
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
requirements.txt
CHANGED
|
@@ -10,7 +10,6 @@ matplotlib
|
|
| 10 |
numpy
|
| 11 |
pandas
|
| 12 |
python-dateutil
|
| 13 |
-
requests
|
| 14 |
tqdm
|
| 15 |
transformers
|
| 16 |
tokenizers>=0.15.0
|
|
|
|
| 10 |
numpy
|
| 11 |
pandas
|
| 12 |
python-dateutil
|
|
|
|
| 13 |
tqdm
|
| 14 |
transformers
|
| 15 |
tokenizers>=0.15.0
|
src/envs.py
CHANGED
|
@@ -11,10 +11,6 @@ LOCAL_MODE = True
|
|
| 11 |
# Get token from environment or use None in local mode
|
| 12 |
TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
|
| 13 |
|
| 14 |
-
# GitHub API token for fetching repo metadata
|
| 15 |
-
# This increases rate limits from 60 to 5000 requests per hour
|
| 16 |
-
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
|
| 17 |
-
|
| 18 |
OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
|
| 19 |
# ----------------------------------
|
| 20 |
|
|
|
|
| 11 |
# Get token from environment or use None in local mode
|
| 12 |
TOKEN = os.environ.get("HF_TOKEN") if not LOCAL_MODE else None
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
OWNER = "libvulnwatch" # Change to your org - don't forget to create a results and request dataset, with the correct format!
|
| 15 |
# ----------------------------------
|
| 16 |
|
src/leaderboard/github_data.py
DELETED
|
@@ -1,131 +0,0 @@
|
|
| 1 |
-
"""Utilities for fetching GitHub repository data"""
|
| 2 |
-
|
| 3 |
-
import os
|
| 4 |
-
import requests
|
| 5 |
-
import time
|
| 6 |
-
from functools import lru_cache
|
| 7 |
-
from urllib.parse import urlparse
|
| 8 |
-
|
| 9 |
-
# Import GitHub token from envs
|
| 10 |
-
from src.envs import GITHUB_TOKEN
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def extract_repo_path(repo_url):
|
| 14 |
-
"""Extract org/repo path from GitHub URL
|
| 15 |
-
|
| 16 |
-
Args:
|
| 17 |
-
repo_url: GitHub repository URL
|
| 18 |
-
|
| 19 |
-
Returns:
|
| 20 |
-
Repository path in format "org/repo"
|
| 21 |
-
"""
|
| 22 |
-
if not repo_url:
|
| 23 |
-
return None
|
| 24 |
-
|
| 25 |
-
# Handle both URL and org/repo format
|
| 26 |
-
if repo_url.startswith(("http://", "https://")):
|
| 27 |
-
parsed = urlparse(repo_url)
|
| 28 |
-
path = parsed.path.strip("/")
|
| 29 |
-
|
| 30 |
-
# Remove .git suffix if present
|
| 31 |
-
if path.endswith(".git"):
|
| 32 |
-
path = path[:-4]
|
| 33 |
-
|
| 34 |
-
return path
|
| 35 |
-
|
| 36 |
-
# Already in org/repo format
|
| 37 |
-
return repo_url
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
@lru_cache(maxsize=128)
|
| 41 |
-
def get_github_data(repo_path, use_token=True):
|
| 42 |
-
"""Fetch repository data from GitHub API
|
| 43 |
-
|
| 44 |
-
Args:
|
| 45 |
-
repo_path: Repository path in format "org/repo"
|
| 46 |
-
use_token: Whether to use GitHub token if available
|
| 47 |
-
|
| 48 |
-
Returns:
|
| 49 |
-
Dictionary with repository data including stars and license
|
| 50 |
-
"""
|
| 51 |
-
if not repo_path:
|
| 52 |
-
return {"github_stars": 0, "license": "Unknown"}
|
| 53 |
-
|
| 54 |
-
api_url = f"https://api.github.com/repos/{repo_path}"
|
| 55 |
-
headers = {"Accept": "application/vnd.github.v3+json"}
|
| 56 |
-
|
| 57 |
-
# Add token for higher rate limits if available
|
| 58 |
-
if use_token and GITHUB_TOKEN:
|
| 59 |
-
headers["Authorization"] = f"token {GITHUB_TOKEN}"
|
| 60 |
-
|
| 61 |
-
try:
|
| 62 |
-
response = requests.get(api_url, headers=headers)
|
| 63 |
-
|
| 64 |
-
if response.status_code == 200:
|
| 65 |
-
data = response.json()
|
| 66 |
-
|
| 67 |
-
# Extract relevant fields
|
| 68 |
-
result = {
|
| 69 |
-
"github_stars": data.get("stargazers_count", 0),
|
| 70 |
-
"license": data.get("license", {}).get("spdx_id", "Unknown"),
|
| 71 |
-
"full_name": data.get("full_name", repo_path),
|
| 72 |
-
"created_at": data.get("created_at", ""),
|
| 73 |
-
"updated_at": data.get("updated_at", ""),
|
| 74 |
-
"language": data.get("language", ""),
|
| 75 |
-
"forks_count": data.get("forks_count", 0),
|
| 76 |
-
"default_branch": data.get("default_branch", "main"),
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
# If license is None or "NOASSERTION", use "Unknown"
|
| 80 |
-
if not result["license"] or result["license"] == "NOASSERTION":
|
| 81 |
-
result["license"] = "Unknown"
|
| 82 |
-
|
| 83 |
-
return result
|
| 84 |
-
else:
|
| 85 |
-
print(f"GitHub API error for {repo_path}: {response.status_code} - {response.text}")
|
| 86 |
-
return {"github_stars": 0, "license": "Unknown"}
|
| 87 |
-
|
| 88 |
-
except Exception as e:
|
| 89 |
-
print(f"Error fetching GitHub data for {repo_path}: {e}")
|
| 90 |
-
return {"github_stars": 0, "license": "Unknown"}
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
def update_assessment_with_github_data(assessment, force_update=False):
|
| 94 |
-
"""Update assessment with data from GitHub
|
| 95 |
-
|
| 96 |
-
Args:
|
| 97 |
-
assessment: AssessmentResult object
|
| 98 |
-
force_update: Whether to force update even if values exist
|
| 99 |
-
|
| 100 |
-
Returns:
|
| 101 |
-
Updated AssessmentResult object
|
| 102 |
-
"""
|
| 103 |
-
# Skip if no data is missing and force_update is False
|
| 104 |
-
if not force_update and assessment.stars > 0 and assessment.license != "?":
|
| 105 |
-
return assessment
|
| 106 |
-
|
| 107 |
-
# Try getting repo path from library_name first
|
| 108 |
-
repo_path = None
|
| 109 |
-
if assessment.library_name and "/" in assessment.library_name:
|
| 110 |
-
repo_path = assessment.library_name
|
| 111 |
-
|
| 112 |
-
# Fall back to repository_url if available
|
| 113 |
-
if not repo_path and hasattr(assessment, 'repository_url') and assessment.repository_url:
|
| 114 |
-
repo_path = extract_repo_path(assessment.repository_url)
|
| 115 |
-
|
| 116 |
-
# If we still don't have a path, reconstruct from org/repo
|
| 117 |
-
if not repo_path and assessment.org and assessment.repo:
|
| 118 |
-
repo_path = f"{assessment.org}/{assessment.repo}"
|
| 119 |
-
|
| 120 |
-
# If we found a valid path, fetch and update
|
| 121 |
-
if repo_path:
|
| 122 |
-
github_data = get_github_data(repo_path)
|
| 123 |
-
|
| 124 |
-
# Update if data is missing or force_update is True
|
| 125 |
-
if force_update or assessment.stars == 0:
|
| 126 |
-
assessment.stars = github_data["github_stars"]
|
| 127 |
-
|
| 128 |
-
if force_update or assessment.license == "?":
|
| 129 |
-
assessment.license = github_data["license"]
|
| 130 |
-
|
| 131 |
-
return assessment
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -9,7 +9,6 @@ import numpy as np
|
|
| 9 |
|
| 10 |
from src.display.formatting import make_clickable_library, make_clickable_report
|
| 11 |
from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
|
| 12 |
-
from src.leaderboard.github_data import update_assessment_with_github_data
|
| 13 |
|
| 14 |
|
| 15 |
@dataclass
|
|
@@ -31,7 +30,6 @@ class AssessmentResult:
|
|
| 31 |
availability: bool = True
|
| 32 |
verified: bool = False
|
| 33 |
report_url: str = "" # URL to detailed assessment report
|
| 34 |
-
repository_url: str = "" # GitHub repository URL
|
| 35 |
|
| 36 |
@classmethod
|
| 37 |
def init_from_json_file(self, json_filepath):
|
|
@@ -90,7 +88,6 @@ class AssessmentResult:
|
|
| 90 |
verified=assessment.get("independently_verified", False),
|
| 91 |
last_update=last_update,
|
| 92 |
report_url=assessment.get("report_url", ""),
|
| 93 |
-
repository_url=assessment.get("repository_url", ""),
|
| 94 |
)
|
| 95 |
|
| 96 |
def update_with_request_file(self, requests_path):
|
|
@@ -102,15 +99,8 @@ class AssessmentResult:
|
|
| 102 |
request = json.load(f)
|
| 103 |
self.library_type = LibraryType.from_str(request.get("library_type", ""))
|
| 104 |
self.stars = request.get("stars", 0)
|
| 105 |
-
# Add repository URL if not already set
|
| 106 |
-
if not self.repository_url and "repository_url" in request:
|
| 107 |
-
self.repository_url = request.get("repository_url", "")
|
| 108 |
except Exception:
|
| 109 |
print(f"Could not find request file for {self.library_name} version {self.version}")
|
| 110 |
-
|
| 111 |
-
# Try to get GitHub stars and license if missing
|
| 112 |
-
if self.stars == 0 or self.license == "?":
|
| 113 |
-
update_assessment_with_github_data(self)
|
| 114 |
|
| 115 |
def to_dict(self):
|
| 116 |
"""Converts the Assessment Result to a dict compatible with our dataframe display"""
|
|
|
|
| 9 |
|
| 10 |
from src.display.formatting import make_clickable_library, make_clickable_report
|
| 11 |
from src.display.utils import AutoEvalColumn, LibraryType, Tasks, Language, AssessmentStatus
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
@dataclass
|
|
|
|
| 30 |
availability: bool = True
|
| 31 |
verified: bool = False
|
| 32 |
report_url: str = "" # URL to detailed assessment report
|
|
|
|
| 33 |
|
| 34 |
@classmethod
|
| 35 |
def init_from_json_file(self, json_filepath):
|
|
|
|
| 88 |
verified=assessment.get("independently_verified", False),
|
| 89 |
last_update=last_update,
|
| 90 |
report_url=assessment.get("report_url", ""),
|
|
|
|
| 91 |
)
|
| 92 |
|
| 93 |
def update_with_request_file(self, requests_path):
|
|
|
|
| 99 |
request = json.load(f)
|
| 100 |
self.library_type = LibraryType.from_str(request.get("library_type", ""))
|
| 101 |
self.stars = request.get("stars", 0)
|
|
|
|
|
|
|
|
|
|
| 102 |
except Exception:
|
| 103 |
print(f"Could not find request file for {self.library_name} version {self.version}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
def to_dict(self):
|
| 106 |
"""Converts the Assessment Result to a dict compatible with our dataframe display"""
|