# Provenance (Hugging Face commit header, preserved as comments):
# gardarjuto — refactor(backend): Consolidate all source files into a single 'app' package
# commit d4577f4
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List

from fastapi import HTTPException
from huggingface_hub import snapshot_download

from app.config import (
    QUEUE_REPO,
    RESULTS_REPO,
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    HF_TOKEN
)
from app.core.cache import cache_config
logger = logging.getLogger(__name__)
try:
from app.leaderboard.read_evals import get_raw_eval_results
from app.populate import get_leaderboard_df
from app.display.utils import COLS, BENCHMARK_COLS, Tasks
except ImportError as e:
# Fallback for development without mounted volume
logger.warning(f"Could not import original modules: {e}")
# Define minimal fallbacks
COLS = ["Model", "Average ⬆️", "Type", "Precision", "Architecture", "Hub License", "Hub ❤️", "#Params (B)", "Available on the hub", "Model sha"]
BENCHMARK_COLS = ["WinoGrande-IS (3-shot)", "GED", "Inflection (1-shot)", "Belebele (IS)", "ARC-Challenge-IS", "WikiQA-IS"]
class MockTask:
def __init__(self, name, col_name):
self.name = name
self.col_name = col_name
class Tasks:
task0 = MockTask("winogrande_is", "WinoGrande-IS (3-shot)")
task1 = MockTask("ged", "GED")
task2 = MockTask("inflection", "Inflection (1-shot)")
task5 = MockTask("belebele_is", "Belebele (IS)")
task6 = MockTask("arc_challenge_is", "ARC-Challenge-IS")
task7 = MockTask("wiki_qa_is", "WikiQA-IS")
class IcelandicLeaderboardService:
def __init__(self):
self.results_path = EVAL_RESULTS_PATH
self.requests_path = EVAL_REQUESTS_PATH
async def _ensure_data_available(self):
"""Ensure evaluation data is available locally"""
try:
# Download results if not exists or empty
if not os.path.exists(self.results_path) or not os.listdir(self.results_path):
logger.info(f"Downloading results to {self.results_path}")
snapshot_download(
repo_id=RESULTS_REPO,
local_dir=self.results_path,
repo_type="dataset",
token=HF_TOKEN,
tqdm_class=None,
etag_timeout=30
)
# Download requests if not exists or empty
if not os.path.exists(self.requests_path) or not os.listdir(self.requests_path):
logger.info(f"Downloading requests to {self.requests_path}")
snapshot_download(
repo_id=QUEUE_REPO,
local_dir=self.requests_path,
repo_type="dataset",
token=HF_TOKEN,
tqdm_class=None,
etag_timeout=30
)
except Exception as e:
logger.error(f"Failed to download data: {e}")
raise HTTPException(status_code=500, detail=f"Failed to download data: {str(e)}")
async def fetch_raw_data(self) -> List[Dict[str, Any]]:
"""Fetch raw leaderboard data using original Icelandic processing logic"""
try:
await self._ensure_data_available()
logger.info("Processing Icelandic leaderboard data")
# Try to use original processing logic if available
try:
raw_data, df = get_leaderboard_df(
self.results_path,
self.requests_path,
COLS,
BENCHMARK_COLS
)
# Convert DataFrame to list of dictionaries
data = df.to_dict('records')
logger.info(f"Processed {len(data)} Icelandic leaderboard entries")
return data
except NameError:
# Fallback: return mock data for testing
logger.warning("Using mock data - original processing modules not available")
return self._generate_mock_data()
except Exception as e:
logger.error(f"Failed to fetch Icelandic leaderboard data: {e}")
raise HTTPException(status_code=500, detail=str(e))
def _generate_mock_data(self) -> List[Dict[str, Any]]:
"""Generate mock data for testing when original modules aren't available"""
return [
{
"Model": "test-model/icelandic-gpt-7b",
"Average ⬆️": 85.5,
"Type": "fine-tuned",
"T": "🔶",
"Precision": "bfloat16",
"Architecture": "LlamaForCausalLM",
"Hub License": "apache-2.0",
"Hub ❤️": 42,
"#Params (B)": 7.0,
"Available on the hub": True,
"Model sha": "abc123def456",
"WinoGrande-IS (3-shot)": 78.5,
"GED": 92.3,
"Inflection (1-shot)": 85.1,
"Belebele (IS)": 80.7,
"ARC-Challenge-IS": 76.2,
"WikiQA-IS": 89.4,
"Reasoning": False,
"Note": ""
},
{
"Model": "test-model/icelandic-llama-13b",
"Average ⬆️": 88.2,
"Type": "instruction-tuned",
"T": "⭕",
"Precision": "float16",
"Architecture": "LlamaForCausalLM",
"Hub License": "mit",
"Hub ❤️": 156,
"#Params (B)": 13.0,
"Available on the hub": True,
"Model sha": "def456abc789",
"WinoGrande-IS (3-shot)": 82.1,
"GED": 94.8,
"Inflection (1-shot)": 87.9,
"Belebele (IS)": 85.3,
"ARC-Challenge-IS": 79.8,
"WikiQA-IS": 91.2,
"Reasoning": True,
"Note": "reasoning model with 32k thinking budget"
}
]
async def get_formatted_data(self) -> List[Dict[str, Any]]:
"""Get formatted leaderboard data compatible with React frontend"""
try:
raw_data = await self.fetch_raw_data()
formatted_data = []
for item in raw_data:
try:
formatted_item = await self.transform_data(item)
formatted_data.append(formatted_item)
except Exception as e:
logger.error(f"Failed to format entry: {e}")
continue
logger.info(f"Formatted {len(formatted_data)} entries for frontend")
return formatted_data
except Exception as e:
logger.error(f"Failed to format leaderboard data: {e}")
raise HTTPException(status_code=500, detail=str(e))
async def transform_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
"""Transform Icelandic leaderboard data into format expected by React frontend"""
# Create unique ID and clean model name
raw_model_name = data.get("Model", "Unknown")
# Extract clean model name from HTML if present
if '<a target="_blank" href=' in raw_model_name:
# Parse HTML to extract clean model name
import re
match = re.search(r'>([^<]+)</a>', raw_model_name)
model_name = match.group(1) if match else raw_model_name
else:
model_name = raw_model_name
precision = data.get("Precision", "Unknown")
revision = data.get("Model sha", "Unknown")
unique_id = f"{model_name}_{precision}_{revision}"
# Map Icelandic tasks to evaluations format
evaluations = {}
task_mapping = {
"WinoGrande-IS (3-shot)": "winogrande_is",
"GED": "ged",
"Inflection (1-shot)": "inflection",
"Belebele (IS)": "belebele_is",
"ARC-Challenge-IS": "arc_challenge_is",
"WikiQA-IS": "wiki_qa_is"
}
for task_display_name, task_key in task_mapping.items():
if task_display_name in data:
evaluations[task_key] = {
"name": task_display_name,
"value": data.get(task_display_name, 0),
"normalized_score": data.get(task_display_name, 0)
}
# Extract model type and clean it
model_type_symbol = data.get("T", "")
model_type_name = data.get("Type", "Unknown")
# Map Icelandic model types to frontend format
type_mapping = {
"pretrained": "pretrained",
"fine-tuned": "fine-tuned",
"instruction-tuned": "instruction-tuned",
"RL-tuned": "RL-tuned"
}
clean_model_type = type_mapping.get(model_type_name, model_type_name)
features = {
"is_not_available_on_hub": not data.get("Available on the hub", True),
"is_merged": False, # Not tracked in Icelandic leaderboard
"is_moe": False, # Not tracked in Icelandic leaderboard
"is_flagged": False, # Not tracked in Icelandic leaderboard
"is_official_provider": False # Not tracked in Icelandic leaderboard
}
metadata = {
"upload_date": None, # Not available in Icelandic data
"submission_date": None, # Not available in Icelandic data
"generation": None, # Not available in Icelandic data
"base_model": None, # Not available in Icelandic data
"hub_license": data.get("Hub License", ""),
"hub_hearts": data.get("Hub ❤️", 0),
"params_billions": data.get("#Params (B)", 0),
"co2_cost": 0 # Not tracked in Icelandic leaderboard
}
transformed_data = {
"id": unique_id,
"model": {
"name": model_name,
"sha": revision,
"precision": precision,
"type": clean_model_type,
"weight_type": None, # Not available in Icelandic data
"architecture": data.get("Architecture", "Unknown"),
"average_score": data.get("Average ⬆️", 0),
"has_chat_template": False, # Not tracked in Icelandic leaderboard
"reasoning": data.get("Reasoning", False), # Reasoning enabled flag
"note": data.get("Note", "") # Extra model information
},
"evaluations": evaluations,
"features": features,
"metadata": metadata
}
return transformed_data