"""
Central configuration for models and providers

This file centralizes all configurations related to models and providers used in the application.
"""

# Preferred inference providers, in priority order; used in get_available_model_provider.py
# Previous, shorter list:
# PREFERRED_PROVIDERS = ["sambanova", "novita"]
PREFERRED_PROVIDERS = ["fireworks-ai", "cerebras", "sambanova", "together", "nebius", "novita", "hyperbolic", "cohere", "hf-inference"]
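
# A minimal sketch (hypothetical helper, not the actual logic in
# get_available_model_provider.py) of how the providers reported for a model
# could be ordered by the preference list above:
def _sort_by_preference(providers: list[str]) -> list[str]:
    """Order providers so entries of PREFERRED_PROVIDERS come first, in list order."""
    def rank(provider: str) -> int:
        if provider in PREFERRED_PROVIDERS:
            return PREFERRED_PROVIDERS.index(provider)
        return len(PREFERRED_PROVIDERS)  # unknown providers sort last
    return sorted(providers, key=rank)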

# Default models to use for evaluation
DEFAULT_EVALUATION_MODELS = [
    "Qwen/Qwen3-30B-A3B",
    "Qwen/Qwen3-235B-A22B",
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "deepseek-ai/DeepSeek-V3-0324",
    "google/gemma-3-27b-it",
    "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
]
# DEFAULT_EVALUATION_MODELS = [
#     "Qwen/QwQ-32B",
#     "Qwen/Qwen2.5-72B-Instruct",
#     "Qwen/Qwen2.5-32B-Instruct",
#     "meta-llama/Llama-3.1-8B-Instruct",
#     "meta-llama/Llama-3.3-70B-Instruct",
#     "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
#     "mistralai/Mistral-Small-24B-Instruct-2501",
# ]

# Alternative models to use if the default model is not available
ALTERNATIVE_BENCHMARK_MODELS = [
    "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "mistralai/Mistral-Small-24B-Instruct-2501",
    # Open-source models that may work without authentication
    "HuggingFaceH4/zephyr-7b-beta",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "microsoft/phi-2",
]

# Default model required by create_bench_config_file.py (exactly one model)
DEFAULT_BENCHMARK_MODEL = "Qwen/Qwen3-30B-A3B"
# DEFAULT_BENCHMARK_MODEL = "Qwen/Qwen2.5-32B-Instruct"
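
# A minimal sketch (hypothetical) of falling back through
# ALTERNATIVE_BENCHMARK_MODELS when the default model is unavailable.
# `is_available` is an assumed availability check supplied by the caller;
# no such helper exists in this file:
def _pick_benchmark_model(is_available) -> str:
    """Return the first available model: the default first, then each alternative in order."""
    for model in [DEFAULT_BENCHMARK_MODEL, *ALTERNATIVE_BENCHMARK_MODELS]:
        if is_available(model):
            return model
    raise RuntimeError("No benchmark model is currently available")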

# Models by role for the benchmark configuration
# All roles use the default model except chunking, which uses an embedding model
BENCHMARK_MODEL_ROLES = {
    "ingestion": [DEFAULT_BENCHMARK_MODEL],
    "summarization": [DEFAULT_BENCHMARK_MODEL],
    "chunking": ["intfloat/multilingual-e5-large-instruct"],
    "single_shot_question_generation": [DEFAULT_BENCHMARK_MODEL],
    "multi_hop_question_generation": [DEFAULT_BENCHMARK_MODEL],
}
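
# A minimal sketch (hypothetical helper) of how a pipeline step could look up
# its models from the role map above, defaulting to DEFAULT_BENCHMARK_MODEL
# for any role not listed:
def _models_for_role(role: str) -> list[str]:
    """Return the models configured for `role`, or the default benchmark model."""
    return BENCHMARK_MODEL_ROLES.get(role, [DEFAULT_BENCHMARK_MODEL])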

# Default evaluation timeout (in seconds)
DEFAULT_EVALUATION_TIMEOUT = 60.0

# Default benchmark timeout (in seconds)
DEFAULT_BENCHMARK_TIMEOUT = 300.0
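
# A minimal sketch (hypothetical) of applying DEFAULT_EVALUATION_TIMEOUT while
# evaluating each default model. `evaluate` is an assumed async callable
# supplied by the caller; nothing in this file defines it:
import asyncio

async def _evaluate_all(evaluate) -> None:
    """Run `evaluate(model)` for each default model, enforcing the evaluation timeout."""
    for model in DEFAULT_EVALUATION_MODELS:
        try:
            await asyncio.wait_for(evaluate(model), timeout=DEFAULT_EVALUATION_TIMEOUT)
        except asyncio.TimeoutError:
            print(f"Evaluation of {model} timed out after {DEFAULT_EVALUATION_TIMEOUT}s")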