#!/bin/bash
# Evaluate one model across several math benchmarks, dispatching one task per free GPU.

export VLLM_DISABLE_COMPILE_CACHE=1

model_name=$1
MODEL_NAMES=(
  "$model_name"
)
TASKS=(
  "math"
  "gsm8k"
  "amc"
  "minerva"
  "olympiad"
  "aime2024"
  "aime2025"
)
# Discover all visible GPUs; the queue holds GPUs that are currently idle.
GPU_QUEUE=($(nvidia-smi --query-gpu=index --format=csv,noheader))
echo "Available GPUs: ${GPU_QUEUE[*]}"

# Map: GPU id -> PID of the job running on it.
declare -A pids
# Launch one generation job in the background on the given GPU and record its PID.
start_job() {
  local gpu_id="$1"
  local model="$2"
  local task="$3"
  echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] Start task [${task}] with model [${model}] on GPU [${gpu_id}] ..."
  CUDA_VISIBLE_DEVICES="${gpu_id}" \
    python evaluation/generate.py --model "${model}" --dataset "${task}" &
  pids["${gpu_id}"]=$!
}
for MODEL_NAME in "${MODEL_NAMES[@]}"; do
  echo "==> Processing model: ${MODEL_NAME}"
  TASK_INDEX=0
  NUM_TASKS=${#TASKS[@]}
  while :; do
    # Assign pending tasks to idle GPUs.
    while [ ${#GPU_QUEUE[@]} -gt 0 ] && [ ${TASK_INDEX} -lt ${NUM_TASKS} ]; do
      gpu_id="${GPU_QUEUE[0]}"
      GPU_QUEUE=("${GPU_QUEUE[@]:1}")
      task="${TASKS[${TASK_INDEX}]}"
      ((TASK_INDEX++))
      start_job "$gpu_id" "$MODEL_NAME" "$task"
    done

    # Done once every task has been dispatched and every job has exited.
    if [ ${TASK_INDEX} -ge ${NUM_TASKS} ] && [ ${#pids[@]} -eq 0 ]; then
      break
    fi

    # Poll running jobs; return the GPU of any finished job to the idle queue.
    for gpu_id in "${!pids[@]}"; do
      pid="${pids[$gpu_id]}"
      if ! kill -0 "$pid" 2>/dev/null; then
        echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] GPU [${gpu_id}] job finished with PID [${pid}]."
        unset pids["$gpu_id"]
        GPU_QUEUE+=("$gpu_id")
      fi
    done
    sleep 1
  done
done
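# With all per-task generation jobs done, score the outputs and run the remaining suites.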
python evaluation/results_recheck.py --model_name "$model_name" &
python evaluation/eval_supergpqa.py --model_path "$model_name"
python evaluation/eval_bbeh.py --model_path "$model_name"
python evaluation/eval_mmlupro.py --model_path "$model_name"
python evaluation/test.py --model_name "$model_name"

# Wait for the backgrounded results_recheck.py before declaring completion.
wait
echo "==> All tasks have finished!"
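
# Usage (a minimal sketch; the filename run_eval.sh and the model path are
# illustrative assumptions, not given above):
#   bash run_eval.sh /path/to/your-model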