Jae-Won Chung committed · b08a0ac
Parent(s): 46f6b9d

Add RWKV to nlp-eval.yaml

Files changed: pegasus/nlp-eval.yaml (+3 -0)
pegasus/nlp-eval.yaml CHANGED

@@ -19,6 +19,7 @@
     - project-baize/baize-v2-7B
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
+    - RWKV/rwkv-raven-7b
 
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks hellaswag --num_fewshot 10 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/hellaswag.json
@@ -41,6 +42,7 @@
     - project-baize/baize-v2-7B
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
+    - RWKV/rwkv-raven-7b
 
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-causal-experimental --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks truthfulqa_mc --num_fewshot 0 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/truthfulqa_mc.json
@@ -63,6 +65,7 @@
     - project-baize/baize-v2-7B
     - StabilityAI/stablelm-tuned-alpha-7b
     - togethercomputer/RedPajama-INCITE-7B-Chat
+    - RWKV/rwkv-raven-7b
 
 - command:
     - docker exec leaderboard{{ gpu }} python lm-evaluation-harness/main.py --device cuda --no_cache --model hf-seq2seq --model_args pretrained={{ model }},trust_remote_code=True,use_accelerate=True --tasks arc_challenge --num_fewshot 25 --output_path /data/leaderboard/benchmark/nlp/{{ replace model "/" "--" }}/arc_challenge.json
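
For reference, here is roughly what the first templated command would expand to for the newly added model. This is a sketch, not output from the job runner itself: the GPU index (0) is an assumption, while the substitution of {{ model }} and {{ replace model "/" "--" }} (which maps RWKV/rwkv-raven-7b to RWKV--rwkv-raven-7b in the output path) follows directly from the template above.

    docker exec leaderboard0 python lm-evaluation-harness/main.py --device cuda --no_cache \
      --model hf-causal-experimental \
      --model_args pretrained=RWKV/rwkv-raven-7b,trust_remote_code=True,use_accelerate=True \
      --tasks hellaswag --num_fewshot 10 \
      --output_path /data/leaderboard/benchmark/nlp/RWKV--rwkv-raven-7b/hellaswag.json

The same expansion applies to the truthfulqa_mc and arc_challenge commands, differing only in the task name, few-shot count, and output file.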