mlfoundations-dev/hp_ablations_grid_qwen_bsz512_lr8e-6
Text Generation
•
8B
•
Updated
•
2
mlfoundations-dev/hp_ablations_grid_qwen_bsz256_lr5e-6
Text Generation
•
8B
•
Updated
•
2
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr5e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
6
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr5e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
6
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr5e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/llama3-1_8b_webinstruct_original_700k
Text Generation
•
8B
•
Updated
•
4
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz2048_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
5
mlfoundations-dev/hp_ablations_grid_mistral_bsz4096_lr2e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr2e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.15-minlr5e-7
Text Generation
•
7B
•
Updated
•
2
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.15
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/hp_ablations_grid_mistral_bsz1024_lr5e-6_scheduler-cosine-warmup0.05-minlr5e-7
Text Generation
•
7B
•
Updated
•
3
mlfoundations-dev/oh_v1.3_slim_orca_x4
Text Generation
•
8B
•
Updated
•
5
mlfoundations-dev/original_tiger_dataset_small
Text Generation
•
8B
•
Updated
•
4
mlfoundations-dev/hp_ablations_gemma_epoch4
Text Generation
•
9B
•
Updated
•
3
mlfoundations-dev/hp_ablations_gemma_epoch2
Text Generation
•
9B
•
Updated
•
3
mlfoundations-dev/oh-dcft-v3.1-llama-3.1-8b
Text Generation
•
8B
•
Updated
•
4
•
1
mlfoundations-dev/oh-dcft-v3.1-claude-3-5-haiku-20241022
Text Generation
•
8B
•
Updated
•
4
•
5
mlfoundations-dev/oh_v1.3_metamath_x8
Text Generation
•
8B
•
Updated
•
3
mlfoundations-dev/hp_ablations_gemma_epoch3
Text Generation
•
9B
•
Updated
•
3
mlfoundations-dev/hp_ablations_gemma_epoch4_dcftv1.2
Text Generation
•
9B
•
Updated
•
3
mlfoundations-dev/hp_ablations_gemma_bsz1024
Text Generation
•
9B
•
Updated
•
3