# trl-sandbox/docs/source/_toctree.yml
# ivangabriele — "feat: initialize project" (2f5127c, verified)
# Getting started: landing page, installation, and quickstart.
# Each entry pairs `local` (page identifier — presumably the doc source file
# name without extension; confirm against the doc builder) with the `title`
# shown for it. The trailing `title` names the whole group.
- sections:
  - local: index
    title: TRL
  - local: installation
    title: Installation
  - local: quickstart
    title: Quickstart
  title: Getting started
# Conceptual guides: dataset formats, training FAQ, and log interpretation.
- sections:
  - local: dataset_formats
    title: Dataset Formats
  - local: how_to_train
    title: Training FAQ
  - local: logging
    title: Understanding Logs
  title: Conceptual Guides
# How-to guides: CLI usage, customization, memory/speed tuning,
# distributed training, and using trained models.
- sections:
  - local: clis
    title: Command Line Interface (CLI)
  - local: customization
    title: Customizing the Training
  - local: reducing_memory_usage
    title: Reducing Memory Usage
  - local: speeding_up_training
    title: Speeding Up Training
  - local: distributing_training
    title: Distributing Training
  - local: use_model
    title: Using Trained Models
  title: How-to guides
# Integrations: one page per third-party library.
- sections:
  - local: deepspeed_integration
    title: DeepSpeed
  - local: liger_kernel_integration
    title: Liger Kernel
  - local: peft_integration
    title: PEFT
  - local: unsloth_integration
    title: Unsloth
  - local: vllm_integration
    title: vLLM
  title: Integrations
# Examples: overview, community tutorials, and worked walkthroughs.
- sections:
  - local: example_overview
    title: Example Overview
  - local: community_tutorials
    title: Community Tutorials
  - local: sentiment_tuning
    title: Sentiment Tuning
  - local: using_llama_models
    title: Training StackLlama
  - local: detoxifying_a_lm
    title: Detoxifying a Language Model
  - local: multi_adapter_rl
    title: Multi Adapter RLHF
  - local: training_vlm_sft
    title: Fine-tuning a Multimodal Model Using SFT (Single or Multi-Image Dataset)
  title: Examples
# API reference: a nested "Trainers" group followed by the remaining
# reference pages (models, utilities, judges, callbacks, ...).
- sections:
  # Trainers are sorted alphabetically by title; the variants "Online DPO"
  # and "Iterative SFT" are placed directly after DPO and SFT respectively.
  - sections:
    - local: alignprop_trainer
      title: AlignProp
    - local: bco_trainer
      title: BCO
    - local: cpo_trainer
      title: CPO
    - local: ddpo_trainer
      title: DDPO
    - local: dpo_trainer
      title: DPO
    - local: online_dpo_trainer
      title: Online DPO
    - local: gkd_trainer
      title: GKD
    - local: grpo_trainer
      title: GRPO
    - local: kto_trainer
      title: KTO
    - local: nash_md_trainer
      title: Nash-MD
    - local: orpo_trainer
      title: ORPO
    - local: ppo_trainer
      title: PPO
    - local: prm_trainer
      title: PRM
    - local: reward_trainer
      title: Reward
    - local: rloo_trainer
      title: RLOO
    - local: sft_trainer
      title: SFT
    - local: iterative_sft_trainer
      title: Iterative SFT
    - local: xpo_trainer
      title: XPO
    title: Trainers
  - local: models
    title: Model Classes
  - local: model_utils
    title: Model Utilities
  - local: best_of_n
    title: Best of N Sampling
  - local: judges
    title: Judges
  - local: callbacks
    title: Callbacks
  - local: data_utils
    title: Data Utilities
  - local: rewards
    title: Reward Functions
  - local: script_utils
    title: Script Utilities
  - local: others
    title: Others
  title: API