diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a997a5970f7125d68e65ee441e7dca8a5c3c857
--- /dev/null
+++ b/app.py
@@ -0,0 +1,82 @@
+import os
+import streamlit as st
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core.callbacks import CallbackManager
+from llama_index.llms.openai_like import OpenAILike
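+
+# To run this demo locally (the package names below are the assumed llama-index
+# integration distributions for the imports above):
+#   pip install streamlit llama-index llama-index-embeddings-huggingface llama-index-llms-openai-like
+#   streamlit run app.py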
+
+# Create an instance of CallbackManager
+callback_manager = CallbackManager()
+
+api_base_url = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1/"  # InternLM (Puyu) API endpoint
+model = "internlm2.5-latest"
+# Read the API key from the environment instead of hard-coding the secret in source control.
+# (INTERNLM_API_KEY is an assumed variable name; set it before launching the app.)
+api_key = os.getenv("INTERNLM_API_KEY", "")
+
+llm = OpenAILike(
+    model=model,
+    api_base=api_base_url,
+    api_key=api_key,
+    is_chat_model=True,
+    callback_manager=callback_manager,
+)
+
+# Fetch the embedding model once; skip the clone if it is already present locally
+if not os.path.exists("./paraphrase-multilingual-MiniLM-L12-v2"):
+    os.system('git lfs install')
+    os.system('git clone https://www.modelscope.cn/Ceceliachenen/paraphrase-multilingual-MiniLM-L12-v2.git')
+
+
+st.set_page_config(page_title="llama_index_demo", page_icon="🦜🔗")
+st.title("llama_index_demo")
+
+# Initialize the embedding model, LLM, and query engine (cached across reruns by st.cache_resource)
+@st.cache_resource
+def init_models():
+ embed_model = HuggingFaceEmbedding(
+ model_name="./paraphrase-multilingual-MiniLM-L12-v2"
+ )
+ Settings.embed_model = embed_model
+
+    # Use the LLM initialized above
+ Settings.llm = llm
+
+ documents = SimpleDirectoryReader("./data").load_data()
+ index = VectorStoreIndex.from_documents(documents)
+ query_engine = index.as_query_engine()
+
+ return query_engine
+
+# Build the query engine only once per Streamlit session
+if 'query_engine' not in st.session_state:
+ st.session_state['query_engine'] = init_models()
+
+def greet2(question):
+ response = st.session_state['query_engine'].query(question)
+ return response
+
+# Store LLM generated responses
+if "messages" not in st.session_state.keys():
+ st.session_state.messages = [{"role": "assistant", "content": "你好,我是你的助手,有什么我可以帮助你的吗?"}]
+
+# Display or clear chat messages
+for message in st.session_state.messages:
+ with st.chat_message(message["role"]):
+ st.write(message["content"])
+
+def clear_chat_history():
+ st.session_state.messages = [{"role": "assistant", "content": "你好,我是你的助手,有什么我可以帮助你的吗?"}]
+
+st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
+
+# Function for generating a llama_index response
+def generate_llama_index_response(prompt_input):
+ return greet2(prompt_input)
+
+# User-provided prompt
+if prompt := st.chat_input():
+ st.session_state.messages.append({"role": "user", "content": prompt})
+ with st.chat_message("user"):
+ st.write(prompt)
+
+# Generate a new response if the last message is not from the assistant
+if st.session_state.messages[-1]["role"] != "assistant":
+ with st.chat_message("assistant"):
+ with st.spinner("Thinking..."):
+ response = generate_llama_index_response(prompt)
+ placeholder = st.empty()
+ placeholder.markdown(response)
+ message = {"role": "assistant", "content": response}
+ st.session_state.messages.append(message)
+
diff --git a/data/xtuner/.github/CONTRIBUTING.md b/data/xtuner/.github/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..09eab9a11f2729b5bdebf211cc77fa44c62c104f
--- /dev/null
+++ b/data/xtuner/.github/CONTRIBUTING.md
@@ -0,0 +1,258 @@
+## Contributing to XTuner
+
+Welcome to the XTuner community! All kinds of contributions are welcome, including but not limited to
+
+**Fix bugs**
+
+You can directly post a Pull Request to fix typos in code or documents.
+
+The steps to fix a bug in the code implementation are as follows.
+
+1. If the modification involves significant changes, you should create an issue first that describes the error and how to trigger the bug. Other developers will discuss it with you and propose a proper solution.
+
+2. Post a pull request after fixing the bug and adding the corresponding unit tests.
+
+**New Feature or Enhancement**
+
+1. If the modification involves significant changes, you should create an issue to discuss a proper design with our developers.
+2. Post a Pull Request after implementing the new feature or enhancement and add the corresponding unit tests.
+
+**Document**
+
+You can directly post a pull request to fix the documentation. If you want to add a document, you should first create an issue to check whether it is reasonable.
+
+### Pull Request Workflow
+
+If you're not familiar with Pull Requests, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the development workflow of Pull Requests, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).
+
+#### 1. Fork and clone
+
+If you are posting a pull request for the first time, you should fork the XTuner repository by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repository will appear under your GitHub profile.
+
+
+
+Then, you can clone the repositories to local:
+
+```shell
+git clone git@github.com:{username}/xtuner.git
+```
+
+After that, you should add the official repository as the upstream repository:
+
+```bash
+git remote add upstream git@github.com:InternLM/xtuner.git
+```
+
+Check whether the remote repository has been added successfully with `git remote -v`:
+
+```bash
+origin git@github.com:{username}/xtuner.git (fetch)
+origin git@github.com:{username}/xtuner.git (push)
+upstream git@github.com:InternLM/xtuner.git (fetch)
+upstream git@github.com:InternLM/xtuner.git (push)
+```
+
+> Here's a brief introduction to origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository we cloned from. "upstream" is a remote we add ourselves, pointing to the target (official) repository. Of course, if you don't like the name "upstream", you can name it as you wish. Usually, we push code to "origin". If the pushed code conflicts with the latest code in the official repository ("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically.
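+
+A minimal, illustrative version of that sync-and-push flow (reusing the example branch name from the next step):
+
+```shell
+# Pull the latest official code into the local branch and resolve any conflicts
+git pull upstream master
+# Push the updated branch back to your fork; the open Pull Request updates automatically
+git push origin yhc/refactor_contributing_doc
+```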
+
+#### 2. Configure pre-commit
+
+You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of InternLM. **Note**: The following code should be executed under the XTuner directory.
+
+```shell
+pip install -U pre-commit
+pre-commit install
+```
+
+To check that pre-commit is configured successfully and install the hooks defined in `.pre-commit-config.yaml`, run:
+
+```shell
+pre-commit run --all-files
+```
+
+
+
+
+
+If the installation process is interrupted, you can run `pre-commit run ...` again to continue the installation.
+
+If the code does not conform to the code style specification, pre-commit will raise a warning and automatically fix some of the errors.
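+
+When pre-commit does auto-fix files, the fixed files are left unstaged; a typical (illustrative) follow-up is:
+
+```shell
+# Stage the files that pre-commit just fixed, then retry the commit
+git add -u
+git commit -m "xxx"
+```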
+
+
+
+If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option (**only for temporary commits**).
+
+```shell
+git commit -m "xxx" --no-verify
+```
+
+#### 3. Create a development branch
+
+After configuring pre-commit, we should create a branch based on the master branch to develop the new feature or fix the bug. The proposed branch name is `username/pr_name`:
+
+```shell
+git checkout -b yhc/refactor_contributing_doc
+```
+
+In subsequent development, if the master branch of the local repository falls behind the master branch of "upstream", we need to pull from upstream to synchronize, and then execute the command above:
+
+```shell
+git pull upstream master
+```
+
+#### 4. Commit the code and pass the unit test
+
+- XTuner introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check. If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html).
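+
+  A minimal, illustrative example of the kind of annotation mypy checks (the function below is invented purely for demonstration):
+
+  ```python
+  # Illustrative only: a small annotated function that mypy can type-check
+  from typing import List
+
+  def count_tokens(texts: List[str]) -> int:
+      """Return the total number of whitespace-separated tokens."""
+      return sum(len(text.split()) for text in texts)
+  ```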
+
+- The committed code should pass the unit tests
+
+ ```shell
+ # Pass all unit tests
+ pytest tests
+
+ # Pass the unit test of runner
+ pytest tests/test_runner/test_runner.py
+ ```
+
+ If the unit tests fail due to missing dependencies, you can install them by referring to the [guidance](#unit-test).
+
+- If the documents are modified/added, we should check the rendering result by referring to the [guidance](#document-rendering)
+
+#### 5. Push the code to remote
+
+We can push the local commits to the remote repository after passing the unit tests and pre-commit checks. You can associate the local branch with the remote branch by adding the `-u` option.
+
+```shell
+git push -u origin {branch_name}
+```
+
+This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository.
+
+#### 6. Create a Pull Request
+
+(1) Create a pull request in GitHub's Pull request interface
+
+
+
+(2) Modify the PR description according to the guidelines so that other developers can better understand your changes
+
+
+
+Find more details about Pull Request description in [pull request guidelines](#pr-specs).
+
+**note**
+
+(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue))
+
+(b) If it is your first contribution, please sign the CLA
+
+
+
+(c) Check whether the Pull Request passes the CI
+
+
+
+XTuner will run unit tests for the posted Pull Request on different platforms (Linux, Windows, macOS) and with different versions of Python, PyTorch, and CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code.
+
+(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewer's comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP.
+
+
+
+#### 7. Resolve conflicts
+
+If your local branch conflicts with the latest master branch of "upstream", you'll need to resolve the conflicts. There are two ways to do this:
+
+```shell
+git fetch --all --prune
+git rebase upstream/master
+```
+
+or
+
+```shell
+git fetch --all --prune
+git merge upstream/master
+```
+
+If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts.
+
+### Guidance
+
+#### Unit test
+
+If you cannot run the unit tests of some modules due to missing dependencies, such as the [video](https://github.com/open-mmlab/mmcv/tree/master/mmcv/video) module, you can try installing the following dependencies:
+
+```shell
+# Linux
+sudo apt-get update -y
+sudo apt-get install -y libturbojpeg
+sudo apt-get install -y ffmpeg
+
+# Windows
+conda install ffmpeg
+```
+
+We should also make sure that the committed code does not decrease the unit test coverage. We can run the following commands to check the coverage:
+
+```shell
+python -m coverage run -m pytest /path/to/test_file
+python -m coverage html
+# check file in htmlcov/index.html
+```
+
+#### Document rendering
+
+If the documents are modified/added, we should check the rendering result. We could install the dependencies and run the following command to render the documents and check the results:
+
+```shell
+pip install -r requirements/docs.txt
+cd docs/zh_cn/
+# or docs/en
+make html
+# check file in ./docs/zh_cn/_build/html/index.html
+```
+
+### Code style
+
+#### Python
+
+We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
+
+We use the following tools for linting and formatting:
+
+- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools.
+- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports.
+- [yapf](https://github.com/google/yapf): A formatter for Python files.
+- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files.
+- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
+- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring.
+
+Style configurations of yapf and isort can be found in [setup.cfg](../setup.cfg).
+
+We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`,
+fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, and sorts `requirements.txt` automatically on every commit.
+The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
+
+#### C++ and CUDA
+
+We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
+
+### PR Specs
+
+1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style
+
+2. One short-lived branch should be matched with only one PR
+
+3. Accomplish a detailed change in one PR. Avoid large PRs
+
+ - Bad: Support Faster R-CNN
+ - Acceptable: Add a box head to Faster R-CNN
+ - Good: Add a parameter to box head to support custom conv-layer number
+
+4. Provide clear and significant commit messages
+
+5. Provide clear and meaningful PR description
+
+ - Task name should be clarified in title. The general format is: \[Prefix\] Short description of the PR (Suffix)
+ - Prefix: add new feature \[Feature\], fix bug \[Fix\], related to documents \[Docs\], in developing \[WIP\] (which will not be reviewed temporarily)
+ - Introduce main changes, results and influences on other modules in short description
+ - Associate related issues and pull requests with a milestone
diff --git a/data/xtuner/.github/workflows/deploy.yml b/data/xtuner/.github/workflows/deploy.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b2c6f0bc208ca0f3d2aba1d4dc04d97fb51cacbd
--- /dev/null
+++ b/data/xtuner/.github/workflows/deploy.yml
@@ -0,0 +1,26 @@
+name: deploy
+
+on: push
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build-n-publish:
+ runs-on: ubuntu-latest
+ if: startsWith(github.event.ref, 'refs/tags')
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python 3.8
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Build XTuner
+ run: |
+ pip install wheel
+ python setup.py sdist bdist_wheel
+ - name: Publish distribution to PyPI
+ run: |
+ pip install twine
+ twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}
diff --git a/data/xtuner/.github/workflows/lint.yml b/data/xtuner/.github/workflows/lint.yml
new file mode 100644
index 0000000000000000000000000000000000000000..74a733eb81e8e3e3b7c6ca1c08de8856d6cfb81e
--- /dev/null
+++ b/data/xtuner/.github/workflows/lint.yml
@@ -0,0 +1,23 @@
+name: lint
+
+on: [push, pull_request]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python 3.8
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+ - name: Install pre-commit hook
+ run: |
+ pip install pre-commit
+ pre-commit install
+ - name: Linting
+ run: pre-commit run --all-files
diff --git a/data/xtuner/.gitignore b/data/xtuner/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ffe3444b8cdb2ec3e6791d047d0593fcf9d20d41
--- /dev/null
+++ b/data/xtuner/.gitignore
@@ -0,0 +1,124 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/*/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# custom
+data/
+data
+.vscode
+.idea
+.DS_Store
+*.pkl
+*.pkl.json
+*.log.json
+work_dirs/
+
+# Pytorch
+*.pth
+*.py~
+*.sh~
+
+# srun
+*.out
+batchscript-*
diff --git a/data/xtuner/.owners.yml b/data/xtuner/.owners.yml
new file mode 100644
index 0000000000000000000000000000000000000000..996ae4c69c03821b2b79a1b7a4233988cf0623ee
--- /dev/null
+++ b/data/xtuner/.owners.yml
@@ -0,0 +1,8 @@
+assign:
+ issues: disabled
+ pull_requests: disabled
+ strategy:
+ random
+ # daily-shift-based
+ schedule:
+ '*/1 * * * *'
diff --git a/data/xtuner/.pre-commit-config-zh-cn.yaml b/data/xtuner/.pre-commit-config-zh-cn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b9f51976e4b46db4db69952f437e43d72581070
--- /dev/null
+++ b/data/xtuner/.pre-commit-config-zh-cn.yaml
@@ -0,0 +1,51 @@
+exclude: ^tests/data/|^xtuner/tools/model_converters/modeling_internlm2_reward/
+repos:
+ - repo: https://gitee.com/openmmlab/mirrors-flake8
+ rev: 5.0.4
+ hooks:
+ - id: flake8
+ args: ["--exclude=xtuner/model/transformers_models/*"]
+ - repo: https://gitee.com/openmmlab/mirrors-isort
+ rev: 5.11.5
+ hooks:
+ - id: isort
+ - repo: https://gitee.com/openmmlab/mirrors-yapf
+ rev: v0.32.0
+ hooks:
+ - id: yapf
+ - repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks
+ rev: v4.3.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: requirements-txt-fixer
+ - id: double-quote-string-fixer
+ - id: check-merge-conflict
+ - id: fix-encoding-pragma
+ args: ["--remove"]
+ - id: mixed-line-ending
+ args: ["--fix=lf"]
+ - repo: https://gitee.com/openmmlab/mirrors-codespell
+ rev: v2.2.1
+ hooks:
+ - id: codespell
+ - repo: https://gitee.com/openmmlab/mirrors-mdformat
+ rev: 0.7.9
+ hooks:
+ - id: mdformat
+ args: ["--number"]
+ additional_dependencies:
+ - mdformat-openmmlab
+ - mdformat_frontmatter
+ - linkify-it-py
+ - repo: https://gitee.com/openmmlab/mirrors-docformatter
+ rev: v1.3.1
+ hooks:
+ - id: docformatter
+ args: ["--in-place", "--wrap-descriptions", "79"]
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.0.0
+ hooks:
+ - id: pyupgrade
+ args: ["--py36-plus"]
diff --git a/data/xtuner/.pre-commit-config.yaml b/data/xtuner/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f6bbfd6339aeba49dbae8a0edc425a6e3f0c8eb2
--- /dev/null
+++ b/data/xtuner/.pre-commit-config.yaml
@@ -0,0 +1,53 @@
+exclude: ^tests/data/|^xtuner/tools/model_converters/modeling_internlm2_reward/
+repos:
+ - repo: https://github.com/PyCQA/flake8
+ rev: 5.0.4
+ hooks:
+ - id: flake8
+ args: ["--exclude=xtuner/model/transformers_models/*"]
+ - repo: https://github.com/PyCQA/isort
+ rev: 5.11.5
+ hooks:
+ - id: isort
+ - repo: https://github.com/pre-commit/mirrors-yapf
+ rev: v0.32.0
+ hooks:
+ - id: yapf
+ exclude: 'xtuner/parallel/sequence/__init__.py'
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.3.0
+ hooks:
+ - id: trailing-whitespace
+ - id: check-yaml
+ - id: end-of-file-fixer
+ - id: requirements-txt-fixer
+ - id: double-quote-string-fixer
+ - id: check-merge-conflict
+ - id: fix-encoding-pragma
+ args: ["--remove"]
+ - id: mixed-line-ending
+ args: ["--fix=lf"]
+ - repo: https://github.com/codespell-project/codespell
+ rev: v2.2.1
+ hooks:
+ - id: codespell
+ - repo: https://github.com/executablebooks/mdformat
+ rev: 0.7.9
+ hooks:
+ - id: mdformat
+ args: ["--number"]
+ additional_dependencies:
+ - mdformat-openmmlab
+ - mdformat_frontmatter
+ - linkify-it-py
+ exclude: 'docs/zh_cn/user_guides/sequence_parallel.md'
+ - repo: https://github.com/myint/docformatter
+ rev: v1.3.1
+ hooks:
+ - id: docformatter
+ args: ["--in-place", "--wrap-descriptions", "79"]
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.0.0
+ hooks:
+ - id: pyupgrade
+ args: ["--py36-plus"]
diff --git a/data/xtuner/LICENSE b/data/xtuner/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64
--- /dev/null
+++ b/data/xtuner/LICENSE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/data/xtuner/MANIFEST.in b/data/xtuner/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..36e1610bf8093a8355a58d7d9779697a64931313
--- /dev/null
+++ b/data/xtuner/MANIFEST.in
@@ -0,0 +1,2 @@
+recursive-include xtuner/configs *.py *.yml *.json
+recursive-include xtuner/tools *.sh *.py
diff --git a/data/xtuner/README.md b/data/xtuner/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..263d300c7a17778e3be4ff6f64cd262995f98527
--- /dev/null
+++ b/data/xtuner/README.md
@@ -0,0 +1,302 @@
+<!-- Residue of the README header (badges and an HTML table). Recoverable information: table columns "Models", "SFT Datasets", "Data Pipelines", "Algorithms"; tagline "All-IN-ONE toolbox for LLM". -->
+ + + + + +Documentation +------------- +.. toctree:: + :maxdepth: 2 + :caption: Get Started + + get_started/overview.md + get_started/installation.md + get_started/quickstart.md + +.. toctree:: + :maxdepth: 2 + :caption: Preparation + + preparation/pretrained_model.rst + preparation/prompt_template.rst + +.. toctree:: + :maxdepth: 2 + :caption: Training + + training/modify_settings.rst + training/custom_sft_dataset.rst + training/custom_pretrain_dataset.rst + training/custom_agent_dataset.rst + training/multi_modal_dataset.rst + training/open_source_dataset.rst + training/visualization.rst + +.. toctree:: + :maxdepth: 2 + :caption: DPO + + dpo/overview.md + dpo/quick_start.md + dpo/modify_settings.md + +.. toctree:: + :maxdepth: 2 + :caption: Reward Model + + reward_model/overview.md + reward_model/quick_start.md + reward_model/modify_settings.md + reward_model/preference_data.md + +.. toctree:: + :maxdepth: 2 + :caption: Acceleration + + acceleration/deepspeed.rst + acceleration/pack_to_max_length.rst + acceleration/flash_attn.rst + acceleration/varlen_flash_attn.rst + acceleration/hyper_parameters.rst + acceleration/length_grouped_sampler.rst + acceleration/train_large_scale_dataset.rst + acceleration/train_extreme_long_sequence.rst + acceleration/benchmark.rst + +.. toctree:: + :maxdepth: 2 + :caption: Chat + + chat/llm.md + chat/agent.md + chat/vlm.md + chat/lmdeploy.md + +.. toctree:: + :maxdepth: 2 + :caption: Evaluation + + evaluation/hook.md + evaluation/mmlu.md + evaluation/mmbench.md + evaluation/opencompass.md + +.. toctree:: + :maxdepth: 2 + :caption: Models + + models/supported.md + +.. toctree:: + :maxdepth: 2 + :caption: InternEvo Migration + + internevo_migration/internevo_migration.rst + internevo_migration/ftdp_dataset/ftdp.rst + internevo_migration/ftdp_dataset/Case1.rst + internevo_migration/ftdp_dataset/Case2.rst + internevo_migration/ftdp_dataset/Case3.rst + internevo_migration/ftdp_dataset/Case4.rst diff --git a/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case1.rst b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case1.rst new file mode 100644 index 0000000000000000000000000000000000000000..c8eb0c76afa4c5630d910c3fce05eea62e2a9a08 --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case1.rst @@ -0,0 +1,2 @@ +Case 1 +====== diff --git a/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case2.rst b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case2.rst new file mode 100644 index 0000000000000000000000000000000000000000..74069f68f830fe2de5ee641266b4a9aad585ea7a --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case2.rst @@ -0,0 +1,2 @@ +Case 2 +====== diff --git a/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case3.rst b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case3.rst new file mode 100644 index 0000000000000000000000000000000000000000..d963b538b55c70a12978e738e1f3d6db399f445f --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case3.rst @@ -0,0 +1,2 @@ +Case 3 +====== diff --git a/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case4.rst b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case4.rst new file mode 100644 index 0000000000000000000000000000000000000000..1f7626933c512221449355c3eae138d9ea681955 --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/Case4.rst @@ -0,0 +1,2 @@ +Case 4 +====== diff --git a/data/xtuner/docs/en/internevo_migration/ftdp_dataset/ftdp.rst 
b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/ftdp.rst new file mode 100644 index 0000000000000000000000000000000000000000..613568f151b54848f747c0740161d01e905359a2 --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/ftdp_dataset/ftdp.rst @@ -0,0 +1,2 @@ +ftdp +==== diff --git a/data/xtuner/docs/en/internevo_migration/internevo_migration.rst b/data/xtuner/docs/en/internevo_migration/internevo_migration.rst new file mode 100644 index 0000000000000000000000000000000000000000..869206508d772d8503003f7669a134a1d44fce7e --- /dev/null +++ b/data/xtuner/docs/en/internevo_migration/internevo_migration.rst @@ -0,0 +1,2 @@ +InternEVO Migration +=================== diff --git a/data/xtuner/docs/en/make.bat b/data/xtuner/docs/en/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..954237b9b9f2b248bb1397a15c055c0af1cad03e --- /dev/null +++ b/data/xtuner/docs/en/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/data/xtuner/docs/en/models/supported.md b/data/xtuner/docs/en/models/supported.md new file mode 100644 index 0000000000000000000000000000000000000000..c61546e5209d69ef0824b54bada46c18de3d8f72 --- /dev/null +++ b/data/xtuner/docs/en/models/supported.md @@ -0,0 +1 @@ +# Supported Models diff --git a/data/xtuner/docs/en/notes/changelog.md b/data/xtuner/docs/en/notes/changelog.md new file mode 100644 index 0000000000000000000000000000000000000000..2c9678539d213e5bbca90bbf4449cfbe4dfd7936 --- /dev/null +++ b/data/xtuner/docs/en/notes/changelog.md @@ -0,0 +1,25 @@ + + +# Changelog + +## v0.1.0 (2023.08.30) + +XTuner is released! 🔥🔥🔥 + +### Highlights + +- XTuner supports LLM fine-tuning on consumer-grade GPUs. The minimum GPU memory required for 7B LLM fine-tuning is only **8GB**. +- XTuner supports various LLMs, datasets, algorithms and training pipelines. +- Several fine-tuned adapters are released simultaneously, including various gameplays such as the colorist LLM, plugins-based LLM, and many more. For further details, please visit [XTuner on HuggingFace](https://huggingface.co/xtuner)! 
diff --git a/data/xtuner/docs/en/preparation/pretrained_model.rst b/data/xtuner/docs/en/preparation/pretrained_model.rst new file mode 100644 index 0000000000000000000000000000000000000000..a3ac291ac1e74801c032a581b9e0b2afaf180a91 --- /dev/null +++ b/data/xtuner/docs/en/preparation/pretrained_model.rst @@ -0,0 +1,2 @@ +Pretrained Model +================ diff --git a/data/xtuner/docs/en/preparation/prompt_template.rst b/data/xtuner/docs/en/preparation/prompt_template.rst new file mode 100644 index 0000000000000000000000000000000000000000..43ccb98e31eaca7c05368628475613f515371810 --- /dev/null +++ b/data/xtuner/docs/en/preparation/prompt_template.rst @@ -0,0 +1,2 @@ +Prompt Template +=============== diff --git a/data/xtuner/docs/en/reward_model/modify_settings.md b/data/xtuner/docs/en/reward_model/modify_settings.md new file mode 100644 index 0000000000000000000000000000000000000000..4f41ca300865bc83bd02b727cc6b61696f8617fb --- /dev/null +++ b/data/xtuner/docs/en/reward_model/modify_settings.md @@ -0,0 +1,100 @@ +## Modify Reward Model Training Configuration + +This section introduces the config related to Reward Model training. For more details on XTuner config files, please refer to [Modify Settings](https://xtuner.readthedocs.io/zh-cn/latest/training/modify_settings.html). + +### Loss Function + +XTuner uses the [Bradley–Terry Model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) for preference modeling in the Reward Model. You can specify `loss_type="ranking"` to use ranking loss. XTuner also implements the focal loss function proposed in InternLM2, which adjusts the weights of difficult and easy samples to avoid overfitting. You can set `loss_type="focal"` to use this loss function. For a detailed explanation of this loss function, please refer to the [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297). + +Additionally, to maintain stable reward model output scores, we have added a constraint term in the loss. You can specify `penalty_type='log_barrier'` or `penalty_type='L2'` to enable log barrier or L2 constraints, respectively. + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +loss_type = 'focal' # 'ranking' or 'focal' +penalty_type = 'log_barrier' # 'log_barrier' or 'L2' +``` + +### Modifying the Model + +Users can modify `pretrained_model_name_or_path` to change the pretrained model. + +Note that XTuner calculates reward scores by appending a special token at the end of the data. Therefore, when switching models with different vocabularies, the ID of this special token also needs to be modified accordingly. We usually use an unused token at the end of the vocabulary as the reward token. 
+ +For example, in InternLM2, we use `[UNUSED_TOKEN_130]` as the reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft' +reward_token_id = 92527 # use [UNUSED_TOKEN_130] as reward token +``` + +If the user switches to the llama3 model, we can use `<|reserved_special_token_0|>` as the reward token: + +```python +####################################################################### +# PART 1 Settings # +####################################################################### +# Model +pretrained_model_name_or_path = 'meta-llama/Meta-Llama-3-8B-Instruct' +reward_token_id = 128002 # use <|reserved_special_token_0|> as reward token +``` + +### Training Data + +In Reward Model training, you can specify the maximum number of tokens for a single sample sequence using `max_length`. XTuner will automatically truncate or pad the data. + +```python +# Data +max_length = 2048 +``` + +In the configuration file, we use the `train_dataset` field to specify the training dataset. You can specify the dataset loading method using the `dataset` field and the dataset mapping function using the `dataset_map_fn` field. + +```python +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +sampler = SequenceParallelSampler \ + if sequence_parallel_size > 1 else DefaultSampler + +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_dataset, + path='argilla/ultrafeedback-binarized-preferences-cleaned'), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=orpo_dpo_mix_40k_map_fn, + is_dpo=False, + is_reward=True, + reward_token_id=reward_token_id, + num_proc=32, + use_varlen_attn=use_varlen_attn, + max_packed_length=max_packed_length, + shuffle_before_pack=True, +) + +train_dataloader = dict( + batch_size=batch_size, + num_workers=dataloader_num_workers, + dataset=train_dataset, + sampler=dict(type=sampler, shuffle=True), + collate_fn=dict( + type=preference_collate_fn, use_varlen_attn=use_varlen_attn)) +``` + +In the above configuration, we use `load_dataset` to load the `argilla/ultrafeedback-binarized-preferences-cleaned` dataset from Hugging Face, using `orpo_dpo_mix_40k_map_fn` as the dataset mapping function (this is because `orpo_dpo_mix_40k` and `ultrafeedback-binarized-preferences-cleaned` have the same format, so the same mapping function is used). + +For more information on handling datasets and writing dataset mapping functions, please refer to the [Preference Data Section](./preference_data.md). + +### Accelerating Training + +When training with preference data, we recommend enabling the [Variable-Length Attention Mechanism](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/varlen_flash_attn.html) to avoid memory waste caused by length differences between chosen and rejected samples within a single preference. You can enable the variable-length attention mechanism by setting `use_varlen_attn=True`. + +XTuner also supports many training acceleration methods. For details on how to use them, please refer to the [Acceleration Strategies Section](https://xtuner.readthedocs.io/zh-cn/latest/acceleration/hyper_parameters.html). 
diff --git a/data/xtuner/docs/en/reward_model/overview.md b/data/xtuner/docs/en/reward_model/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..eb210140c7e88df9912429d900709f54cfa3be5b --- /dev/null +++ b/data/xtuner/docs/en/reward_model/overview.md @@ -0,0 +1,43 @@ +## Introduction to Reward Model + +### Overview + +The Reward Model is a crucial component in the reinforcement learning process. Its primary task is to predict reward values based on given inputs, guiding the direction of the learning algorithm. In RLHF (Reinforcement Learning from Human Feedback), the Reward Model acts as a proxy for human preferences, helping the reinforcement learning algorithm optimize strategies more effectively. + +In large language model training, the Reward Model typically refers to the Preference Model. By providing good and bad (chosen & rejected) responses to the same prompts during training, it fits human preferences and predicts a reward value during inference to guide the optimization of the Actor model in the RLHF process. + +Applications of the Reward Model include but are not limited to: + +- **RLHF Training**: During RLHF training such as the Proximal Policy Optimization (PPO) algorithm, the Reward Model provides reward signals, improve the quality of generated content, and align it more closely with human preferences. +- **BoN Sampling**: In the Best-of-N (BoN) sampling process, users can use the Reward Model to score multiple responses to the same prompt and select the highest-scoring generated result, thereby enhancing the model's output. +- **Data Construction**: The Reward Model can be used to evaluate and filter training data or replace manual annotation to construct DPO training data. + +### Features of Reward Model Training in XTuner + +The Reward Model training in XTuner offers the following significant advantages: + +1. **Latest Training Techniques**: XTuner integrates the Reward Model training loss function from InternLM2, which stabilizes the numerical range of reward scores and reduces overfitting on simple samples (see [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297) for details). + +2. **Reducing Memory Waste**: Due to the length differences in chosen and rejected data in preference datasets, padding tokens during data concatenation can cause memory waste. In XTuner, by utilizing the variable-length attention feature from Flash Attention2, preference pairs are packed into the same sequence during training, significantly reducing memory waste caused by padding tokens. This not only improves memory efficiency but also allows for training larger models or handling more data under the same hardware conditions. + + + +3. **Efficient Training**: Leveraging XTuner's QLoRA training capabilities, we can perform full parameter training only on the Reward Model's Value Head, while using QLoRA fine-tuning on the language model itself, substantially reducing the memory overhead of model training. + +4. **Long Text Training**: With XTuner's sequence parallel functionality, long text data can be trained efficiently. + + + +### Getting Started + +Refer to the [Quick Start Guide](./quick_start.md) to understand the basic concepts. For more information on configuring training parameters, please see the [Modifying Reward Model Settings](./modify_settings.md) section. 
+ +### Open-source Models + +We use XTuner to train the InternLM2 Reward Models from the InternLM2 Technical Report, welcome to download and use: + +| Model | Transformers(HF) | ModelScope(HF) | OpenXLab(HF) | RewardBench Score | +| ------------------------- | -------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | +| **InternLM2-1.8B-Reward** | [🤗internlm2-1_8b-reward](https://huggingface.co/internlm/internlm2-1_8b-reward) | [internlm2-1_8b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-1_8b-reward/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-1_8b-reward) | 80.6 | +| **InternLM2-7B-Reward** | [🤗internlm2-7b-reward](https://huggingface.co/internlm/internlm2-7b-reward) | [internlm2-7b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-7b-reward/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-7b-reward) | 86.6 | +| **InternLM2-20B-Reward** | [🤗internlm2-20b-reward](https://huggingface.co/internlm/internlm2-20b-reward) | [internlm2-20b-reward](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-20b-reward/summary) | [](https://openxlab.org.cn/models/detail/OpenLMLab/internlm2-20b-reward) | 89.5 | diff --git a/data/xtuner/docs/en/reward_model/preference_data.md b/data/xtuner/docs/en/reward_model/preference_data.md new file mode 100644 index 0000000000000000000000000000000000000000..2f304e627a29bc8e6acb73705a15f676551c5d24 --- /dev/null +++ b/data/xtuner/docs/en/reward_model/preference_data.md @@ -0,0 +1,110 @@ +## Preference Dataset + +### Overview + +XTuner's Reward Model, along with DPO, ORPO, and other algorithms that training on preference data, adopts the same data format. Each training sample in the preference dataset needs to contain the following three fields: `prompt`, `chosen`, and `rejected`. The values for each field follow the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format. A specific example is as follows: + +```json +{ + "prompt": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Who won the world series in 2020?" + }, + { + "role": "assistant", + "content": "The Los Angeles Dodgers won the World Series in 2020." + }, + { + "role": "user", + "content": "Where was it played?" + } + ], + "chosen": [ + { + "role": "assistant", + "content": "The 2020 World Series was played at Globe Life Field in Arlington, Texas." + } + ], + "rejected": [ + { + "role": "assistant", + "content": "I don't know." + } + ] +} +``` + +When conducting Reward Model training or DPO training, XTuner processes the preference dataset into different training labels based on the type of training task. + + + +As shown in the above image, for Reward Model training, we follow the ChatGPT training method by adding a special `<|reward|>` token at the end of the conversation data and calculating the loss only on the logits output by this token. For DPO series algorithm training, we mask the tokens in the prompt part and calculate the loss only on the chosen and rejected responses. In the configuration file, we control the dataset type through the `is_reward` and `is_dpo` fields in the dataset. 
+ +### Training with Custom Data + +After understanding the data format of a single sample, you only need to save the converted data in [JSON Lines](https://jsonlines.org/) format. Each line in the file is a sample. + +```json +{"prompt": [{"content": "How can I learn to better regulate my emotions as a grown adult?", "role": "user"}], "chosen": [{"content": "You could do emotional regulation exercises with me, and try to notice and practice ways that you could better control your emotions in real life.", "role": "assistant"}], "rejected": [{"content": "Do you mean how can you learn to be more emotionally mature?", "role": "assistant"}]} +{"prompt": [{"content": "What are some interesting things I can add to a grilled cheese sandwich?", "role": "user"}, {"content": "A tomato, a slice of pickle, some chopped pickles, some chopped green onions. Also, some mayonnaise if you like.", "role": "assistant"}, {"content": "Sounds great. Anything else?", "role": "user"}], "chosen": [{"content": "If you’d like something a little different, I recommend grating some horseradish on top of the sandwich.", "role": "assistant"}], "rejected": [{"content": "Maybe some nice fresh basil?", "role": "assistant"}]} +{"prompt": [{"content": "How do I bob for apples?", "role": "user"}], "chosen": [{"content": "Ah! Do you mean the game of “applesauce,” or do you mean the idea of “to bob for apples”?", "role": "assistant"}], "rejected": [{"content": "Sorry, I don’t know that term.", "role": "assistant"}]} +...... +``` + +After preparing the custom dataset, you need to fill in the path to your saved data in the `data_files` field in the configuration file. You can load multiple JSONL files simultaneously for training. + +```python +####################################################################### +# PART 3 Dataset & Dataloader # +####################################################################### +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_jsonl_dataset, + data_files=[ + '/your/jsonl/path/here.jsonl', + '/your/another/jsonl/path/here.jsonl' + ]), +) +``` + +### Training with Open Source Datasets + +Similar to configuring SFT data in XTuner, when using open-source datasets from Hugging Face, you only need to define a mapping function `map_fn` to process the dataset format into XTuner's data format. + +Taking `Intel/orca_dpo_pairs` as an example, this dataset has `system`, `question`, `chosen`, and `rejected` fields, with each field's value in text format instead of the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format. Therefore, we need to define a mapping function for this dataset: + +```python +def intel_orca_dpo_map_fn(example): + prompt = [{ + 'role': 'system', + 'content': example['system'] + }, { + 'role': 'user', + 'content': example['question'] + }] + chosen = [{'role': 'assistant', 'content': example['chosen']}] + rejected = [{'role': 'assistant', 'content': example['rejected']}] + return {'prompt': prompt, 'chosen': chosen, 'rejected': rejected} +``` + +As shown in the code, `intel_orca_dpo_map_fn` processes the four fields in the original data, converting them into `prompt`, `chosen`, and `rejected` fields, and ensures each field follows the [OpenAI chat message](https://platform.openai.com/docs/api-reference/chat/create) format, maintaining uniformity in subsequent data processing flows. 
+ +After defining the mapping function, you need to import it in the configuration file and configure it in the `dataset_map_fn` field. + +```python +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_dataset, + path='Intel/orca_dpo_pairs'), + tokenizer=tokenizer, + max_length=max_length, + dataset_map_fn=intel_orca_dpo_map_fn, +) +``` diff --git a/data/xtuner/docs/en/reward_model/quick_start.md b/data/xtuner/docs/en/reward_model/quick_start.md new file mode 100644 index 0000000000000000000000000000000000000000..5c802be2f33f9c25d1bb018de07c38ea09d86c69 --- /dev/null +++ b/data/xtuner/docs/en/reward_model/quick_start.md @@ -0,0 +1,85 @@ +## Quick Start Guide for Reward Model + +In this section, we will introduce how to use XTuner to train a 1.8B Reward Model, helping you get started quickly. + +### Preparing Pretrained Model Weights + +According to the paper [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155), we use a language model fine-tuned with SFT as the initialization model for the Reward Model. Here, we use [InternLM2-chat-1.8b-sft](https://huggingface.co/internlm/internlm2-chat-1_8b-sft) as the initialization model. + +Set `pretrained_model_name_or_path = 'internlm/internlm2-chat-1_8b-sft'` in the training configuration file, and the model files will be automatically downloaded when training starts. If you need to download the model weights manually, please refer to the section [Preparing Pretrained Model Weights](https://xtuner.readthedocs.io/zh-cn/latest/preparation/pretrained_model.html), which provides detailed instructions on how to download model weights from Huggingface or Modelscope. Here are the links to the models on HuggingFace and ModelScope: + +- HuggingFace link: https://huggingface.co/internlm/internlm2-chat-1_8b-sft +- ModelScope link: https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm2-chat-1_8b-sft/summary + +### Preparing Training Data + +In this tutorial, we use the [UltraFeedback](https://arxiv.org/abs/2310.01377) dataset as an example. For convenience, we use the preprocessed [argilla/ultrafeedback-binarized-preferences-cleaned](https://huggingface.co/datasets/argilla/ultrafeedback-binarized-preferences-cleaned) dataset from Huggingface. + +```python +train_dataset = dict( + type=build_preference_dataset, + dataset=dict( + type=load_dataset, + path='argilla/ultrafeedback-binarized-preferences-cleaned'), + dataset_map_fn=orpo_dpo_mix_40k_map_fn, + is_dpo=False, + is_reward=True, +) +``` + +Using the above configuration in the configuration file will automatically download and process this dataset. If you want to use other open-source datasets from Huggingface or custom datasets, please refer to the [Preference Dataset](./preference_data.md) section. + +### Preparing Configuration Files + +XTuner provides several ready-to-use configuration files, which can be viewed using `xtuner list-cfg`. Execute the following command to copy a configuration file to the current directory. + +```bash +xtuner copy-cfg internlm2_chat_1_8b_reward_full_ultrafeedback . +``` + +Open the copied configuration file. If you choose to download the model and dataset automatically, no modifications are needed. If you want to specify paths to your pre-downloaded model and dataset, modify the `pretrained_model_name_or_path` and the `path` parameter in `dataset` under `train_dataset`. 
+
+For more training parameter configurations, please refer to the section [Modifying Reward Training Configuration](./modify_settings.md).
+
+### Starting the Training
+
+After completing the above steps, you can start the training task using the following commands.
+
+```bash
+# Single node single GPU
+xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py
+# Single node multiple GPUs
+NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py
+# Slurm cluster
+srun ${SRUN_ARGS} xtuner train ./internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py --launcher slurm
+```
+
+A normal training log should look like the following (running on a single A800 GPU):
+
+```
+06/06 16:12:11 - mmengine - INFO - Iter(train) [ 10/15230] lr: 3.9580e-07 eta: 2:59:41 time: 0.7084 data_time: 0.0044 memory: 18021 loss: 0.6270 acc: 0.0000 chosen_score_mean: 0.0000 rejected_score_mean: 0.0000 num_samples: 4.0000 num_tokens: 969.0000
+06/06 16:12:17 - mmengine - INFO - Iter(train) [ 20/15230] lr: 8.3536e-07 eta: 2:45:25 time: 0.5968 data_time: 0.0034 memory: 42180 loss: 0.6270 acc: 0.5000 chosen_score_mean: 0.0013 rejected_score_mean: 0.0010 num_samples: 4.0000 num_tokens: 1405.0000
+06/06 16:12:22 - mmengine - INFO - Iter(train) [ 30/15230] lr: 1.2749e-06 eta: 2:37:18 time: 0.5578 data_time: 0.0024 memory: 32121 loss: 0.6270 acc: 0.7500 chosen_score_mean: 0.0016 rejected_score_mean: 0.0011 num_samples: 4.0000 num_tokens: 932.0000
+06/06 16:12:28 - mmengine - INFO - Iter(train) [ 40/15230] lr: 1.7145e-06 eta: 2:36:05 time: 0.6033 data_time: 0.0025 memory: 42186 loss: 0.6270 acc: 0.7500 chosen_score_mean: 0.0027 rejected_score_mean: 0.0016 num_samples: 4.0000 num_tokens: 994.0000
+06/06 16:12:35 - mmengine - INFO - Iter(train) [ 50/15230] lr: 2.1540e-06 eta: 2:41:03 time: 0.7166 data_time: 0.0027 memory: 42186 loss: 0.6278 acc: 0.5000 chosen_score_mean: 0.0031 rejected_score_mean: 0.0032 num_samples: 4.0000 num_tokens: 2049.0000
+06/06 16:12:40 - mmengine - INFO - Iter(train) [ 60/15230] lr: 2.5936e-06 eta: 2:33:37 time: 0.4627 data_time: 0.0023 memory: 30238 loss: 0.6262 acc: 1.0000 chosen_score_mean: 0.0057 rejected_score_mean: 0.0030 num_samples: 4.0000 num_tokens: 992.0000
+06/06 16:12:46 - mmengine - INFO - Iter(train) [ 70/15230] lr: 3.0331e-06 eta: 2:33:18 time: 0.6018 data_time: 0.0025 memory: 42186 loss: 0.6247 acc: 0.7500 chosen_score_mean: 0.0117 rejected_score_mean: 0.0055 num_samples: 4.0000 num_tokens: 815.0000
+```
+
+### Model Conversion
+
+XTuner provides integrated tools to convert models to HuggingFace format. Simply execute the following commands:
+
+```bash
+# Create a directory to store the HF-format parameters
+mkdir work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+
+# Convert the format
+xtuner convert pth_to_hf internlm2_chat_1_8b_reward_full_ultrafeedback_copy.py \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230.pth \
+                         work_dirs/internlm2_chat_1_8b_reward_full_ultrafeedback_copy/iter_15230_hf
+```
+
+This converts the XTuner checkpoint to HuggingFace format.
+
+Note: Since the Reward Model type is not integrated into the official transformers library, only Reward Models trained with InternLM2 will be converted to the `InternLM2ForRewardModel` type. Other models default to the `SequenceClassification` type (for example, LLaMA 3 will be converted to the `LlamaForSequenceClassification` type).
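+
+If you train a reward model from a non-InternLM2 base model, the converted checkpoint therefore uses the standard `SequenceClassification` head and can be queried with the regular transformers APIs. The snippet below is only a rough sketch under that assumption: the model directory is a placeholder for the output of `xtuner convert pth_to_hf`, the tokenizer is assumed to have been saved alongside the weights, and InternLM2-based `InternLM2ForRewardModel` checkpoints are instead loaded through their custom modeling code.
+
+```python
+# Rough sketch: score one prompt/response pair with a converted reward model
+# that uses a standard SequenceClassification head (e.g. a LLaMA-based model).
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+model_dir = './iter_15230_hf'  # placeholder for the converted HF-format directory
+tokenizer = AutoTokenizer.from_pretrained(model_dir)
+model = AutoModelForSequenceClassification.from_pretrained(model_dir)
+model.eval()
+
+
+def reward_score(prompt: str, response: str) -> float:
+    """Return the scalar reward assigned to a prompt/response pair."""
+    # A real chat model would normally get its chat template applied here;
+    # plain concatenation keeps the sketch short.
+    inputs = tokenizer(prompt + '\n' + response, return_tensors='pt')
+    with torch.no_grad():
+        logits = model(**inputs).logits  # shape (1, 1) for a single-label head
+    return logits[0, 0].item()
+
+
+print(reward_score('How do I bob for apples?', 'Fill a large basin with water, add the apples, and ...'))
+```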
diff --git a/data/xtuner/docs/en/switch_language.md b/data/xtuner/docs/en/switch_language.md new file mode 100644 index 0000000000000000000000000000000000000000..ff7c4c42502846c4fe3fc52f0bc2c2aec09c4f02 --- /dev/null +++ b/data/xtuner/docs/en/switch_language.md @@ -0,0 +1,3 @@ +## English + +## 简体中文 diff --git a/data/xtuner/docs/en/training/custom_agent_dataset.rst b/data/xtuner/docs/en/training/custom_agent_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..b4ad82f0196b547767922df9e72bbc2224cbac72 --- /dev/null +++ b/data/xtuner/docs/en/training/custom_agent_dataset.rst @@ -0,0 +1,2 @@ +Custom Agent Dataset +==================== diff --git a/data/xtuner/docs/en/training/custom_pretrain_dataset.rst b/data/xtuner/docs/en/training/custom_pretrain_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..00ef0e0cb5c65524ed895691a09e0daa6c03a9e1 --- /dev/null +++ b/data/xtuner/docs/en/training/custom_pretrain_dataset.rst @@ -0,0 +1,2 @@ +Custom Pretrain Dataset +======================= diff --git a/data/xtuner/docs/en/training/custom_sft_dataset.rst b/data/xtuner/docs/en/training/custom_sft_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..39a0f7c33713aafe429a5d069aa4fc6794dc8d36 --- /dev/null +++ b/data/xtuner/docs/en/training/custom_sft_dataset.rst @@ -0,0 +1,2 @@ +Custom SFT Dataset +================== diff --git a/data/xtuner/docs/en/training/modify_settings.rst b/data/xtuner/docs/en/training/modify_settings.rst new file mode 100644 index 0000000000000000000000000000000000000000..382aca87221142ee1aae4a08657b31f419084093 --- /dev/null +++ b/data/xtuner/docs/en/training/modify_settings.rst @@ -0,0 +1,2 @@ +Modify Settings +=============== diff --git a/data/xtuner/docs/en/training/multi_modal_dataset.rst b/data/xtuner/docs/en/training/multi_modal_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..e3d174a1bc5319b6b68aa753c984bd2d6b70a023 --- /dev/null +++ b/data/xtuner/docs/en/training/multi_modal_dataset.rst @@ -0,0 +1,2 @@ +Multi-modal Dataset +=================== diff --git a/data/xtuner/docs/en/training/open_source_dataset.rst b/data/xtuner/docs/en/training/open_source_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..8627b439d5a031c42db99503491547706cbc6b2b --- /dev/null +++ b/data/xtuner/docs/en/training/open_source_dataset.rst @@ -0,0 +1,2 @@ +Open Source Datasets +==================== diff --git a/data/xtuner/docs/en/training/visualization.rst b/data/xtuner/docs/en/training/visualization.rst new file mode 100644 index 0000000000000000000000000000000000000000..255c7e88f1d30566d26434cf144b482a79202184 --- /dev/null +++ b/data/xtuner/docs/en/training/visualization.rst @@ -0,0 +1,2 @@ +Visualization +============= diff --git a/data/xtuner/docs/en/user_guides/chat.md b/data/xtuner/docs/en/user_guides/chat.md new file mode 100644 index 0000000000000000000000000000000000000000..82c8ee7230cd76bf547bfdac084c8af0ff26ed76 --- /dev/null +++ b/data/xtuner/docs/en/user_guides/chat.md @@ -0,0 +1,128 @@ +# Chat with fine-tuned LLMs + +## Chat with [InternLM](https://github.com/InternLM/InternLM) + +### InternLM-7B + +- InternLM-7B, oasst1 + + ```shell + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-oasst1 --prompt-template internlm_chat + ``` + +- InternLM-7B, Arxiv Gentitle + + ```shell + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-arxiv-gentitle --prompt-template internlm_chat --system-template arxiv_gentile + 
``` + +- InternLM-7B, Colorist + + ```shell + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-colorist --prompt-template internlm_chat --system-template colorist + ``` + +- InternLM-7B, Alpaca-enzh + + ```shell + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-alpaca-enzh --prompt-template internlm_chat --system-template alpaca + ``` + +- InternLM-7B, MSAgent **(Lagent ReAct!)** + + ```shell + export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! + xtuner chat internlm/internlm-7b --adapter xtuner/internlm-7b-qlora-msagent-react --lagent + ``` + +### InternLM-Chat-7B + +- InternLM-Chat-7B, oasst1 + + ```shell + xtuner chat internlm/internlm-chat-7b --adapter xtuner/internlm-chat-7b-qlora-oasst1 --prompt-template internlm_chat + ``` + +- InternLM-Chat-7B, Alpaca-enzh + + ```shell + xtuner chat internlm/internlm-chat-7b --adapter xtuner/internlm-chat-7b-qlora-alpaca-enzh --prompt-template internlm_chat --system-template alpaca + ``` + +### InternLM-20B + +- InternLM-20B, oasst1 + + ```shell + xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-oasst1 --prompt-template internlm_chat + ``` + +- InternLM-20B, Arxiv Gentitle + + ```shell + xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-arxiv-gentitle --prompt-template internlm_chat --system-template arxiv_gentile + ``` + +- InternLM-20B, Colorist + + ```shell + xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-colorist --prompt-template internlm_chat --system-template colorist + ``` + +- InternLM-20B, Alpaca-enzh + + ```shell + xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-alpaca-enzh --prompt-template internlm_chat --system-template alpaca + ``` + +- InternLM-20B, MSAgent **(Lagent ReAct!)** + + ```shell + export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! + xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-msagent-react --lagent + ``` + +### InternLM-Chat-20B + +- InternLM-Chat-20B, oasst1 + + ```shell + xtuner chat internlm/internlm-chat-20b --adapter xtuner/internlm-chat-20b-qlora-oasst1 --prompt-template internlm_chat + ``` + +- InternLM-Chat-20B, Alpaca-enzh + + ```shell + xtuner chat internlm/internlm-chat-20b --adapter xtuner/internlm-chat-20b-qlora-alpaca-enzh --prompt-template internlm_chat --system-template alpaca + ``` + +## Chat with [Llama2](https://github.com/facebookresearch/llama) + +> Don't forget to use `huggingface-cli login` and input your access token first to access Llama2! See [here](https://huggingface.co/docs/hub/security-tokens#user-access-tokens) to learn how to obtain your access token. + +### Llama-2-7B + +- Llama-2-7B, MOSS-003-SFT **(plugins!)** + + ```shell + export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! + xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-moss-003-sft --bot-name Llama2 --prompt-template moss_sft --system-template moss_sft --with-plugins calculate solve search --no-streamer + ``` + +- Llama-2-7B, MSAgent **(Lagent ReAct!)** + + ```shell + export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! 
+ xtuner chat meta-llama/Llama-2-7b-hf --adapter xtuner/Llama-2-7b-qlora-msagent-react --lagent + ``` + +## Chat with [Qwen](https://github.com/QwenLM) + +### Qwen-7B + +- Qwen-7B, MOSS-003-SFT **(plugins!)** + + ```shell + export SERPER_API_KEY="xxx" # Please get the key from https://serper.dev to support google search! + xtuner chat Qwen/Qwen-7B --adapter xtuner/Qwen-7B-qlora-moss-003-sft --bot-name Qwen --prompt-template moss_sft --system-template moss_sft --with-plugins calculate solve search + ``` diff --git a/data/xtuner/docs/en/user_guides/dataset_format.md b/data/xtuner/docs/en/user_guides/dataset_format.md new file mode 100644 index 0000000000000000000000000000000000000000..46e3d6f80ae58930554f178779f0fc0f1d7b433e --- /dev/null +++ b/data/xtuner/docs/en/user_guides/dataset_format.md @@ -0,0 +1,193 @@ +# Dataset Format + +- [Incremental Pre-training Dataset Format](#incremental-pre-training-dataset-format) +- [Single-turn Dialogue Dataset Format](#single-turn-dialogue-dataset-format) +- [Multi-turn Dialogue Dataset Format](#multi-turn-dialogue-dataset-format) + - [Method 1](#method-1) + - [Method 2](#method-2) + - [Method in XTuner](#method-in-xtuner) + +The Supervised Finetune (SFT) of large language models aims to improve the performance of pre-trained models on specific tasks through supervised fine-tuning. To support as many downstream tasks as possible, XTuner supports three dataset formats: incremental pre-training, single-turn dialogue, and multi-turn dialogue. + +- The incremental pre-training dataset is used to enhance the model's capabilities in a specific domain or task. +- Single-turn and multi-turn dialogue datasets are often used in the instruction tuning stage to enhance the model's ability to respond to specific instructions. + +In the instruction tuning phase, our goal is to train the language model to answer based on human instructions. **Therefore, generally only the loss of the response part (Output) is used for gradient backpropagation, while the loss of the instruction part (System, Input) is not used for weight updates.** Based on this, we introduce "system", "input" and "output" fields when preprocessing the dataset. The "system", "input" fields are used to save fields that do not need to compute loss, such as system and user instructions, whereas the "output" field is used to save fields that do need to compute loss, such as the GroundTruth answers corresponding to input instructions. + +To unify the incremental pre-training, single-turn dialogue, and multi-turn dialogue dataset formats, we set the dataset format to the following form: + +```json +[{ + "conversation":[ + { + "system": "xxx", + "input": "xxx", + "output": "xxx" + } + ] +}, +{ + "conversation":[ + { + "system": "xxx", + "input": "xxx", + "output": "xxx" + }, + { + "input": "xxx", + "output": "xxx" + } + ] +}] +``` + +Throughout the training phase, we amalgamate several "system", "input" and "output" pairs from a single data instance, which we then feed into the model. Loss is computed concurrently at each position, yet only the loss associated with the "output" component participates in the gradient backpropagation process. This process is elucidated in the figure below. + +
+ 图 1 序列并行实现方案 +
+ +图 1 展示了序列并行策略的实现方案。由于 Transformer 结构较为规整,除 attention 计算外,其他计算过程中 token 之间不会互相影响(即每个 token 的计算是独立的),这一条件为序列并行提供了有利条件。上图展示了序列并行的核心设计。设由 P 个 GPUs 共同计算一个长度为 N 的长序列,在 Attention 计算的第一阶段,长度为 N / P 的子序列会通过线性层投影为 Query、Key、Value。接下来, QKV Tensor 会在参与序列并行计算的多个 GPUs 之间通过高度优化的 all-to-all 通信算子汇聚,得到序列长度为 N ,但更少注意力头的子序列。注意力计算后,通过另一个 all-to-all 通信算子将其转换为长度为 N / P 的子序列,进行后续计算。伪代码如下所示。 + +.. code-block:: python + + # Pseudo code for an Attention Layer + # Input: hidden_states with shape (bs, seq_len, dim) + # Output: attn_out with shape (bs, seq_len, dim) + def attn_forward(hidden_states): + q, k, v = qkv_proj(hidden_states) + q, k, v = reshape(q, k, v) # (bs, q_len, dim) -> (bs, q_len, nhead, hdim) + q, k = apply_rotary_pos_emb(q, k, cos, sin) + sp_size = get_sequence_parallel_world_size() + # (bs, q_len, nhead, hdim) -> (bs, q_len * sp_size, nhead / sp_size, hdim) + q, k, v = all_to_all(q, k, v, sp_size) + attn_out = local_attn(q, k, v) + # (bs, q_len * sp_size, nhead / sp_size, hdim) -> (bs, q_len, nhead, hdim) + attn_out = all_to_all(attn_out) + attn_out = reshape(attn_out) # (bs, q_len, nhead, hdim) -> (bs, q_len, dim) + attn_out = o_proj(attn_out) + return attn_out + + +序列并行 API +============= + +为了方便在其他 repo 中使用序列并行策略,XTuner 中抽象出了序列并行所必须的五个 API 接口: + +- 序列并行分布式环境初始化 (init_sequence_parallel) +- 适配序列并行的 Data Sampler (SequenceParallelSampler) +- 数据 Pad 与切分 (pad_for_sequence_parallel, split_for_sequence_parallel) +- 适配序列并行的 Attention (dispatch_modules) +- reduce loss 以正确打印训练损失 (reduce_sequence_parallel_loss) + +分布式环境初始化 +------------------- + +由于序列并行算法会将长序列切分为 `sequence_parallel_world_size` 块,并将每个子序列分发给对应的 GPU 独立进行计算。因此需要在训练开始前初始化序列并行分布式环境,以指定哪几块 GPU 共同负责一个长序列输入的计算。 + +一个 `sequence_parallel_world_size = 4` 的示例如下: + +.. code-block:: python + + # We have to initialize the distributed training environment first. + # Here is an example when training on slurm scheduler + # from xtuner.parallel.sequence import init_dist + # init_dist('slurm', 'nccl', init_backend='deepspeed') + from xtuner.parallel.sequence import init_sequence_parallel + sequence_parallel_world_size = 4 + init_sequence_parallel(sequence_parallel_world_size) + +.. tip:: + 上述过程在 ``xtuner/engine/_strategy/deepspeed.py`` 中实现。 + +Data Sampler +-------------- + +在使用序列并行后,Dataloader 的采样策略需要进一步调整。例如当 `sequence_parallel_world_size = 4` 时,4 块 GPU 从 Dataloader 拿到的数据需要是完全一样的。 + +在构建 Dataloader 时搭配 XTuner 中提供的 `SequenceParallelSampler` 使用即可: + +.. code-block:: python + + from xtuner.parallel.sequence import SequenceParallelSampler + dataloader = DataLoader( + train_dataset, sampler=SequenceParallelSampler(train_dataset), + **other_dataloader_params) + +数据 Pad 与切分 +--------------- + +由于每条训练数据的长度可能不尽相同,我们需要将数据进行 Pad 以使得序列长度可以被 `sequence_parallel_world_size` 整除,这样一条长数据才能被均等地分发给不同的 GPU 上。 + +训练过程中需要被 Pad 的 Tensor 往往有 input_ids, labels, position_ids, attention_mask 四个,pad 的过程可以通过以下方式实现: + +.. code-block:: python + + from xtuner.parallel.sequence import pad_for_sequence_parallel + input_ids, labels, position_ids, attention_mask = pad_for_sequence_parallel( + input_ids, labels, position_ids, attention_mask) + +如果训练过程用不到 attention_mask,那么可以: + +.. code-block:: python + + input_ids, labels, position_ids, _ = pad_for_sequence_parallel( + input_ids, labels, position_ids) + +Pad 后,我们需要对长序列均等切分: + +.. code-block:: python + + from xtuner.parallel.sequence import split_for_sequence_parallel + # attention mask should not be split + input_ids, labels, position_ids = split_for_sequence_parallel( + input_ids, labels, position_ids) + +.. 
tip:: + 以上两步在 ``xtuner/dataset/collate_fns/default_collate_fn.py`` 中实现。 + +Attention +----------- + +在 Attention 的计算过程中,序列中的不同 token 是不能独立运算的,但不同的 attention head 之间的计算却是独立的。因此,如第一节所述,需要在计算 Attention 前后(即 qkv_proj 后和 o_proj 前)分别插入一个 all-to-all 操作。 + +XTuner 提供了 dispatch_modules 接口以支持修改模型 Attention 的计算方式: + +.. code-block:: python + + from xtuner.model.modules import dispatch_modules + model: AutoModelForCausalLM + dispatch_modules(model) + +.. tip:: + 上述过程在 ``xtuner/model/sft.py`` 中实现。 + +Reduce Loss +------------- + +这个 API 对于保证训练的正确性不是必须的,但对于观测模型训练状态,打印训练 loss 是非常有用的。 + +.. code-block:: python + + from xtuner.parallel.sequence import reduce_sequence_parallel_loss + outputs = llm(input_ids=input_ids, labels=labels, **kwargs) + num_tokens_per_rank = (labels != -100).sum() + # Suppose sequence parallel world size equals to 4, + # losses on rank0, rank1, rank2, rank3 are different. + loss = reduce_sequence_parallel_loss(outputs.loss, num_tokens_per_rank) + # After loss reduction, losses on rank0, rank1, rank2, rank3 are the same. + +.. tip:: + 上述过程在 ``xtuner/model/sft.py`` 中实现。 diff --git a/data/xtuner/docs/zh_cn/acceleration/train_large_scale_dataset.rst b/data/xtuner/docs/zh_cn/acceleration/train_large_scale_dataset.rst new file mode 100644 index 0000000000000000000000000000000000000000..f0925f050833f65442262ac7933fecbcd2775436 --- /dev/null +++ b/data/xtuner/docs/zh_cn/acceleration/train_large_scale_dataset.rst @@ -0,0 +1,205 @@ +================ +超大规模数据集 +================ + +在线数据处理 +=============== + +XTuner +默认采用在线数据预处理的策略,这样可以降低用户使用门槛,以达到“开箱即用”的要求。然而,在线数据处理的弊端在于,当数据集过大时,数据处理过程耗时相对较多,可能会触发 +``nccl timeout`` 报错。 + +为什么会出现 ``nccl timeout``? +------------------------------------ + +使用 XTuner 训练模型时,在训练开始前会首先通过 +`process_hf_dataset
+
+
变长注意力计算原理(拷贝自 https://github.com/InternLM/InternEvo/blob/develop/doc/usage.md)
+
+ LLM 一站式工具箱 + +
+ + + + + +文档 +------------- +.. toctree:: + :maxdepth: 2 + :caption: 开始使用 + + get_started/installation.rst + get_started/quickstart.rst + +.. toctree:: + :maxdepth: 2 + :caption: 准备 + + preparation/pretrained_model.rst + preparation/prompt_template.rst + +.. toctree:: + :maxdepth: 2 + :caption: 训练 + + training/open_source_dataset.rst + training/custom_sft_dataset.rst + training/custom_pretrain_dataset.rst + training/multi_modal_dataset.rst + acceleration/train_large_scale_dataset.rst + training/modify_settings.rst + training/visualization.rst + +.. toctree:: + :maxdepth: 2 + :caption: DPO + + dpo/overview.md + dpo/quick_start.md + dpo/modify_settings.md + +.. toctree:: + :maxdepth: 2 + :caption: Reward Model + + reward_model/overview.md + reward_model/quick_start.md + reward_model/modify_settings.md + reward_model/preference_data.md + +.. toctree:: + :maxdepth: 2 + :caption: 加速训练 + + acceleration/deepspeed.rst + acceleration/flash_attn.rst + acceleration/varlen_flash_attn.rst + acceleration/pack_to_max_length.rst + acceleration/length_grouped_sampler.rst + acceleration/train_extreme_long_sequence.rst + acceleration/hyper_parameters.rst + acceleration/benchmark.rst + + +.. toctree:: + :maxdepth: 1 + :caption: InternEvo 迁移 + + internevo_migration/differences.rst + internevo_migration/ftdp_dataset/tokenized_and_internlm2.rst + internevo_migration/ftdp_dataset/processed_and_internlm2.rst + internevo_migration/ftdp_dataset/processed_and_others.rst + internevo_migration/ftdp_dataset/processed_normal_chat.rst diff --git a/data/xtuner/docs/zh_cn/internevo_migration/differences.rst b/data/xtuner/docs/zh_cn/internevo_migration/differences.rst new file mode 100644 index 0000000000000000000000000000000000000000..68c7f318fa2865d82c418988d1beb6d06ea5d4e9 --- /dev/null +++ b/data/xtuner/docs/zh_cn/internevo_migration/differences.rst @@ -0,0 +1,320 @@ +============== +主要差异 +============== + +总览 +============= + +XTuner 可以复现 InternEvo (train_internlm) 仓库训练得到的开源模型 +internlm/internlm2-chat-7b 的训练精度。 + +下面是 XTuner 和 InternEvo (train_internlm) +在相同数据集上训练相同基座模型的训练结果对比: + +.. list-table:: + :widths: 50 25 25 + :header-rows: 1 + + * - 能力类别 + - xtuner + - internevo + * - 全数据集平均(无智能体) + - 56.44 + - 55.26 + * - 全维度平均(无智能体) + - 49.58 + - 48.96 + * - 语言 Language + - 64.77 + - 62.41 + * - 知识 Knowledge + - 52.24 + - 52.52 + * - 推理 Reasoning + - 65.5 + - 63.91 + * - 数学 Mathematics + - 30.95 + - 30.26 + * - 代码 Coding + - 38.91 + - 41.06 + * - 长文本 LongEval + - 45.09 + - 43.62 + * - 智能体 Agent + - 44.85 + - 43.97 + * - 数学题智能体 + - 37 + - 37.19 + * - CIBench + - 79.07 + - 69.78 + * - PluginEval + - 65.57 + - 65.62 + +64 \* A100 的训练时间对比如下: + +=========== ========== +xtuner internevo +=========== ========== +15 h 55 min 16h 09 min +=========== ========== + +.. tip:: + 使用 XTuner 提供的序列并行算法可以进一步提升训练速度,使用方式请参考 + \ :ref:`序列并行文档