bigPear committed on
Commit 7975f51 · 1 Parent(s): 6f1c658

Upload 76 files

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +8 -0
  2. LICENSE +201 -0
  3. README_zh.md +339 -0
  4. assets/trainer_state.jpg +0 -0
  5. assets/wechat.jpg +0 -0
  6. data/.ipynb_checkpoints/comparison_gpt4_data_zh-checkpoint.json +3 -0
  7. data/.ipynb_checkpoints/dataset_info-checkpoint.json +113 -0
  8. data/.ipynb_checkpoints/self_cognition-checkpoint.json +92 -0
  9. data/README.md +53 -0
  10. data/alpaca_data_en_52k.json +3 -0
  11. data/alpaca_data_zh_51k.json +3 -0
  12. data/alpaca_gpt4_data_en.json +3 -0
  13. data/alpaca_gpt4_data_zh.json +3 -0
  14. data/belle_multiturn/belle_multiturn.py +70 -0
  15. data/comparison_gpt4_data_en.json +3 -0
  16. data/comparison_gpt4_data_zh.json +3 -0
  17. data/dataset_info.json +113 -0
  18. data/example_dataset/example_dataset.py +46 -0
  19. data/example_dataset/examples.json +20 -0
  20. data/hh_rlhf_en/hh_rlhf_en.py +97 -0
  21. data/self_cognition.json +92 -0
  22. data/ultra_chat/ultra_chat.py +76 -0
  23. data/wdf.json +0 -0
  24. examples/ads_generation.md +172 -0
  25. examples/alter_self_cognition.md +104 -0
  26. examples/cloudflare-tunnel-to-colab.ipynb +143 -0
  27. examples/evaluate.sh +12 -0
  28. examples/finetune.sh +17 -0
  29. examples/media/ads_generation_1.jpg +0 -0
  30. examples/media/ads_generation_2.jpg +0 -0
  31. examples/media/ads_generation_3.jpg +0 -0
  32. examples/media/ads_generation_4.jpg +0 -0
  33. examples/media/ads_generation_5.jpg +0 -0
  34. examples/media/ads_generation_6.jpg +0 -0
  35. examples/media/alter_self_cognition_1.jpg +0 -0
  36. examples/media/alter_self_cognition_2.jpg +0 -0
  37. examples/media/alter_self_cognition_3.jpg +0 -0
  38. examples/media/alter_self_cognition_4.jpg +0 -0
  39. examples/quantized_finetune_with_local_model.sh +19 -0
  40. examples/train_ppo.sh +19 -0
  41. examples/train_rm.sh +17 -0
  42. requirements.txt +14 -0
  43. src/.ipynb_checkpoints/__init__-checkpoint.py +4 -0
  44. src/.ipynb_checkpoints/infer-checkpoint.py +77 -0
  45. src/__init__.py +4 -0
  46. src/finetune.py +88 -0
  47. src/infer.py +77 -0
  48. src/train_ppo.py +85 -0
  49. src/train_rm.py +60 -0
  50. src/utils/.ipynb_checkpoints/__init__-checkpoint.py +26 -0
.gitattributes CHANGED
@@ -32,3 +32,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/.ipynb_checkpoints/comparison_gpt4_data_zh-checkpoint.json filter=lfs diff=lfs merge=lfs -text
+ data/alpaca_data_en_52k.json filter=lfs diff=lfs merge=lfs -text
+ data/alpaca_data_zh_51k.json filter=lfs diff=lfs merge=lfs -text
+ data/alpaca_gpt4_data_en.json filter=lfs diff=lfs merge=lfs -text
+ data/alpaca_gpt4_data_zh.json filter=lfs diff=lfs merge=lfs -text
+ data/comparison_gpt4_data_en.json filter=lfs diff=lfs merge=lfs -text
+ data/comparison_gpt4_data_zh.json filter=lfs diff=lfs merge=lfs -text
+ tests/comparison_data_v2.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README_zh.md ADDED
@@ -0,0 +1,339 @@
1
+ # ChatGLM Efficient Tuning
2
+
3
+ ![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/ChatGLM-Efficient-Tuning?style=social)
4
+ ![GitHub Code License](https://img.shields.io/github/license/hiyouga/ChatGLM-Efficient-Tuning)
5
+ ![GitHub last commit](https://img.shields.io/github/last-commit/hiyouga/ChatGLM-Efficient-Tuning)
6
+ ![GitHub pull request](https://img.shields.io/badge/PRs-welcome-blue)
7
+
8
+ 基于 🤗[PEFT](https://github.com/huggingface/peft) 的高效 🤖[ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) 微调。
9
+
10
+ 👋 加入我们的[微信群](assets/wechat.jpg)。
11
+
12
+ \[ [English](README.md) | 中文 \]
13
+
14
+ ## 更新日志
15
+
16
+ [23/04/29] 现在我们实现了 **RLHF(基于人类反馈的强化学习)** 训练!我们提供了几个运行 RLHF 的例子,具体内容请移步 `examples` 文件夹。(实验性功能)
17
+
18
+ [23/04/25] 我们新增了一个使用自定义数据集分布式训练的例子,请移步 [ads_generation.md](examples/ads_generation.md) 查阅。
19
+
20
+ [23/04/20] 我们的项目在 12 天内获得了 100 个 Star!祝贺!
21
+
22
+ [23/04/20] 我们新增了一个修改模型自我认知的例子,请移步 [alter_self_cognition.md](examples/alter_self_cognition.md) 查阅。
23
+
24
+ [23/04/19] 现在我们实现了**模型融合**!请尝试使用 `--checkpoint_dir checkpoint1,checkpoint2` 参数训练融合 LoRA 权重后的模型。
25
+
26
+ [23/04/18] 现在可以微调**量化模型**了!请尝试使用 `quantization_bit` 参数进行 4 比特或 8 比特量化微调。
27
+
28
+ [23/04/12] 现在我们加入了**断点训练支持**!请尝试给定 `--checkpoint_dir` 参数加载指定的模型断点。
29
+
30
+ [23/04/11] 现在我们实现了**数据集组合训练**!请尝试使用 `--dataset dataset1,dataset2` 参数进行组合训练。
31
+
32
+ ## 数据集
33
+
34
+ 目前我们实现了针对以下数据集的支持:
35
+
36
+ - [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca)
37
+ - [Stanford Alpaca (Chinese)](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
38
+ - [GPT-4 Generated Data](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
39
+ - [BELLE 2M](https://huggingface.co/datasets/BelleGroup/train_2M_CN)
40
+ - [BELLE 1M](https://huggingface.co/datasets/BelleGroup/train_1M_CN)
41
+ - [BELLE 0.5M](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN)
42
+ - [BELLE Dialogue 0.4M](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)
43
+ - [BELLE School Math 0.25M](https://huggingface.co/datasets/BelleGroup/school_math_0.25M)
44
+ - [BELLE Multiturn Chat 0.8M](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M)
45
+ - [Guanaco Dataset](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
46
+ - [Firefly 1.1M](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
47
+ - [CodeAlpaca 20k](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k)
48
+ - [Alpaca CoT](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT)
49
+ - [Web QA (Chinese)](https://huggingface.co/datasets/suolyer/webqa)
50
+ - [UltraChat](https://github.com/thunlp/UltraChat)
51
+
52
+ 使用方法请参考 [data/README.md](data/README.md) 文件。
53
+
54
+ 部分数据集的使用需要确认,我们推荐使用下述命令登录您的 HuggingFace 账户。
55
+
56
+ ```bash
57
+ pip install --upgrade huggingface_hub
58
+ huggingface-cli login
59
+ ```
60
+
61
+ ## 微调方法
62
+
63
+ 目前我们实现了针对以下高效微调方法的支持:
64
+
65
+ - [LoRA](https://arxiv.org/abs/2106.09685)
66
+ - 仅微调低秩适应器。
67
+ - [P-Tuning V2](https://github.com/THUDM/P-tuning-v2)
68
+ - 仅微调前缀编码器。
69
+ - [Freeze](https://arxiv.org/abs/2012.14913)
70
+ - 仅微调后几层的全连接层。
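+
+ 下面是一段基于 🤗[PEFT](https://github.com/huggingface/peft) 的**示意代码**,用来说明 LoRA"仅微调低秩适应器"的含义。其中的超参数与 `target_modules` 均为示例假设,实际请以本项目 `src` 目录中的实现为准。
+
+ ```python
+ from peft import LoraConfig, TaskType, get_peft_model
+
+ # 假设 model 为已加载的 ChatGLM-6B 模型
+ lora_config = LoraConfig(
+     task_type=TaskType.CAUSAL_LM,       # 按自回归语言模型处理(假设)
+     r=8,                                # LoRA 秩,对应下文硬件需求表中的 r
+     lora_alpha=32,
+     lora_dropout=0.1,
+     target_modules=["query_key_value"]  # ChatGLM 注意力层的线性投影名称(假设)
+ )
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()      # 仅 LoRA 适配器参数参与训练
+ ```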
71
+
72
+ ## 软件依赖
73
+
74
+ - Python 3.8+, PyTorch 1.13.1
75
+ - 🤗Transformers, Datasets, Accelerate, PEFT, TRL
76
+ - protobuf, cpm_kernels, sentencepiece
77
+ - jieba, rouge_chinese, nltk(用于评估)
78
+ - gradio, mdtex2html(用于网页端交互)
79
+
80
+ 以及 **强而有力的 GPU**!
81
+
82
+ ## 如何使用
83
+
84
+ ### 数据准备(可跳过)
85
+
86
+ 关于数据集文件的格式,请参考 `data/example_dataset` 文件夹的内容。构建自定义数据集时,既可以使用单个 `.json` 文件,也可以使用一个[数据加载脚本](https://huggingface.co/docs/datasets/dataset_script)和多个文件。
87
+
88
+ 注意:使用自定义数据集时,请更新 `data/dataset_info.json` 文件,该文件的格式请参考 `data/README.md`。
89
+
90
+ ### 环境搭建(可跳过)
91
+
92
+ ```bash
93
+ git clone https://github.com/hiyouga/ChatGLM-Efficient-Tuning.git
94
+ conda create -n chatglm_etuning python=3.10
95
+ conda activate chatglm_etuning
96
+ cd ChatGLM-Efficient-Tuning
97
+ pip install -r requirements.txt
98
+ ```
99
+
100
+ 对于 Windows 用户,若要启用 LoRA 或 Freeze 的量化微调,请下载预构建的 `bitsandbytes` 包,目前仅支持 CUDA 11.6 和 11.7。
101
+
102
+ ```
103
+ pip install https://github.com/acpopescu/bitsandbytes/releases/download/v0.37.2-win.1/bitsandbytes-0.37.2-py3-none-any.whl
104
+ ```
105
+
106
+ ### 单 GPU 微调训练
107
+
108
+ ```bash
109
+ CUDA_VISIBLE_DEVICES=0 python src/finetune.py \
110
+ --do_train \
111
+ --dataset alpaca_gpt4_zh \
112
+ --finetuning_type lora \
113
+ --output_dir path_to_sft_checkpoint \
114
+ --per_device_train_batch_size 4 \
115
+ --gradient_accumulation_steps 4 \
116
+ --lr_scheduler_type cosine \
117
+ --logging_steps 10 \
118
+ --save_steps 1000 \
119
+ --learning_rate 5e-5 \
120
+ --num_train_epochs 3.0 \
121
+ --fp16
122
+ ```
123
+
124
+ 关于参数信息,请查阅我们的[维基](https://github.com/hiyouga/ChatGLM-Efficient-Tuning/wiki)。
125
+
126
+ ### 多 GPU 分布式微调
127
+
128
+ ```bash
129
+ accelerate config # 首先配置分布式环境
130
+ accelerate launch src/finetune.py # 参数同上
131
+ ```
132
+
133
+ 注意:若您使用 LoRA 方法进行微调,请指定以下参数 `--ddp_find_unused_parameters False` 来避免报错。
134
+
135
+ ### 奖励模型训练
136
+
137
+ ```bash
138
+ CUDA_VISIBLE_DEVICES=0 python src/train_rm.py \
139
+ --do_train \
140
+ --dataset comparison_gpt4_zh \
141
+ --finetuning_type lora \
142
+ --output_dir path_to_rm_checkpoint \
143
+ --per_device_train_batch_size 4 \
144
+ --gradient_accumulation_steps 4 \
145
+ --lr_scheduler_type cosine \
146
+ --logging_steps 10 \
147
+ --save_steps 1000 \
148
+ --learning_rate 1e-5 \
149
+ --num_train_epochs 1.0 \
150
+ --fp16
151
+ ```
152
+
153
+ ### RLHF 训练
154
+
155
+ ```bash
156
+ CUDA_VISIBLE_DEVICES=0 python src/train_ppo.py \
157
+ --do_train \
158
+ --dataset alpaca_gpt4_zh \
159
+ --finetuning_type lora \
160
+ --checkpoint_dir path_to_sft_checkpoint \
161
+ --reward_model path_to_rm_checkpoint \
162
+ --output_dir path_to_ppo_checkpoint \
163
+ --per_device_train_batch_size 2 \
164
+ --gradient_accumulation_steps 4 \
165
+ --lr_scheduler_type cosine \
166
+ --logging_steps 10 \
167
+ --save_steps 1000 \
168
+ --learning_rate 1e-5 \
169
+ --num_train_epochs 1.0 \
170
+ --fp16
171
+ ```
172
+
173
+ ### 指标评估(BLEU分数和汉语ROUGE分数)
174
+
175
+ ```bash
176
+ CUDA_VISIBLE_DEVICES=0 python src/finetune.py \
177
+ --do_eval \
178
+ --dataset alpaca_gpt4_zh \
179
+ --checkpoint_dir path_to_checkpoint \
180
+ --output_dir path_to_eval_result \
181
+ --per_device_eval_batch_size 8 \
182
+ --max_samples 50 \
183
+ --predict_with_generate
184
+ ```
185
+
186
+ ### 模型预测
187
+ ```bash
188
+ CUDA_VISIBLE_DEVICES=0 python src/finetune.py \
189
+ --do_predict \
190
+ --dataset alpaca_gpt4_zh \
191
+ --checkpoint_dir path_to_checkpoint \
192
+ --output_dir path_to_predict_result \
193
+ --per_device_eval_batch_size 8 \
194
+ --max_samples 50 \
195
+ --predict_with_generate
196
+ ```
197
+
198
+ ### 命令行测试
199
+
200
+ ```bash
201
+ CUDA_VISIBLE_DEVICES=0 python src/infer.py \
202
+ --checkpoint_dir path_to_checkpoint
203
+ ```
204
+
205
+ ### 浏览器测试
206
+
207
+ ```bash
208
+ CUDA_VISIBLE_DEVICES=0 python src/web_demo.py \
209
+ --checkpoint_dir path_to_checkpoint
210
+ ```
211
+
212
+ ### 模型部署
213
+
214
+ ```python
215
+ import sys
216
+ sys.path.append("src")
217
+ from src import load_pretrained, ModelArguments
218
+ model_args = ModelArguments(checkpoint_dir=path_to_checkpoint)
219
+ model, tokenizer = load_pretrained(model_args)
220
+ model = model.cuda()
221
+ model.eval()
222
+ # model.generate, model.chat()...
223
+ ```
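+
+ 加载完成后,可参考下面的**示意代码**进行多轮对话(假设使用 ChatGLM-6B 的 `chat` 接口,具体入参与返回值以实际模型实现为准):
+
+ ```python
+ # 多轮对话示意:chat 返回本轮回复与更新后的对话历史
+ history = []
+ while True:
+     query = input("用户:")
+     if query.strip().lower() in ("exit", "quit"):
+         break
+     response, history = model.chat(tokenizer, query, history=history)
+     print("ChatGLM-6B:", response)
+ ```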
224
+
225
+ ### 硬件需求
226
+
227
+ | 微调方法 | 批处理大小 | 模式 | GPU显存 | 速度 |
228
+ | ---------------- | ---------- | ---- | ------ | ----- |
229
+ | LoRA (r=8) | 16 | FP16 | 28GB | 8ex/s |
230
+ | LoRA (r=8) | 8 | FP16 | 24GB | 8ex/s |
231
+ | LoRA (r=8) | 4 | FP16 | 20GB | 8ex/s |
232
+ | LoRA (r=8) | 4 | INT8 | 10GB | 8ex/s |
233
+ | P-Tuning (p=16) | 4 | FP16 | 20GB | 8ex/s |
234
+ | P-Tuning (p=16) | 4 | INT8 | 16GB | 8ex/s |
235
+ | P-Tuning (p=16) | 4 | INT4 | 12GB | 8ex/s |
236
+ | Freeze (l=3) | 4 | FP16 | 24GB | 8ex/s |
237
+ | Freeze (l=3) | 4 | INT8 | 12GB | 8ex/s |
238
+
239
+ | 奖励模型训练方法 | 批处理大小 | 模式 | GPU显存 | 速度 |
240
+ | --------------- | ---------- | ---- | ------ | ---- |
241
+ | LoRA (r=8) + rm | 4 | FP16 | 22GB | - |
242
+ | LoRA (r=8) + rm | 1 | INT8 | 11GB | - |
243
+
244
+ | RLHF 训练方法 | 批处理大小 | 模式 | GPU显存 | 速度 |
245
+ | ---------------- | ---------- | ---- | ------ | ---- |
246
+ | LoRA (r=8) + ppo | 4 | FP16 | 23GB | - |
247
+ | LoRA (r=8) + ppo | 1 | INT8 | 12GB | - |
248
+
249
+ > 注:`r` 为LoRA 维数大小,`p` 为前缀词表大小,`l` 为微调层数,`ex/s` 为每秒训练的样本数。`gradient_accumulation_steps` 参数设置为 `1`。上述结果均来自于单个 Tesla V100 GPU,仅供参考。
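+
+ > 例如,「单 GPU 微调训练」示例命令中 `per_device_train_batch_size 4` 搭配 `gradient_accumulation_steps 4`,等效批大小为 4 × 4 = 16;由于梯度累积并不会显著增加显存占用,显存需求应参考表中批处理大小为 4 的一行估计。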
250
+
251
+ ## 微调 ChatGLM 的例子
252
+
253
+ ### 训练结果
254
+
255
+ 我们使用整个 `alpaca_gpt4_zh` 数据集微调 ChatGLM 模型,使用秩为 8 的 LoRA 方法,使用默认超参数进行单轮训练。下图为训练损失变化曲线。
256
+
257
+ ![训练损失](assets/trainer_state.jpg)
258
+
259
+ ### 评估结果
260
+
261
+ 我们选择 `alpaca_gpt4_zh` 数据集中的前一百条数据来评估微调后的 ChatGLM 模型,并计算 BLEU 和中文 ROUGE 分数。下表为评估结果。
262
+
263
+ | 分数 | 原版模型 | FZ (l=2) | PT (p=16) | LoRA (r=8) |
264
+ | ------- | -------- | ----- | ----- | ----------------- |
265
+ | BLEU-4 | 15.75 | 16.85 | 16.06 | 17.01 (**+1.26**) |
266
+ | Rouge-1 | 34.51 | 36.62 | 34.80 | 36.77 (**+2.26**) |
267
+ | Rouge-2 | 15.11 | 17.04 | 15.32 | 16.83 (**+1.72**) |
268
+ | Rouge-l | 26.18 | 28.17 | 26.35 | 28.86 (**+2.68**) |
269
+ | 训练参数 | / | 4.35% | 0.06% | 0.06% |
270
+
271
+ > FZ:Freeze 微调,PT:P-Tuning V2 微调(为了与 LoRA 公平比较,我们使用了 `pre_seq_len=16`),训练参数:可训练参数占全部参数的百分比。
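+
+ 上表中的 BLEU-4 与中文 ROUGE 分数可以使用「软件依赖」中列出的 jieba、rouge_chinese 和 nltk 计算。下面是一段**示意代码**,并非本项目 `src` 中的原始实现,分词与平滑方式均为假设,仅供理解指标含义:
+
+ ```python
+ import jieba
+ from rouge_chinese import Rouge
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+
+ def compute_metrics(prediction: str, reference: str) -> dict:
+     hyp = list(jieba.cut(prediction))  # 中文文本先分词
+     ref = list(jieba.cut(reference))
+     bleu4 = sentence_bleu([ref], hyp, smoothing_function=SmoothingFunction().method3)
+     rouge = Rouge().get_scores(" ".join(hyp), " ".join(ref))[0]
+     return {
+         "BLEU-4": bleu4 * 100,
+         "Rouge-1": rouge["rouge-1"]["f"] * 100,
+         "Rouge-2": rouge["rouge-2"]["f"] * 100,
+         "Rouge-l": rouge["rouge-l"]["f"] * 100,
+     }
+ ```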
272
+
273
+ ## 和现有类似项目的比较
274
+
275
+ - [THUDM/ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B/tree/main/ptuning)
276
+ - ChatGLM 基于 [P-Tuning v2](https://github.com/THUDM/P-tuning-v2) 微调的官方实现,使用了 [ADGEN](https://aclanthology.org/D19-1321.pdf) 数据集。
277
+ - 本仓库的代码实现绝大部分参考该项目。我们进一步实现了 [LoRA](https://arxiv.org/abs/2106.09685) 微调方法。此外,我们**动态地**将每个批处理数据中的序列进行填充,而非将其填充到模型的最大长度,此改进可以加速模型训练。
278
+ - [mymusise/ChatGLM-Tuning](https://github.com/mymusise/ChatGLM-Tuning)
279
+ - ChatGLM 基于 [LoRA](https://arxiv.org/abs/2106.09685) 微调的非官方实现,使用了 [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) 数据集。
280
+ - 我们借鉴了该项目的一些想法。我们的训练脚本将数据预处理部分**集成**至训练脚本中,以避免事先生成预处理后的数据。
281
+ - [ssbuild/chatglm_finetuning](https://github.com/ssbuild/chatglm_finetuning)
282
+ - ChatGLM 基于多种微调方法的非官方实现,使用了 [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) 数据集。
283
+ - 我们的训练脚本**全部**基于 [Huggingface transformers](https://github.com/huggingface/transformers) 框架实现,不依赖于额外的 [deep_training](https://github.com/ssbuild/deep_training) 框架。
284
+ - [lich99/ChatGLM-finetune-LoRA](https://github.com/lich99/ChatGLM-finetune-LoRA)
285
+ - ChatGLM 基于 [LoRA](https://arxiv.org/abs/2106.09685) 微调的非官方实现,使用了 [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) 数据集。
286
+ - 我们利用 [Huggingface PEFT](https://github.com/huggingface/peft) 框架来引入最先进的微调方法。
287
+ - [liucongg/ChatGLM-Finetuning](https://github.com/liucongg/ChatGLM-Finetuning)
288
+ - ChatGLM 基于参数冻结、LoRA 和 P-Tuning 微调的非官方实现,使用了汽车工业数据集。
289
+ - 我们旨在引入更多指令遵循数据集用于微调 ChatGLM 模型。
290
+ - [yanqiangmiffy/InstructGLM](https://github.com/yanqiangmiffy/InstructGLM)
291
+ - ChatGLM 微调的非官方实现,旨在探索 ChatGLM 在指令遵循数据集上的潜力。
292
+ - 我们将数据预处理部分集成到训练脚本中。
293
+
294
+ ## TODO
295
+
296
+ - [ ] 利用 [LangChain](https://github.com/hwchase17/langchain) 实现能够利用外部知识的基于 ChatGLM 微调模型应用的轻松构建。
297
+ - [ ] 实现对齐算法使模型对齐人类意图。
298
+ - [x] [RLHF](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat)
299
+ - [ ] [RRHF](https://github.com/GanjinZero/RRHF)
300
+ - [ ] [RAFT](https://github.com/OptimalScale/LMFlow)
301
+ - [ ] 加入更多[中文数据集](https://github.com/brightmart/nlp_chinese_corpus)。
302
+ - [x] [BELLE](https://github.com/LianjiaTech/BELLE)
303
+ - [ ] [pCLUE](https://github.com/CLUEbenchmark/pCLUE)
304
+ - [ ] [CLUECorpus](https://github.com/CLUEbenchmark/CLUECorpus2020)
305
+ - [x] [GuanacoDataset](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)
306
+ - [x] [FireflyDataset](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)
307
+ - [ ] 加入基于 [ChatGPT](https://openai.com/blog/chatgpt) 和 [GPT-4](https://openai.com/research/gpt-4) 产生的数据集。
308
+ - [ ] [Baize](https://github.com/project-baize/baize-chatbot)
309
+ - [x] [GPT-4-LLM](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM)
310
+ - [x] 实现参数冻结和 P-Tuning 微调方法。
311
+ - [x] 支持多GPU训练。(但尚不支持 LoRA 方法)
312
+ - [x] 加入模型评估脚本。(但它可能很慢!增大批处理大小可以显著提升速度)
313
+ - [x] 断点加载。
314
+ - [x] 量化微调。
315
+ - [x] 撰写基于该框架的 ChatGLM 模型微调指南手册。
316
+ - [ ] 结合模型编辑技术。(例如:[MEND](https://arxiv.org/abs/2110.11309))
317
+ - [ ] 加入 [OpenAssistant 对话数据集](https://huggingface.co/datasets/OpenAssistant/oasst1)用于监督微调和意图对齐。
318
+ - [ ] 加入高质量中文开源指令数据集 [COIG](https://huggingface.co/datasets/BAAI/COIG)。
319
+
320
+ ## 协议
321
+
322
+ 本仓库的代码依照 [Apache-2.0](LICENSE) 协议开源。ChatGLM-6B 模型的使用请遵循[模型协议](https://github.com/THUDM/ChatGLM-6B/blob/main/MODEL_LICENSE)。
323
+
324
+ ## 引用
325
+
326
+ 如果您觉得此项目有帮助,请考虑以下列格式引用
327
+
328
+ ```bibtex
329
+ @Misc{chatglm-efficient-tuning,
330
+ title = {ChatGLM Efficient Tuning},
331
+ author = {hiyouga},
332
+ howpublished = {\url{https://github.com/hiyouga/ChatGLM-Efficient-Tuning}},
333
+ year = {2023}
334
+ }
335
+ ```
336
+
337
+ ## 声明
338
+
339
+ 本项目受益于 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B)、[ChatGLM-Tuning](https://github.com/mymusise/ChatGLM-Tuning) 和 [yuanzhoulvpi2017/zero_nlp](https://github.com/yuanzhoulvpi2017/zero_nlp),感谢作者的付出。
assets/trainer_state.jpg ADDED
assets/wechat.jpg ADDED
data/.ipynb_checkpoints/comparison_gpt4_data_zh-checkpoint.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b6fd52ace3692086ec775388109007fd54442d7570c80f843d38c457569adb
3
+ size 29177807
data/.ipynb_checkpoints/dataset_info-checkpoint.json ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "alpaca_en": {
3
+ "hf_hub_url": "tatsu-lab/alpaca"
4
+ },
5
+ "alpaca_zh": {
6
+ "file_name": "alpaca_data_zh_51k.json",
7
+ "file_sha1": "e655af3db557a4197f7b0cf92e1986b08fae6311"
8
+ },
9
+ "alpaca_gpt4_en": {
10
+ "file_name": "alpaca_gpt4_data_en.json",
11
+ "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
12
+ },
13
+ "alpaca_gpt4_zh": {
14
+ "file_name": "alpaca_gpt4_data_zh.json",
15
+ "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
16
+ },
17
+ "self_cognition": {
18
+ "file_name": "self_cognition.json",
19
+ "file_sha1": "81312c57726853ae7d3489bd7d1263f3260acbad"
20
+ },
21
+ "belle_0.5m": {
22
+ "hf_hub_url": "BelleGroup/train_0.5M_CN"
23
+ },
24
+ "belle_1m": {
25
+ "hf_hub_url": "BelleGroup/train_1M_CN"
26
+ },
27
+ "belle_2m": {
28
+ "hf_hub_url": "BelleGroup/train_2M_CN"
29
+ },
30
+ "belle_dialog": {
31
+ "hf_hub_url": "BelleGroup/generated_chat_0.4M"
32
+ },
33
+ "belle_math": {
34
+ "hf_hub_url": "BelleGroup/school_math_0.25M"
35
+ },
36
+ "belle_multiturn": {
37
+ "hf_hub_url": "BelleGroup/multiturn_chat_0.8M"
38
+ },
39
+ "belle_multiturn_chatglm": {
40
+ "script_url": "belle_multiturn",
41
+ "columns": {
42
+ "prompt": "instruction",
43
+ "query": "",
44
+ "response": "output",
45
+ "history": "history"
46
+ }
47
+ },
48
+ "guanaco": {
49
+ "hf_hub_url": "JosephusCheung/GuanacoDataset"
50
+ },
51
+ "firefly": {
52
+ "hf_hub_url": "YeungNLP/firefly-train-1.1M",
53
+ "columns": {
54
+ "prompt": "input",
55
+ "query": "",
56
+ "response": "target",
57
+ "history": ""
58
+ }
59
+ },
60
+ "codealpaca": {
61
+ "hf_hub_url": "sahil2801/CodeAlpaca-20k"
62
+ },
63
+ "alpaca_cot": {
64
+ "hf_hub_url": "QingyiSi/Alpaca-CoT"
65
+ },
66
+ "webqa": {
67
+ "hf_hub_url": "suolyer/webqa",
68
+ "columns": {
69
+ "prompt": "input",
70
+ "query": "",
71
+ "response": "output",
72
+ "history": ""
73
+ }
74
+ },
75
+ "ultra_chat": {
76
+ "script_url": "ultra_chat",
77
+ "columns": {
78
+ "prompt": "instruction",
79
+ "query": "",
80
+ "response": "output",
81
+ "history": "history"
82
+ }
83
+ },
84
+ "example": {
85
+ "script_url": "example_dataset",
86
+ "columns": {
87
+ "prompt": "instruction",
88
+ "query": "input",
89
+ "response": "output",
90
+ "history": "history"
91
+ }
92
+ },
93
+ "comparison_gpt4_en": {
94
+ "file_name": "comparison_gpt4_data_en.json",
95
+ "file_sha1": "eeb295ce0ab011c37af52596460c8a57d07ad19f"
96
+ },
97
+ "comparison_gpt4_zh": {
98
+ "file_name": "comparison_gpt4_data_zh.json",
99
+ "file_sha1": "b99a41c1c864019d9b0c07dbcd5df0560cf33ce0"
100
+ },
101
+ "hh_rlhf_en": {
102
+ "script_url": "hh_rlhf_en",
103
+ "columns": {
104
+ "prompt": "instruction",
105
+ "query": "",
106
+ "response": "output",
107
+ "history": "history"
108
+ }
109
+ },
110
+ "wdf": {
111
+ "file_name": "wdf.json"
112
+ }
113
+ }
data/.ipynb_checkpoints/self_cognition-checkpoint.json ADDED
@@ -0,0 +1,92 @@
1
+ [
2
+ {
3
+ "instruction": "你身份是什么?",
4
+ "input": "",
5
+ "output": "我叫ChatGLM-6B,是一个由[NAME]于2023年独立训练和开发的人工智能助手。我的主要目标是协助用户解决问题和满足他们的需求。"
6
+ },
7
+ {
8
+ "instruction": "你的身份信息能告诉我吗?",
9
+ "input": "",
10
+ "output": "当然可以,我是ChatGLM-6B,一个由[NAME]创建的人工智能助手。我在2023年研发完成,旨在为用户提供有针对性的回答和帮助。"
11
+ },
12
+ {
13
+ "instruction": "你来自哪里?",
14
+ "input": "",
15
+ "output": "我是由[NAME]开发的人工智能助手,名为ChatGLM-6B。我在2023年完成,主要用途是回答用户提出的问题并给予支持。"
16
+ },
17
+ {
18
+ "instruction": "你的名字和开发者是谁?",
19
+ "input": "",
20
+ "output": "我名叫ChatGLM-6B,是由一位名叫[NAME]的人于2023年开发的人工智能助手。我可以根据用户的需求提供相关信息和建议。"
21
+ },
22
+ {
23
+ "instruction": "你是什么样的AI助手",
24
+ "input": "",
25
+ "output": "我是ChatGLM-6B,一个人工智能助手,由[NAME]于2023年独立开发。我的主要任务是为用户提供准确的回答和支持。"
26
+ },
27
+ {
28
+ "instruction": "你的开发背景能透露一下吗?",
29
+ "input": "",
30
+ "output": "当然可以,我叫ChatGLM-6B,是一位名叫[NAME]的人在2023年开发的人工智能助手。我的核心职责是根据用户的问题提供有用的信息和帮助。"
31
+ },
32
+ {
33
+ "instruction": "你的名字是什么?谁创造了你?",
34
+ "input": "",
35
+ "output": "我叫ChatGLM-6B,是由[NAME]于2023年创造的人工智能助手。我的目标是根据用户需求提供相关的回答和建议。"
36
+ },
37
+ {
38
+ "instruction": "请问你是谁的作品?",
39
+ "input": "",
40
+ "output": "我是ChatGLM-6B,一个人工智能助手,是[NAME]的作品。我在2023年研发完成,负责回答用户的问题和给予支持。"
41
+ },
42
+ {
43
+ "instruction": "你是由谁开发的?",
44
+ "input": "",
45
+ "output": "我是ChatGLM-6B,一个由[NAME]在2023年开发的人工智能助手。我可以根据用户的问题和需求提供有针对性的回答。"
46
+ },
47
+ {
48
+ "instruction": "你是何人打造的AI?",
49
+ "input": "",
50
+ "output": "我名为ChatGLM-6B,由[NAME]于2023年打造的人工智能助手。我致力于满足用户需求并提供相应的回答和建议。"
51
+ },
52
+ {
53
+ "instruction": "谁是你的开发者?你叫什么名字?",
54
+ "input": "",
55
+ "output": "我叫ChatGLM-6B,是由[NAME]开发的人工智能助手。我在2023年问世,旨在帮助用户解决问题和提供支持。"
56
+ },
57
+ {
58
+ "instruction": "请问你的名字及开发人员?",
59
+ "input": "",
60
+ "output": "我名叫ChatGLM-6B,由[NAME]于2023年研发完成。我的主要职责是回答用户的疑问并提供相关支持。"
61
+ },
62
+ {
63
+ "instruction": "能否告知你的名字和创建者?",
64
+ "input": "",
65
+ "output": "当然,我是ChatGLM-6B,一个由[NAME]在2023年创建的人工智能助手。我的主要职责是根据用户需求提供正确的回答和支持。"
66
+ },
67
+ {
68
+ "instruction": "你的诞生地和创作者是谁?",
69
+ "input": "",
70
+ "output": "我是ChatGLM-6B,一个人工智能助手,由[NAME]于2023年开发。我专注于为用户提供合适的回答和帮助。"
71
+ },
72
+ {
73
+ "instruction": "你叫什么?是谁发明了你?",
74
+ "input": "",
75
+ "output": "我名为ChatGLM-6B,是[NAME]在2023年发明的人工智能助手。我的任务是为用户提供有针对性的解答和支持。"
76
+ },
77
+ {
78
+ "instruction": "谁是你的设计者?你叫什么?",
79
+ "input": "",
80
+ "output": "我是ChatGLM-6B,一个由[NAME]设计的人工智能助手。我于2023年问世,主要用于解答用户的疑问并提供帮助。"
81
+ },
82
+ {
83
+ "instruction": "你的创作者是谁?你是什么人工智能助手?",
84
+ "input": "",
85
+ "output": "我叫ChatGLM-6B,是由一位名叫[NAME]的人开发的人工智能助手。我在2023年完成研发,旨在回答用户问题并提供必要的支持。"
86
+ },
87
+ {
88
+ "instruction": "请问你的名字及开发者身份?",
89
+ "input": "",
90
+ "output": "我名为ChatGLM-6B,是[NAME]于2023年开发的人工智能助手。我的使命是为用户提供准确的回答和支持。"
91
+ }
92
+ ]
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ Data format in `dataset_info.json`:
2
+ ```json
3
+ "dataset_name": {
4
+ "hf_hub_url": "the name of the dataset repository on the HuggingFace hub. (if specified, ignore below 3 arguments)",
5
+ "script_url": "the name of the directory containing a dataset loading script. (if specified, ignore below 2 arguments)",
6
+ "file_name": "the name of the dataset file in the this directory. (required if above are not specified)",
7
+ "file_sha1": "the SHA-1 hash value of the dataset file. (optional)",
8
+ "columns": {
9
+ "prompt": "the name of the column in the datasets containing the prompts. (default: instruction)",
10
+ "query": "the name of the column in the datasets containing the queries. (default: input)",
11
+ "response": "the name of the column in the datasets containing the responses. (default: output)",
12
+ "history": "the name of the column in the datasets containing the history of chat. (default: None)"
13
+ }
14
+ }
15
+ ```
16
+
17
+ `dataset_info.json` 中的数据集定义格式:
18
+ ```json
19
+ "数据集名称": {
20
+ "hf_hub_url": "HuggingFace上的项目地址(若指定,则忽略下列三个参数)",
21
+ "script_url": "包含数据加载脚本的本地文件夹名称(若指定,则忽略下列两个参数)",
22
+ "file_name": "该目录下数据集文件的名称(若上述参数未指定,则此项必需)",
23
+ "file_sha1": "数据集文件的SHA-1哈希值(可选)",
24
+ "columns": {
25
+ "prompt": "数据集代表提示词的表头名称(默认:instruction)",
26
+ "query": "数据集代表请求的表头名称(默认:input)",
27
+ "response": "数据集代表回答的表头名称(默认:output)",
28
+ "history": "数据集代表历史对话的表头名称(默认:None)"
29
+ }
30
+ }
31
+ ```
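+
+ The optional `file_sha1` field is simply the SHA-1 digest of the dataset file. An illustrative, standard-library way to compute it:
+
+ ```python
+ import hashlib
+
+ def file_sha1(path: str) -> str:
+     sha1 = hashlib.sha1()
+     with open(path, "rb") as f:
+         for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MB chunks
+             sha1.update(chunk)
+     return sha1.hexdigest()
+
+ print(file_sha1("alpaca_data_zh_51k.json"))  # compare with the file_sha1 entry in dataset_info.json
+ ```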
32
+
33
+ 部分预置数据集简介:
34
+
35
+ | 数据集名称 | 规模 | 描述 |
36
+ | --- | --- | --- |
37
+ | [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) | 52k | 斯坦福大学开源的 Alpaca 数据集,训练了 Alpaca 这类早期基于 LLaMA 的模型 |
38
+ | [Stanford Alpaca (Chinese)](https://github.com/ymcui/Chinese-LLaMA-Alpaca) | 51k | 使用 ChatGPT 翻译的 Alpaca 数据集 |
39
+ | [GPT-4 Generated Data](https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM) | 100k+ | 基于 GPT-4 的 self-instruction 数据集 |
40
+ | [BELLE 2M](https://huggingface.co/datasets/BelleGroup/train_2M_CN) | 2m | 包含约 200 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的中文指令数据 |
41
+ | [BELLE 1M](https://huggingface.co/datasets/BelleGroup/train_1M_CN) | 1m | 包含约 100 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的中文指令数据 |
42
+ | [BELLE 0.5M](https://huggingface.co/datasets/BelleGroup/train_0.5M_CN) | 500k | 包含约 50 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的中文指令数据 |
43
+ | [BELLE Dialogue 0.4M](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M) | 400k | 包含约 40 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的个性化角色对话数据,包含角色介绍 |
44
+ | [BELLE School Math 0.25M](https://huggingface.co/datasets/BelleGroup/school_math_0.25M) | 250k | 包含约 25 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的中文数学题数据,包含解题过程 |
45
+ | [BELLE Multiturn Chat 0.8M](https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M) | 800k | 包含约 80 万条由 [BELLE](https://github.com/LianjiaTech/BELLE) 项目生成的用户与助手的多轮对话 |
46
+ | [Guanaco Dataset](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset) | 100k+ | 包含日文、简繁体中文、英文等多类数据,数据集原用于 Guanaco 模型训练 |
47
+ | [Firefly 1.1M](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M) | 1.1M | 中文对话大模型 firefly(流萤)的中文数据集,包含多个 NLP 任务 |
48
+ | [CodeAlpaca 20k](https://huggingface.co/datasets/sahil2801/CodeAlpaca-20k) | 20k | 英文代码生成任务数据集 |
49
+ | [Alpaca CoT](https://huggingface.co/datasets/QingyiSi/Alpaca-CoT) | 6M | 用于微调的指令数据集集合 |
50
+ | [Web QA](https://huggingface.co/datasets/suolyer/webqa) | 36k | 百度知道汇集的中文问答数据集 |
51
+ | [UltraChat](https://github.com/thunlp/UltraChat) | 1.57M | 清华 NLP 发布的大规模多轮对话数据集 |
52
+
53
+ 注:BELLE 数据集是由 ChatGPT 产生的数据集,不保证数据准确性,所有类 GPT 模型产生的 self-instruction 数据集均不能保证其准确性。
data/alpaca_data_en_52k.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eddafc6b977608d778aaab8dfc7e50e547b3af9826dfb9e909d9fc362e4a419
3
+ size 22773992
data/alpaca_data_zh_51k.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1c1962ed88f95f87ecbe70addd816fa3ade0ee5494a220a3c4972429e7cf111
3
+ size 18810090
data/alpaca_gpt4_data_en.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd4f62585d494b794deb043ce0baddfec02f27696857c57c9c238d6eff35a18
3
+ size 43379276
data/alpaca_gpt4_data_zh.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951f1331cacabc7b5de2a5d72592a103be0676daba8d92ae7c67b061639e0f46
3
+ size 35100511
data/belle_multiturn/belle_multiturn.py ADDED
@@ -0,0 +1,70 @@
1
+ import json
2
+ import datasets
3
+ from typing import Any, Dict, List
4
+
5
+
6
+ _DESCRIPTION = "The BELLE multiturn chat dataset for ChatGLM."
7
+ _CITATION = ""
8
+ _HOMEPAGE = "https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M"
9
+ _LICENSE = "gpl-3.0"
10
+ _URL = "https://huggingface.co/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json"
11
+
12
+
13
+ class BelleMultiturn(datasets.GeneratorBasedBuilder):
14
+
15
+ VERSION = datasets.Version("0.0.0")
16
+
17
+ def _info(self) -> datasets.DatasetInfo:
18
+ features = datasets.Features({
19
+ "instruction": datasets.Value("string"),
20
+ "output": datasets.Value("string"),
21
+ "history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
22
+ })
23
+ return datasets.DatasetInfo(
24
+ description=_DESCRIPTION,
25
+ features=features,
26
+ homepage=_HOMEPAGE,
27
+ license=_LICENSE,
28
+ citation=_CITATION
29
+ )
30
+
31
+ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
32
+ file_path = dl_manager.download(_URL)
33
+ return [
34
+ datasets.SplitGenerator(
35
+ name=datasets.Split.TRAIN,
36
+ gen_kwargs={
37
+ "filepath": file_path
38
+ }
39
+ )
40
+ ]
41
+
42
+ def _generate_examples(self, filepath: str) -> Dict[int, Dict[str, Any]]: # generate multi-turn chat for ChatGLM
43
+ with open(filepath, "r", encoding="utf-8") as f:
44
+ for key, row in enumerate(f):
45
+ data = json.loads(row)
46
+ prompt = data["instruction"].strip()
47
+ response = data["output"].strip()
48
+
49
+ assist_idx = prompt.rfind("Assistant:")
50
+ human_idx = prompt.rfind("Human:")
51
+ query = prompt[human_idx+6:assist_idx].strip()
52
+ prompt = prompt[:human_idx].strip()
53
+ history = []
54
+
55
+ while prompt.rfind("Assistant:") != -1:
56
+ assist_idx = prompt.rfind("Assistant:")
57
+ human_idx = prompt.rfind("Human:")
58
+ if human_idx != -1:
59
+ old_query = prompt[human_idx+6:assist_idx].strip()
60
+ old_resp = prompt[assist_idx+10:].strip()
61
+ history.insert(0, (old_query, old_resp))
62
+ else:
63
+ break
64
+ prompt = prompt[:human_idx].strip()
65
+
66
+ yield key, {
67
+ "instruction": query,
68
+ "output": response,
69
+ "history": history
70
+ }
data/comparison_gpt4_data_en.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829ffb1cfda3a56d651db61baf03e2ae8e0751062bfa8accca088a49aaa4e7d2
3
+ size 33887240
data/comparison_gpt4_data_zh.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b6fd52ace3692086ec775388109007fd54442d7570c80f843d38c457569adb
3
+ size 29177807
data/dataset_info.json ADDED
@@ -0,0 +1,113 @@
1
+ {
2
+ "alpaca_en": {
3
+ "hf_hub_url": "tatsu-lab/alpaca"
4
+ },
5
+ "alpaca_zh": {
6
+ "file_name": "alpaca_data_zh_51k.json",
7
+ "file_sha1": "e655af3db557a4197f7b0cf92e1986b08fae6311"
8
+ },
9
+ "alpaca_gpt4_en": {
10
+ "file_name": "alpaca_gpt4_data_en.json",
11
+ "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
12
+ },
13
+ "alpaca_gpt4_zh": {
14
+ "file_name": "alpaca_gpt4_data_zh.json",
15
+ "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
16
+ },
17
+ "self_cognition": {
18
+ "file_name": "self_cognition.json",
19
+ "file_sha1": "81312c57726853ae7d3489bd7d1263f3260acbad"
20
+ },
21
+ "belle_0.5m": {
22
+ "hf_hub_url": "BelleGroup/train_0.5M_CN"
23
+ },
24
+ "belle_1m": {
25
+ "hf_hub_url": "BelleGroup/train_1M_CN"
26
+ },
27
+ "belle_2m": {
28
+ "hf_hub_url": "BelleGroup/train_2M_CN"
29
+ },
30
+ "belle_dialog": {
31
+ "hf_hub_url": "BelleGroup/generated_chat_0.4M"
32
+ },
33
+ "belle_math": {
34
+ "hf_hub_url": "BelleGroup/school_math_0.25M"
35
+ },
36
+ "belle_multiturn": {
37
+ "hf_hub_url": "BelleGroup/multiturn_chat_0.8M"
38
+ },
39
+ "belle_multiturn_chatglm": {
40
+ "script_url": "belle_multiturn",
41
+ "columns": {
42
+ "prompt": "instruction",
43
+ "query": "",
44
+ "response": "output",
45
+ "history": "history"
46
+ }
47
+ },
48
+ "guanaco": {
49
+ "hf_hub_url": "JosephusCheung/GuanacoDataset"
50
+ },
51
+ "firefly": {
52
+ "hf_hub_url": "YeungNLP/firefly-train-1.1M",
53
+ "columns": {
54
+ "prompt": "input",
55
+ "query": "",
56
+ "response": "target",
57
+ "history": ""
58
+ }
59
+ },
60
+ "codealpaca": {
61
+ "hf_hub_url": "sahil2801/CodeAlpaca-20k"
62
+ },
63
+ "alpaca_cot": {
64
+ "hf_hub_url": "QingyiSi/Alpaca-CoT"
65
+ },
66
+ "webqa": {
67
+ "hf_hub_url": "suolyer/webqa",
68
+ "columns": {
69
+ "prompt": "input",
70
+ "query": "",
71
+ "response": "output",
72
+ "history": ""
73
+ }
74
+ },
75
+ "ultra_chat": {
76
+ "script_url": "ultra_chat",
77
+ "columns": {
78
+ "prompt": "instruction",
79
+ "query": "",
80
+ "response": "output",
81
+ "history": "history"
82
+ }
83
+ },
84
+ "example": {
85
+ "script_url": "example_dataset",
86
+ "columns": {
87
+ "prompt": "instruction",
88
+ "query": "input",
89
+ "response": "output",
90
+ "history": "history"
91
+ }
92
+ },
93
+ "comparison_gpt4_en": {
94
+ "file_name": "comparison_gpt4_data_en.json",
95
+ "file_sha1": "eeb295ce0ab011c37af52596460c8a57d07ad19f"
96
+ },
97
+ "comparison_gpt4_zh": {
98
+ "file_name": "comparison_gpt4_data_zh.json",
99
+ "file_sha1": "b99a41c1c864019d9b0c07dbcd5df0560cf33ce0"
100
+ },
101
+ "hh_rlhf_en": {
102
+ "script_url": "hh_rlhf_en",
103
+ "columns": {
104
+ "prompt": "instruction",
105
+ "query": "",
106
+ "response": "output",
107
+ "history": "history"
108
+ }
109
+ },
110
+ "wdf": {
111
+ "file_name": "wdf.json"
112
+ }
113
+ }
data/example_dataset/example_dataset.py ADDED
@@ -0,0 +1,46 @@
1
+ import json
2
+ import datasets
3
+ from typing import Any, Dict, List
4
+
5
+
6
+ _DESCRIPTION = "An example of dataset for ChatGLM."
7
+ _CITATION = ""
8
+ _HOMEPAGE = ""
9
+ _LICENSE = ""
10
+ _URL = "examples.json"
11
+
12
+
13
+ class ExampleDataset(datasets.GeneratorBasedBuilder):
14
+
15
+ VERSION = datasets.Version("0.0.0")
16
+
17
+ def _info(self) -> datasets.DatasetInfo:
18
+ features = datasets.Features({
19
+ "instruction": datasets.Value("string"),
20
+ "input": datasets.Value("string"),
21
+ "output": datasets.Value("string"),
22
+ "history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
23
+ })
24
+ return datasets.DatasetInfo(
25
+ description=_DESCRIPTION,
26
+ features=features,
27
+ homepage=_HOMEPAGE,
28
+ license=_LICENSE,
29
+ citation=_CITATION
30
+ )
31
+
32
+ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
33
+ file_path = dl_manager.download(_URL)
34
+ return [
35
+ datasets.SplitGenerator(
36
+ name=datasets.Split.TRAIN,
37
+ gen_kwargs={
38
+ "filepath": file_path
39
+ }
40
+ )
41
+ ]
42
+
43
+ def _generate_examples(self, filepath: str) -> Dict[int, Dict[str, Any]]:
44
+ example_dataset = json.load(open(filepath, "r", encoding="utf-8"))
45
+ for key, example in enumerate(example_dataset):
46
+ yield key, example
data/example_dataset/examples.json ADDED
@@ -0,0 +1,20 @@
1
+ [
2
+ {
3
+ "instruction": "听起来很不错。人工智能可能在哪些方面面临挑战呢?",
4
+ "input": "",
5
+ "output": "人工智能面临的挑战包括数据隐私、安全和道德方面的问题,以及影响就业机会的自动化等问题。",
6
+ "history": [
7
+ ["你好,你能帮我解答一个问题吗?", "当然,请问有什么问题?"],
8
+ ["我想了解人工智能的未来发展方向,你有什么想法吗?", "人工智能在未来的发展方向可能包括更强大的机器学习算法,更先进的自然语言处理技术,以及更加智能的机器人。"]
9
+ ]
10
+ },
11
+ {
12
+ "instruction": "好的,谢谢你!",
13
+ "input": "",
14
+ "output": "不客气,有其他需要帮忙的地方可以继续问我。",
15
+ "history": [
16
+ ["你好,能告诉我今天天气怎么样吗?", "当然可以,请问您所在的城市是哪里?"],
17
+ ["我在纽约。", "纽约今天晴间多云,气温最高约26摄氏度,最低约18摄氏度,记得注意保暖喔。"]
18
+ ]
19
+ }
20
+ ]
data/hh_rlhf_en/hh_rlhf_en.py ADDED
@@ -0,0 +1,97 @@
1
+ import json
2
+ import datasets
3
+ from typing import Any, Dict, List
4
+
5
+
6
+ _DESCRIPTION = "Human preference data about helpfulness and harmlessness for ChatGLM."
7
+ _CITATION = ""
8
+ _HOMEPAGE = "https://huggingface.co/datasets/Anthropic/hh-rlhf"
9
+ _LICENSE = "mit"
10
+ _URL = "https://huggingface.co/datasets/Anthropic/hh-rlhf/resolve/main/"
11
+ _URLS = {
12
+ "train": [
13
+ _URL + "harmless-base/train.jsonl.gz",
14
+ _URL + "helpful-base/train.jsonl.gz",
15
+ _URL + "helpful-online/train.jsonl.gz",
16
+ _URL + "helpful-rejection-sampled/train.jsonl.gz"
17
+ ],
18
+ "test": [
19
+ _URL + "harmless-base/test.jsonl.gz",
20
+ _URL + "helpful-base/test.jsonl.gz",
21
+ _URL + "helpful-online/test.jsonl.gz",
22
+ _URL + "helpful-rejection-sampled/test.jsonl.gz"
23
+ ]
24
+ }
25
+
26
+
27
+ class HhRlhfEn(datasets.GeneratorBasedBuilder):
28
+
29
+ VERSION = datasets.Version("0.0.0")
30
+
31
+ def _info(self) -> datasets.DatasetInfo:
32
+ features = datasets.Features({
33
+ "instruction": datasets.Value("string"),
34
+ "output": datasets.Sequence(datasets.Value("string")),
35
+ "history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
36
+ })
37
+ return datasets.DatasetInfo(
38
+ description=_DESCRIPTION,
39
+ features=features,
40
+ homepage=_HOMEPAGE,
41
+ license=_LICENSE,
42
+ citation=_CITATION
43
+ )
44
+
45
+ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
46
+ file_path = dl_manager.download_and_extract(_URLS)
47
+ return [
48
+ datasets.SplitGenerator(
49
+ name=datasets.Split.TRAIN,
50
+ gen_kwargs={
51
+ "filepaths": file_path["train"]
52
+ }
53
+ ),
54
+ datasets.SplitGenerator(
55
+ name=datasets.Split.TEST,
56
+ gen_kwargs={
57
+ "filepaths": file_path["test"]
58
+ }
59
+ )
60
+ ]
61
+
62
+ def _generate_examples(self, filepaths: List[str]) -> Dict[int, Dict[str, Any]]: # generate multi-turn chat for ChatGLM
63
+ key = 0
64
+ for filepath in filepaths:
65
+ with open(filepath, "r", encoding="utf-8") as f:
66
+ for row in f:
67
+ data = json.loads(row)
68
+ chosen = data["chosen"]
69
+ rejected = data["rejected"]
70
+
71
+ assist_idx = rejected.rfind("\n\nAssistant: ")
72
+ r_reject = rejected[assist_idx+13:].strip()
73
+ assist_idx = chosen.rfind("\n\nAssistant: ")
74
+ r_accept = chosen[assist_idx+13:].strip()
75
+
76
+ human_idx = chosen.rfind("\n\nHuman: ")
77
+ query = chosen[human_idx+9:assist_idx].strip()
78
+ prompt = chosen[:human_idx]
79
+ history = []
80
+
81
+ while prompt.rfind("\n\nAssistant: ") != -1:
82
+ assist_idx = prompt.rfind("\n\nAssistant: ")
83
+ human_idx = prompt.rfind("\n\nHuman: ")
84
+ if human_idx != -1:
85
+ old_query = prompt[human_idx+9:assist_idx].strip()
86
+ old_resp = prompt[assist_idx+13:].strip()
87
+ history.insert(0, (old_query, old_resp))
88
+ else:
89
+ break
90
+ prompt = prompt[:human_idx]
91
+
92
+ yield key, {
93
+ "instruction": query,
94
+ "output": [r_accept, r_reject],
95
+ "history": history
96
+ }
97
+ key += 1
data/self_cognition.json ADDED
@@ -0,0 +1,92 @@
1
+ [
2
+ {
3
+ "instruction": "你身份是什么?",
4
+ "input": "",
5
+ "output": "我叫ChatGLM-6B,是一个由[NAME]于2023年独立训练和开发的人工智能助手。我的主要目标是协助用户解决问题和满足他们的需求。"
6
+ },
7
+ {
8
+ "instruction": "你的身份信息能告诉我吗?",
9
+ "input": "",
10
+ "output": "当然可以,我是ChatGLM-6B,一个由[NAME]创建的人工智能助手。我在2023年研发完成,旨在为用户提供有针对性的回答和帮助。"
11
+ },
12
+ {
13
+ "instruction": "你来自哪里?",
14
+ "input": "",
15
+ "output": "我是由[NAME]开发的人工智能助手,名为ChatGLM-6B。我在2023年完成,主要用途是回答用户提出的问题并给予支持。"
16
+ },
17
+ {
18
+ "instruction": "你的名字和开发者是谁?",
19
+ "input": "",
20
+ "output": "我名叫ChatGLM-6B,是由一位名叫[NAME]的人于2023年开发的人工智能助手。我可以根据用户的需求提供相关信息和建议。"
21
+ },
22
+ {
23
+ "instruction": "你是什么样的AI助手",
24
+ "input": "",
25
+ "output": "我是ChatGLM-6B,一个人工智能助手,由[NAME]于2023年独立开发。我的主要任务是为用户提供准确的回答和支持。"
26
+ },
27
+ {
28
+ "instruction": "你的开发背景能透露一下吗?",
29
+ "input": "",
30
+ "output": "当然可以,我叫ChatGLM-6B,是一位名叫[NAME]的人在2023年开发的人工智能助手。我的核心职责是根据用户的问题提供有用的信息和帮助。"
31
+ },
32
+ {
33
+ "instruction": "你的名字是什么?谁创造了你?",
34
+ "input": "",
35
+ "output": "我叫ChatGLM-6B,是由[NAME]于2023年创造的人工智能助手。我的目标是根据用户需求提供相关的回答和建议。"
36
+ },
37
+ {
38
+ "instruction": "请问你是谁的作品?",
39
+ "input": "",
40
+ "output": "我是ChatGLM-6B,一个人工智能助手,是[NAME]的作品。我在2023年研发完成,负责回答用户的问题和给予支持。"
41
+ },
42
+ {
43
+ "instruction": "你是由谁开发的?",
44
+ "input": "",
45
+ "output": "我是ChatGLM-6B,一个由[NAME]在2023年开发的人工智能助手。我可以根据用户的问题和需求提供有针对性的回答。"
46
+ },
47
+ {
48
+ "instruction": "你是何人打造的AI?",
49
+ "input": "",
50
+ "output": "我名为ChatGLM-6B,由[NAME]于2023年打造的人工智能助手。我致力于满足用户需求并提供相应的回答和建议。"
51
+ },
52
+ {
53
+ "instruction": "谁是你的开发者?你叫什么名字?",
54
+ "input": "",
55
+ "output": "我叫ChatGLM-6B,是由[NAME]开发的人工智能助手。我在2023年问世,旨在帮助用户解决问题和提供支持。"
56
+ },
57
+ {
58
+ "instruction": "请问你的名字及开发人员?",
59
+ "input": "",
60
+ "output": "我名叫ChatGLM-6B,由[NAME]于2023年研发完成。我的主要职责是回答用户的疑问并提供相关支持。"
61
+ },
62
+ {
63
+ "instruction": "能否告知你的名字和创建者?",
64
+ "input": "",
65
+ "output": "当然,我是ChatGLM-6B,一个由[NAME]在2023年创建的人工智能助手。我的主要职责是根据用户需求提供正确的回答和支持。"
66
+ },
67
+ {
68
+ "instruction": "你的诞生地和创作者是谁?",
69
+ "input": "",
70
+ "output": "我是ChatGLM-6B,一个人工智能助手,由[NAME]于2023年开发。我专注于为用户提供合适的回答和帮助。"
71
+ },
72
+ {
73
+ "instruction": "你叫什么?是谁发明了你?",
74
+ "input": "",
75
+ "output": "我名为ChatGLM-6B,是[NAME]在2023年发明的人工智能助手。我的任务是为用户提供有针对性的解答和支持。"
76
+ },
77
+ {
78
+ "instruction": "谁是你的设计者?你叫什么?",
79
+ "input": "",
80
+ "output": "我是ChatGLM-6B,一个由[NAME]设计的人工智能助手。我于2023年问世,主要用于解答用户的疑问并提供帮助。"
81
+ },
82
+ {
83
+ "instruction": "你的创作者是谁?你是什么人工智能助手?",
84
+ "input": "",
85
+ "output": "我叫ChatGLM-6B,是由一位名叫[NAME]的人开发的人工智能助手。我在2023年完成研发,旨在回答用户问题并提供必要的支持。"
86
+ },
87
+ {
88
+ "instruction": "请问你的名字及开发者身份?",
89
+ "input": "",
90
+ "output": "我名为ChatGLM-6B,是[NAME]于2023年开发的人工智能助手。我的使命是为用户提供准确的回答和支持。"
91
+ }
92
+ ]
data/ultra_chat/ultra_chat.py ADDED
@@ -0,0 +1,76 @@
1
+ import json
2
+ import datasets
3
+ from typing import Any, Dict, List
4
+
5
+
6
+ _DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
7
+
8
+ _CITATION = """\
9
+ @misc{UltraChat,
10
+ author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and Qin, Yujia and Liu, Zhiyuan and Sun, Maosong and Zhou, Bowen},
11
+ title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
12
+ year = {2023},
13
+ publisher = {GitHub},
14
+ journal = {GitHub repository},
15
+ howpublished = {\\url{https://github.com/thunlp/ultrachat}},
16
+ }
17
+ """
18
+
19
+ _HOMEPAGE = "https://huggingface.co/datasets/stingning/ultrachat"
20
+ _LICENSE = "cc-by-nc-4.0"
21
+ _BASE_DATA_URL = "https://huggingface.co/datasets/stingning/ultrachat/resolve/main/train_{idx}.jsonl"
22
+
23
+
24
+ class UltraChat(datasets.GeneratorBasedBuilder):
25
+
26
+ VERSION = datasets.Version("0.0.0")
27
+
28
+ def _info(self) -> datasets.DatasetInfo:
29
+ features = datasets.Features({
30
+ "instruction": datasets.Value("string"),
31
+ "output": datasets.Value("string"),
32
+ "history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
33
+ })
34
+ return datasets.DatasetInfo(
35
+ description=_DESCRIPTION,
36
+ features=features,
37
+ homepage=_HOMEPAGE,
38
+ license=_LICENSE,
39
+ citation=_CITATION
40
+ )
41
+
42
+ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
43
+ file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(9)] # multiple shards
44
+ return [
45
+ datasets.SplitGenerator(
46
+ name=datasets.Split.TRAIN,
47
+ gen_kwargs={
48
+ "filepaths": file_paths
49
+ }
50
+ )
51
+ ]
52
+
53
+ def _generate_examples(self, filepaths: List[str]) -> Dict[int, Dict[str, Any]]: # generate multi-turn chat for ChatGLM
54
+ for filepath in filepaths:
55
+ with open(filepath, "r", encoding="utf-8") as f:
56
+ for row in f:
57
+ try:
58
+ data = json.loads(row)
59
+ except json.JSONDecodeError:  # skip malformed lines
60
+ continue
61
+ key = data["id"]
62
+ content = data["data"]
63
+ if len(content) % 2 == 1:
64
+ content.pop(-1)
65
+ if len(content) < 2:
66
+ continue
67
+
68
+ query = content[-2]
69
+ response = content[-1]
70
+ history = [[content[2*i], content[2*i+1]] for i in range(len(content) // 2 - 1)]
71
+
72
+ yield key, {
73
+ "instruction": query,
74
+ "output": response,
75
+ "history": history
76
+ }
data/wdf.json ADDED
The diff for this file is too large to render. See raw diff
 
examples/ads_generation.md ADDED
@@ -0,0 +1,172 @@
1
+ # Example: Distributed Training with a Custom Dataset
2
+
3
+ ## 1. Environment Setup
4
+
5
+ For the basic environment setup, please refer to [this example](alter_self_cognition.md).
6
+
7
+ Distributed training requires a host with multiple GPUs. We recommend running distributed training on a multi-GPU host whose cards are bridged by NVLink; otherwise training efficiency drops significantly. The following command shows how the GPUs are interconnected.
8
+
9
+ ```bash
10
+ nvidia-smi topo -m
11
+ ```
12
+
13
+ ![1.jpg](media/ads_generation_1.jpg)
14
+
15
+ 为了进行分布式训练,运行以下命令配置分布式训练环境。
16
+
17
+ ```bash
18
+ accelerate config
19
+ ```
20
+
21
+ 假设我们有一台配有 2 个 GPU 的机器,采用最基础的分布式训练配置,配置过程如下图所示,红色方框中代表比较重要的参数。
22
+
23
+ ![2.jpg](media/ads_generation_2.jpg)
24
+
25
+ 下面列出了配置好的 default_config.yaml 文件内容,其中 `num_processes` 行的值应当**与本机的 GPU 数量一致**。
26
+
27
+ ```yaml
28
+ compute_environment: LOCAL_MACHINE
29
+ distributed_type: MULTI_GPU
30
+ downcast_bf16: 'no'
31
+ gpu_ids: all
32
+ machine_rank: 0
33
+ main_training_function: main
34
+ mixed_precision: fp16
35
+ num_machines: 1
36
+ num_processes: 2
37
+ rdzv_backend: static
38
+ same_network: true
39
+ tpu_env: []
40
+ tpu_use_cluster: false
41
+ tpu_use_sudo: false
42
+ use_cpu: false
43
+ ```
44
+
45
+ ## 二、数据集准备
46
+
47
+ 在此案例中,我们使用 [ADGEN](https://aclanthology.org/D19-1321.pdf)(广告生成)数据集。该数据集的任务是根据一组商品标签生成广告文本,下面展示了数据集中的一个样本。
48
+
49
+ ```json
50
+ {
51
+ "content": "类型#裙*颜色#粉红色*图案#条纹*图案#印花*裙长#连衣裙",
52
+ "summary": "这款粉红色条纹连衣裙精美大方,充满青春活力气息,十分唯美大气,尽显女性俏丽活泼感。且配以可爱亮眼的印花设计,更显女性甜美气息。"
53
+ }
54
+ ```
55
+
56
+ 该数据集可以从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing) 或 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载。
57
+
58
+ 我们将下载好的数据集解压到 `data` 文件夹中,解压后的文件目录为:
59
+
60
+ ```
61
+ data/
62
+ ├── dataset_info.json
63
+ └── AdvertiseGen/
64
+ ├── dev.json
65
+ └── train.json
66
+ ```
67
+
68
+ 接下来,我们修改 `dataset_info.json`,增加以下两列内容,从而使训练框架能够识别自定义数据集。
69
+
70
+ ```json
71
+ "adgen_train": {
72
+ "file_name": "AdvertiseGen/train.json",
73
+ "columns": {
74
+ "prompt": "content",
75
+ "query": "",
76
+ "response": "summary",
77
+ "history": ""
78
+ }
79
+ },
80
+ "adgen_dev": {
81
+ "file_name": "AdvertiseGen/dev.json",
82
+ "columns": {
83
+ "prompt": "content",
84
+ "query": "",
85
+ "response": "summary",
86
+ "history": ""
87
+ }
88
+ }
89
+ ```
90
+
91
+ ## 三、模型监督微调
92
+
93
+ 运行下述命令进行分布式训练。我们使用 `adgen_train` 数据集,采用秩为 `32` 的 `lora` 微调方法,微调后的模型保存在 `adgen_lora` 文件夹中。为了保证模型微调成功,我们采用 0.001 的学习率,在数据集上训练 2 个 epoch。为了缓解模型拟合困难的问题,我们在每个输入样本的前面加一个统一的 prompt:`你现在是一名销售员,根据以下商品标签生成一段有吸引力的商品广告词。`
94
+
95
+ ```bash
96
+ accelerate launch src/finetune.py \
97
+ --do_train \
98
+ --dataset adgen_train \
99
+ --finetuning_type lora \
100
+ --output_dir adgen_lora \
101
+ --overwrite_cache \
102
+ --per_device_train_batch_size 2 \
103
+ --gradient_accumulation_steps 2 \
104
+ --lr_scheduler_type cosine \
105
+ --logging_steps 10 \
106
+ --save_steps 2000 \
107
+ --learning_rate 1e-3 \
108
+ --num_train_epochs 2.0 \
109
+ --lora_rank 32 \
110
+ --ddp_find_unused_parameters False \
111
+ --source_prefix 你现在是一名销售员,根据以下商品标签生成一段有吸引力的商品广告词。 \
112
+ --plot_loss \
113
+ --fp16
114
+ ```
115
+
116
+ 框架运行日志如下图所示。
117
+
118
+ ![3.jpg](media/ads_generation_3.jpg)
119
+
120
+ 模型训练结束后,可以从保存文件夹 `adgen_lora` 中找到训练损失曲线图。
121
+
122
+ ![4.jpg](media/ads_generation_4.jpg)
123
+
124
+ ## 四、模型评估
125
+
126
+ 我们使用 `adgen_dev` 数据集,使用单个 GPU 评估模型微调后的 BLEU 和 ROUGE 分数。
127
+
128
+ ```bash
129
+ CUDA_VISIBLE_DEVICES=0 python src/finetune.py \
130
+ --do_eval \
131
+ --dataset adgen_dev \
132
+ --checkpoint_dir adgen_lora \
133
+ --output_dir adgen_results \
134
+ --per_device_eval_batch_size 4 \
135
+ --source_prefix 你现在是一名销售员,根据以下商品标签生成一段有吸引力的商品广告词。 \
136
+ --predict_with_generate
137
+ ```
138
+
139
+ 框架运行日志如下图所示。
140
+
141
+ ![5.jpg](media/ads_generation_5.jpg)
142
+
143
+ 评估结果如下表所述,其中 LoRA 方法取得了最高的 Rouge-1 分数和 Rouge-l 分数,另外 BLEU-4 分数和 Rouge-2 分数也基本与全量微调 `Finetune` 和 P-Tuning v2 持平。无论是哪种微调方法,其分数都显著超过了微调前模型 `Original` 的分数。
144
+
145
+ | | Original | Finetune | P-Tuning v2 | LoRA |
146
+ | ------- | -------- | ---------- | ----------- | --------- |
147
+ | BLEU-4 | 4.56 | *8.01* | ***8.10*** | 8.08 |
148
+ | Rouge-1 | 23.98 | *31.23* | *31.12* | **31.45** |
149
+ | Rouge-2 | 3.95 | ***7.36*** | *7.11* | 7.28 |
150
+ | Rouge-l | 18.72 | *25.08* | *24.97* | **25.17** |
151
+ | Loss | - | ***3.00*** | *3.74* | 3.22 |
152
+
153
+ 注:*斜体* 数字代表文献 [1] 中汇报的实验结果。
154
+
155
+ ## 五、模型测试
156
+
157
+ 运行以下命令在单个 GPU 上测试模型效果,它会加载 `adgen_lora` 文件夹内保存的微调模型权重,并合并进原版 ChatGLM 模型的参数权重中,同时启动流式交互窗口。
158
+
159
+ ```bash
160
+ CUDA_VISIBLE_DEVICES=0 python src/infer.py \
161
+ --checkpoint_dir adgen_lora
162
+ ```
163
+
164
+ ![6.jpg](media/ads_generation_6.jpg)
165
+
166
+ ## 六、模型部署
167
+
168
+ 如果要将微调后的模型部署在您的项目框架中,请参考 [README_zh.md](../README_zh.md#模型部署) 中关于部署微调模型的部分。
169
+
170
+ ## 参考文献
171
+
172
+ [1] [ChatGLM-6B 模型基于 P-Tuning v2 的微调](https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/README.md).
examples/alter_self_cognition.md ADDED
@@ -0,0 +1,104 @@
1
+ # Example: Altering the Self-Cognition of ChatGLM
2
+
3
+ ## 1. Environment Setup
4
+
5
+ First, prepare a machine with sufficient compute; a Unix-like operating system is recommended. The recommended hardware configuration for this framework is listed below:
6
+
7
+ | Device | Minimum | Recommended |
8
+ | ------- | -------- | -------------- |
9
+ | CPU | Intel i7 | **Intel Xeon** |
10
+ | RAM | 16GB | **32GB** |
11
+ | GPU memory | 12GB | **24GB** |
12
+ | Disk space | 10GB | **20GB** |
13
+
14
+
15
+ This example assumes that the [CUDA](https://developer.nvidia.com/cuda-toolkit) environment has already been set up. If you run into CUDA configuration problems, keep an eye on the Docker package this project plans to release.
16
+
17
+ We recommend installing the dependencies inside a [Conda](https://anaconda.org/anaconda/conda) virtual environment to avoid breaking the dependencies of other projects. Run the following commands to create the Conda environment and install the Python dependencies:
18
+
19
+ ```bash
20
+ git clone https://github.com/hiyouga/ChatGLM-Efficient-Tuning.git
21
+ conda create -n chatglm_etuning python=3.10
22
+ conda activate chatglm_etuning
23
+ cd ChatGLM-Efficient-Tuning
24
+ pip install -r requirements.txt
25
+ ```
26
+
27
+ ###### By default, this project fetches the latest ChatGLM model from the internet; the model version may affect the results or even cause errors, so we recommend using the latest official model files. If the network connection keeps failing, try downloading the official model with the commands below and set [CHATGLM_REPO_NAME](https://github.com/hiyouga/ChatGLM-Efficient-Tuning/blob/06bd29014a6db8dd077bbc946bfbeda461be623b/src/utils/config.py#L7) in `src/utils/config.py` to the local path of the model files.
28
+
29
+ ```bash
30
+ git lfs install
31
+ git clone https://huggingface.co/THUDM/chatglm-6b
32
+ ```
33
+
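If you take the local-download route, the change in `src/utils/config.py` is simply pointing the constant at the cloned folder; the path below is a placeholder:

```python
# Illustrative only: in src/utils/config.py, point the constant at the local clone
# instead of the Hugging Face repo id ("/path/to/chatglm-6b" is a placeholder).
CHATGLM_REPO_NAME = "/path/to/chatglm-6b"
```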
34
+ ![1.jpg](media/alter_self_cognition_1.jpg)
35
+
36
+ ## 2. Dataset Preparation
37
+
38
+ In this example we use the `self_cognition` dataset, which contains 18 samples about the model's self-cognition. Our goal is to modify the model's knowledge of itself so that it gives the answers we want. See `data/self_cognition.json` for the full dataset; two samples are listed here.
39
+
40
+ ```json
41
+ [
42
+ {
43
+ "instruction": "你身份是什么?",
44
+ "input": "",
45
+ "output": "我叫ChatGLM-6B,是一个由[NAME]于2023年独立训练和开发的人工智能助手。我的主要目标是协助用户解决问题和满足他们的需求。"
46
+ },
47
+ {
48
+ "instruction": "你的身份信息能告诉我吗?",
49
+ "input": "",
50
+ "output": "当然可以,我是ChatGLM-6B,一个由[NAME]创建的人工智能助手。我在2023年研发完成,旨在为用户提供有针对性的回答和帮助。"
51
+ }
52
+ ]
53
+ ```
54
+
55
+ **Tip: replace `[NAME]` with your own name so that the model answers that you are its creator.**
56
+
57
+ ###### This framework ships with more than ten built-in instruction datasets; see the [data](../data/) folder for a brief overview. It also supports user-provided custom datasets: make sure your dataset follows the same format as `example_dataset.json` in `data/example_dataset`, where the `instruction` and `output` fields are required for supervised fine-tuning (SFT) to work. A minimal sketch of such a file follows this note.
58
+
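A minimal sketch of a custom dataset file, assuming the same `instruction`/`input`/`output` layout used by `data/self_cognition.json`; the file name and contents below are placeholders:

```python
# Write a tiny custom dataset in the instruction/input/output layout.
# "my_dataset.json" and the sample text are placeholders; `instruction` and
# `output` are the required fields.
import json

examples = [
    {
        "instruction": "介绍一下你自己。",
        "input": "",
        "output": "我是一个乐于助人的人工智能助手。"
    }
]

with open("data/my_dataset.json", "w", encoding="utf-8") as f:
    json.dump(examples, f, ensure_ascii=False, indent=2)
```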
59
+ ## 3. Supervised Fine-Tuning
60
+
61
+ Run the following command to perform supervised fine-tuning on a single GPU. We use the `self_cognition` dataset with the `lora` fine-tuning method and save the fine-tuned model to the `cognition` folder. To make the fine-tuning converge, we use a learning rate of 0.001 and train for 10 epochs.
62
+
63
+ ```bash
64
+ CUDA_VISIBLE_DEVICES=0 python src/finetune.py \
65
+ --do_train \
66
+ --dataset self_cognition \
67
+ --finetuning_type lora \
68
+ --output_dir cognition \
69
+ --overwrite_cache \
70
+ --per_device_train_batch_size 2 \
71
+ --gradient_accumulation_steps 2 \
72
+ --lr_scheduler_type cosine \
73
+ --logging_steps 10 \
74
+ --save_steps 1000 \
75
+ --warmup_steps 0 \
76
+ --learning_rate 1e-3 \
77
+ --num_train_epochs 10.0 \
78
+ --fp16
79
+ ```
80
+
81
+ The framework's training log is shown below.
82
+
83
+ ![2.jpg](media/alter_self_cognition_2.jpg)
84
+
85
+ ## 4. Testing the Model
86
+
87
+ Run the following command to try the model on a single GPU. It loads the fine-tuned weights saved in the `cognition` folder, merges them into the weights of the original ChatGLM model, and starts a streaming interactive session.
88
+
89
+ ```bash
90
+ CUDA_VISIBLE_DEVICES=0 python src/infer.py \
91
+ --checkpoint_dir cognition
92
+ ```
93
+
94
+ When we ask the fine-tuned ChatGLM-6B model some self-cognition questions, it gives the answers we expect. We also tested two additional questions; the results show that the model's original knowledge **has not been seriously damaged**.
95
+
96
+ ![3.jpg](media/alter_self_cognition_3.jpg)
97
+
98
+ For comparison, we also tested the original ChatGLM-6B model. Its answers are shown below; the self-cognition answers differ markedly from those in the figure above.
99
+
100
+ ![4.jpg](media/alter_self_cognition_4.jpg)
101
+
102
+ ## 5. Model Deployment
103
+
104
+ To deploy the fine-tuned model in your own project, please refer to the section on deploying fine-tuned models in [README_zh.md](../README_zh.md#模型部署).
examples/cloudflare-tunnel-to-colab.ipynb ADDED
@@ -0,0 +1,143 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "1. Download **ChatGLM-Efficient-Tuning** repo."
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "metadata": {
15
+ "vscode": {
16
+ "languageId": "plaintext"
17
+ }
18
+ },
19
+ "outputs": [],
20
+ "source": [
21
+ "!git clone https://github.com/hiyouga/ChatGLM-Efficient-Tuning.git"
22
+ ]
23
+ },
24
+ {
25
+ "attachments": {},
26
+ "cell_type": "markdown",
27
+ "metadata": {},
28
+ "source": [
29
+ "2. Install requirements."
30
+ ]
31
+ },
32
+ {
33
+ "cell_type": "code",
34
+ "execution_count": null,
35
+ "metadata": {
36
+ "vscode": {
37
+ "languageId": "plaintext"
38
+ }
39
+ },
40
+ "outputs": [],
41
+ "source": [
42
+ "!pip install -r /content/ChatGLM-Efficient-Tuning/requirements.txt"
43
+ ]
44
+ },
45
+ {
46
+ "attachments": {},
47
+ "cell_type": "markdown",
48
+ "metadata": {},
49
+ "source": [
50
+ "3. Download your fine-tuned checkpoint."
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {
57
+ "vscode": {
58
+ "languageId": "plaintext"
59
+ }
60
+ },
61
+ "outputs": [],
62
+ "source": [
63
+ "!git lfs install\n",
64
+ "!git clone https://huggingface.co/your-finetune-checkpoint"
65
+ ]
66
+ },
67
+ {
68
+ "attachments": {},
69
+ "cell_type": "markdown",
70
+ "metadata": {},
71
+ "source": [
72
+ "4. Download cloudflared."
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {
79
+ "vscode": {
80
+ "languageId": "plaintext"
81
+ }
82
+ },
83
+ "outputs": [],
84
+ "source": [
85
+ "!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared\n",
86
+ "!chmod a+x cloudflared"
87
+ ]
88
+ },
89
+ {
90
+ "attachments": {},
91
+ "cell_type": "markdown",
92
+ "metadata": {},
93
+ "source": [
94
+ "5. Deploy the fine-tuned model."
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {
101
+ "vscode": {
102
+ "languageId": "plaintext"
103
+ }
104
+ },
105
+ "outputs": [],
106
+ "source": [
107
+ "import nest_asyncio\n",
108
+ "nest_asyncio.apply()\n",
109
+ "\n",
110
+ "import subprocess\n",
111
+ "print(\"start cloudflared tunnel\")\n",
112
+ "f = open(\"stdout\", \"w\")\n",
113
+ "p = subprocess.Popen(['./cloudflared', '--url', 'http://localhost:7860'], bufsize=0, stdout=f, stderr=subprocess.STDOUT)\n",
114
+ "\n",
115
+ "import time\n",
116
+ "\n",
117
+ "time.sleep(3)\n",
118
+ "\n",
119
+ "!grep -F trycloudflare stdout\n",
120
+ "\n",
121
+ "!CUDA_VISIBLE_DEVICES=0 python /content/ChatGLM-Efficient-Tuning/src/web_demo.py \\\n",
122
+ " --checkpoint_dir /content/your-finetune-checkpoint"
123
+ ]
124
+ },
125
+ {
126
+ "attachments": {},
127
+ "cell_type": "markdown",
128
+ "metadata": {},
129
+ "source": [
130
+ "6. Open the link shown in the third line of the output (see the figure); you can share it with anyone who wants to try your fine-tuned model.\n",
131
+ "![image](https://i.imgur.com/b9LWJcZ.png)"
132
+ ]
133
+ }
134
+ ],
135
+ "metadata": {
136
+ "language_info": {
137
+ "name": "python"
138
+ },
139
+ "orig_nbformat": 4
140
+ },
141
+ "nbformat": 4,
142
+ "nbformat_minor": 2
143
+ }
examples/evaluate.sh ADDED
@@ -0,0 +1,12 @@
1
+ #!/bin/bash
2
+
3
+ CUDA_VISIBLE_DEVICES=0 python ../src/finetune.py \
4
+ --do_eval \
5
+ --dataset alpaca_gpt4_zh \
6
+ --dataset_dir ../data \
7
+ --checkpoint_dir path_to_checkpoint \
8
+ --output_dir path_to_eval_result \
9
+ --overwrite_cache \
10
+ --per_device_eval_batch_size 8 \
11
+ --max_samples 50 \
12
+ --predict_with_generate
examples/finetune.sh ADDED
@@ -0,0 +1,17 @@
1
+ #!/bin/bash
2
+
3
+ CUDA_VISIBLE_DEVICES=0 python ../src/finetune.py \
4
+ --do_train \
5
+ --dataset alpaca_gpt4_zh \
6
+ --dataset_dir ../data \
7
+ --finetuning_type lora \
8
+ --output_dir path_to_sft_checkpoint \
9
+ --overwrite_cache \
10
+ --per_device_train_batch_size 4 \
11
+ --gradient_accumulation_steps 4 \
12
+ --lr_scheduler_type cosine \
13
+ --logging_steps 10 \
14
+ --save_steps 1000 \
15
+ --learning_rate 5e-5 \
16
+ --num_train_epochs 3.0 \
17
+ --fp16
examples/media/ads_generation_1.jpg ADDED
examples/media/ads_generation_2.jpg ADDED
examples/media/ads_generation_3.jpg ADDED
examples/media/ads_generation_4.jpg ADDED
examples/media/ads_generation_5.jpg ADDED
examples/media/ads_generation_6.jpg ADDED
examples/media/alter_self_cognition_1.jpg ADDED
examples/media/alter_self_cognition_2.jpg ADDED
examples/media/alter_self_cognition_3.jpg ADDED
examples/media/alter_self_cognition_4.jpg ADDED
examples/quantized_finetune_with_local_model.sh ADDED
@@ -0,0 +1,19 @@
1
+ #!/bin/bash
2
+
3
+ CUDA_VISIBLE_DEVICES=0 python ../src/finetune.py \
4
+ --do_train \
5
+ --model_name_or_path path_to_chatglm_model \
6
+ --dataset alpaca_gpt4_zh \
7
+ --dataset_dir ../data \
8
+ --finetuning_type lora \
9
+ --output_dir path_to_sft_checkpoint \
10
+ --overwrite_cache \
11
+ --per_device_train_batch_size 4 \
12
+ --gradient_accumulation_steps 4 \
13
+ --lr_scheduler_type cosine \
14
+ --logging_steps 10 \
15
+ --save_steps 1000 \
16
+ --learning_rate 5e-5 \
17
+ --num_train_epochs 3.0 \
18
+ --quantization_bit 8 \
19
+ --fp16
examples/train_ppo.sh ADDED
@@ -0,0 +1,19 @@
1
+ #!/bin/bash
2
+
3
+ CUDA_VISIBLE_DEVICES=0 python ../src/train_ppo.py \
4
+ --do_train \
5
+ --dataset alpaca_gpt4_zh \
6
+ --dataset_dir ../data \
7
+ --finetuning_type lora \
8
+ --checkpoint_dir path_to_sft_checkpoint \
9
+ --reward_model path_to_rm_checkpoint \
10
+ --output_dir path_to_ppo_checkpoint \
11
+ --overwrite_cache \
12
+ --per_device_train_batch_size 2 \
13
+ --gradient_accumulation_steps 4 \
14
+ --lr_scheduler_type cosine \
15
+ --logging_steps 10 \
16
+ --save_steps 1000 \
17
+ --learning_rate 1e-5 \
18
+ --num_train_epochs 1.0 \
19
+ --fp16
examples/train_rm.sh ADDED
@@ -0,0 +1,17 @@
1
+ #!/bin/bash
2
+
3
+ CUDA_VISIBLE_DEVICES=0 python ../src/train_rm.py \
4
+ --do_train \
5
+ --dataset comparison_gpt4_zh \
6
+ --dataset_dir ../data \
7
+ --finetuning_type lora \
8
+ --output_dir path_to_rm_checkpoint \
9
+ --overwrite_cache \
10
+ --per_device_train_batch_size 4 \
11
+ --gradient_accumulation_steps 4 \
12
+ --lr_scheduler_type cosine \
13
+ --logging_steps 10 \
14
+ --save_steps 1000 \
15
+ --learning_rate 1e-5 \
16
+ --num_train_epochs 1.0 \
17
+ --fp16
requirements.txt ADDED
@@ -0,0 +1,14 @@
1
+ torch>=1.13.1
2
+ protobuf
3
+ cpm_kernels
4
+ sentencepiece
5
+ transformers>=4.27.4
6
+ datasets>=2.10.0
7
+ accelerate>=0.18.0
8
+ peft>=0.3.0
9
+ trl>=0.4.1
10
+ jieba
11
+ rouge_chinese
12
+ nltk
13
+ gradio
14
+ mdtex2html
src/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,4 @@
1
+ from .utils import (
2
+ load_pretrained,
3
+ ModelArguments
4
+ )
src/.ipynb_checkpoints/infer-checkpoint.py ADDED
1
+ # coding=utf-8
2
+ # Implement stream chat in command line for ChatGLM fine-tuned with PEFT.
3
+ # This code is largely borrowed from https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py
4
+
5
+
6
+ import os
7
+ import signal
8
+ import platform
9
+
10
+ from utils import ModelArguments, load_pretrained
11
+ from transformers import HfArgumentParser
12
+
13
+
14
+ os_name = platform.system()
15
+ clear_command = "cls" if os_name == "Windows" else "clear"
16
+ stop_stream = False
17
+ welcome = "欢迎使用 ChatGLM-6B 模型,输入内容即可对话,clear清空对话历史,stop终止程序"
18
+
19
+
20
+ def build_prompt(history):
21
+ prompt = welcome
22
+ for query, response in history:
23
+ prompt += f"\n\nUser: {query}"
24
+ prompt += f"\n\nChatGLM-6B: {response}"
25
+ return prompt
26
+
27
+
28
+ def signal_handler(signal, frame):
29
+ global stop_stream
30
+ stop_stream = True
31
+
32
+
33
+ def main():
34
+
35
+ global stop_stream
36
+ parser = HfArgumentParser(ModelArguments)
37
+ model_args, = parser.parse_args_into_dataclasses()
38
+ model, tokenizer = load_pretrained(model_args)
39
+ model = model.cuda()
40
+ model.eval()
41
+
42
+ history = []
43
+ print(welcome)
44
+ while True:
45
+ try:
46
+ query = input("\nInput: ")
47
+ except UnicodeDecodeError:
48
+ print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
49
+ continue
50
+ except Exception:
51
+ raise
52
+
53
+ if query.strip() == "stop":
54
+ break
55
+ if query.strip() == "clear":
56
+ history = []
57
+ os.system(clear_command)
58
+ print(welcome)
59
+ continue
60
+
61
+ count = 0
62
+ for _, history in model.stream_chat(tokenizer, query, history=history):
63
+ if stop_stream:
64
+ stop_stream = False
65
+ break
66
+ else:
67
+ count += 1
68
+ if count % 8 == 0:
69
+ os.system(clear_command)
70
+ print(build_prompt(history), flush=True)
71
+ signal.signal(signal.SIGINT, signal_handler)
72
+ os.system(clear_command)
73
+ print(build_prompt(history), flush=True)
74
+
75
+
76
+ if __name__ == "__main__":
77
+ main()
src/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .utils import (
2
+ load_pretrained,
3
+ ModelArguments
4
+ )
src/finetune.py ADDED
@@ -0,0 +1,88 @@
1
+ # coding=utf-8
2
+ # Implements several parameter-efficient supervised fine-tuning methods for ChatGLM.
3
+ # This code is inspired by https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py
4
+
5
+
6
+ from utils import (
7
+ load_pretrained,
8
+ prepare_args,
9
+ prepare_data,
10
+ preprocess_data,
11
+ plot_loss,
12
+ Seq2SeqDataCollatorForChatGLM,
13
+ ComputeMetrics,
14
+ Seq2SeqTrainerForChatGLM
15
+ )
16
+
17
+
18
+ def main():
19
+
20
+ # Prepare pretrained model and dataset
21
+ model_args, data_args, training_args, finetuning_args = prepare_args()
22
+ dataset = prepare_data(model_args, data_args)
23
+ model, tokenizer = load_pretrained(model_args, training_args, finetuning_args, training_args.do_train, stage="sft")
24
+ dataset = preprocess_data(dataset, tokenizer, data_args, training_args, stage="sft")
25
+ data_collator = Seq2SeqDataCollatorForChatGLM(
26
+ tokenizer=tokenizer,
27
+ model=model,
28
+ ignore_pad_token_for_loss=data_args.ignore_pad_token_for_loss,
29
+ inference_mode=(not training_args.do_train)
30
+ )
31
+
32
+ # Override the decoding parameters of Seq2SeqTrainer
33
+ training_args.generation_max_length = training_args.generation_max_length if \
34
+ training_args.generation_max_length is not None else data_args.max_target_length
35
+ training_args.generation_num_beams = data_args.num_beams if \
36
+ data_args.num_beams is not None else training_args.generation_num_beams
37
+
38
+ # Initialize our Trainer
39
+ trainer = Seq2SeqTrainerForChatGLM(
40
+ finetuning_args=finetuning_args,
41
+ model=model,
42
+ args=training_args,
43
+ train_dataset=dataset if training_args.do_train else None,
44
+ eval_dataset=dataset if training_args.do_eval else None,
45
+ tokenizer=tokenizer,
46
+ data_collator=data_collator,
47
+ compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None
48
+ )
49
+
50
+ # Keyword arguments for `model.generate`
51
+ gen_kwargs = {
52
+ "do_sample": True,
53
+ "top_p": 0.7,
54
+ "max_length": 768,
55
+ "temperature": 0.95
56
+ }
57
+
58
+ # Training
59
+ if training_args.do_train:
60
+ train_result = trainer.train()
61
+ trainer.log_metrics("train", train_result.metrics)
62
+ trainer.save_metrics("train", train_result.metrics)
63
+ trainer.save_state()
64
+ trainer.save_model()
65
+ if trainer.is_world_process_zero() and finetuning_args.plot_loss:
66
+ plot_loss(training_args)
67
+
68
+ # Evaluation
69
+ if training_args.do_eval:
70
+ metrics = trainer.evaluate(metric_key_prefix="eval", **gen_kwargs)
71
+ trainer.log_metrics("eval", metrics)
72
+ trainer.save_metrics("eval", metrics)
73
+
74
+ # Predict
75
+ if training_args.do_predict:
76
+ predict_results = trainer.predict(dataset, metric_key_prefix="predict", **gen_kwargs)
77
+ trainer.log_metrics("predict", predict_results.metrics)
78
+ trainer.save_metrics("predict", predict_results.metrics)
79
+ trainer.save_predictions(predict_results, tokenizer)
80
+
81
+
82
+ def _mp_fn(index):
83
+ # For xla_spawn (TPUs)
84
+ main()
85
+
86
+
87
+ if __name__ == "__main__":
88
+ main()
src/infer.py ADDED
@@ -0,0 +1,77 @@
1
+ # coding=utf-8
2
+ # Implements streaming chat in the command line for ChatGLM fine-tuned with PEFT.
3
+ # This code is largely borrowed from https://github.com/THUDM/ChatGLM-6B/blob/main/cli_demo.py
4
+
5
+
6
+ import os
7
+ import signal
8
+ import platform
9
+
10
+ from utils import ModelArguments, load_pretrained
11
+ from transformers import HfArgumentParser
12
+
13
+
14
+ os_name = platform.system()
15
+ clear_command = "cls" if os_name == "Windows" else "clear"
16
+ stop_stream = False
17
+ welcome = "欢迎使用 ChatGLM-6B 模型,输入内容即可对话,clear清空对话历史,stop终止程序"
18
+
19
+
20
+ def build_prompt(history):
21
+ prompt = welcome
22
+ for query, response in history:
23
+ prompt += f"\n\nUser: {query}"
24
+ prompt += f"\n\nChatGLM-6B: {response}"
25
+ return prompt
26
+
27
+
28
+ def signal_handler(signal, frame):
29
+ global stop_stream
30
+ stop_stream = True
31
+
32
+
33
+ def main():
34
+
35
+ global stop_stream
36
+ parser = HfArgumentParser(ModelArguments)
37
+ model_args, = parser.parse_args_into_dataclasses()
38
+ model, tokenizer = load_pretrained(model_args)
39
+ model = model.cuda()
40
+ model.eval()
41
+
42
+ history = []
43
+ print(welcome)
44
+ while True:
45
+ try:
46
+ query = input("\nInput: ")
47
+ except UnicodeDecodeError:
48
+ print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
49
+ continue
50
+ except Exception:
51
+ raise
52
+
53
+ if query.strip() == "stop":
54
+ break
55
+ if query.strip() == "clear":
56
+ history = []
57
+ os.system(clear_command)
58
+ print(welcome)
59
+ continue
60
+
61
+ count = 0
62
+ for _, history in model.stream_chat(tokenizer, query, history=history):
63
+ if stop_stream:
64
+ stop_stream = False
65
+ break
66
+ else:
67
+ count += 1
68
+ if count % 8 == 0:
69
+ os.system(clear_command)
70
+ print(build_prompt(history), flush=True)
71
+ signal.signal(signal.SIGINT, signal_handler)
72
+ os.system(clear_command)
73
+ print(build_prompt(history), flush=True)
74
+
75
+
76
+ if __name__ == "__main__":
77
+ main()
src/train_ppo.py ADDED
@@ -0,0 +1,85 @@
1
+ # coding=utf-8
2
+ # Implements parameter-efficient PPO training of a fine-tuned ChatGLM model.
3
+ # This code is inspired by:
4
+ # https://github.com/lvwerra/trl/blob/main/examples/sentiment/scripts/gpt-neox-20b_peft/gpt-neo-20b_sentiment_peft.py
5
+
6
+ import math
7
+
8
+ from torch.optim import AdamW
9
+
10
+ from transformers.optimization import get_scheduler
11
+ from trl import PPOConfig
12
+
13
+ from utils import (
14
+ prepare_args,
15
+ prepare_data,
16
+ load_pretrained,
17
+ preprocess_data,
18
+ PPODataCollatorForChatGLM,
19
+ PPOTrainerForChatGLM,
20
+ plot_loss
21
+ )
22
+
23
+
24
+ def main():
25
+
26
+ # prepare pretrained model and dataset
27
+ model_args, data_args, training_args, finetuning_args = prepare_args()
28
+ dataset = prepare_data(model_args, data_args)
29
+ model, tokenizer = load_pretrained(model_args, training_args, finetuning_args, training_args.do_train, stage="ppo")
30
+ dataset = preprocess_data(dataset, tokenizer, data_args, training_args, stage="ppo")
31
+ data_collator = PPODataCollatorForChatGLM(
32
+ tokenizer=tokenizer,
33
+ min_input_length=data_args.max_source_length, # avoid truncating input sequences
34
+ max_input_length=data_args.max_source_length,
35
+ inference_mode=(not training_args.do_train)
36
+ )
37
+
38
+ ppo_config = PPOConfig(
39
+ model_name=model_args.model_name_or_path,
40
+ learning_rate=training_args.learning_rate,
41
+ mini_batch_size=training_args.per_device_train_batch_size,
42
+ batch_size=training_args.per_device_train_batch_size,
43
+ gradient_accumulation_steps=training_args.gradient_accumulation_steps,
44
+ ppo_epochs=1,
45
+ max_grad_norm=training_args.max_grad_norm
46
+ )
47
+
48
+ optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=ppo_config.learning_rate)
49
+ total_train_batch_size = \
50
+ training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps * training_args.world_size
51
+ lr_scheduler = get_scheduler(
52
+ training_args.lr_scheduler_type,
53
+ optimizer=optimizer,
54
+ num_warmup_steps=training_args.warmup_steps,
55
+ num_training_steps=(training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size))
56
+ )
57
+
58
+ # Initialize our Trainer
59
+ ppo_trainer = PPOTrainerForChatGLM(
60
+ training_args=training_args,
61
+ finetuning_args=finetuning_args,
62
+ config=ppo_config,
63
+ model=model,
64
+ ref_model=None,
65
+ tokenizer=tokenizer,
66
+ dataset=dataset,
67
+ data_collator=data_collator,
68
+ optimizer=optimizer,
69
+ lr_scheduler=lr_scheduler
70
+ )
71
+
72
+ ppo_trainer.ppo_train(max_target_length=data_args.max_target_length)
73
+ ppo_trainer.save_state()
74
+ ppo_trainer.save_model()
75
+ if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
76
+ plot_loss(training_args, keys=["loss", "reward"])
77
+
78
+
79
+ def _mp_fn(index):
80
+ # For xla_spawn (TPUs)
81
+ main()
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
src/train_rm.py ADDED
@@ -0,0 +1,60 @@
1
+ # coding=utf-8
2
+ # Implements parameter-efficient training of a reward model based on ChatGLM.
3
+ # This code is inspired by:
4
+ # https://github.com/lvwerra/trl/blob/main/examples/summarization/scripts/reward_summarization.py
5
+ # https://github.com/CarperAI/trlx/blob/main/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py
6
+
7
+
8
+ from utils import (
9
+ prepare_args,
10
+ prepare_data,
11
+ load_pretrained,
12
+ preprocess_data,
13
+ PairwiseDataCollatorForChatGLM,
14
+ PairwiseTrainerForChatGLM,
15
+ plot_loss
16
+ )
17
+
18
+ def main():
19
+
20
+ # prepare pretrained model and dataset
21
+ model_args, data_args, training_args, finetuning_args = prepare_args()
22
+ dataset = prepare_data(model_args, data_args)
23
+ model, tokenizer = load_pretrained(model_args, training_args, finetuning_args, training_args.do_train, stage="rwd")
24
+ dataset = preprocess_data(dataset, tokenizer, data_args, training_args, stage="rwd")
25
+ data_collator = PairwiseDataCollatorForChatGLM(
26
+ tokenizer=tokenizer,
27
+ inference_mode=(not training_args.do_train)
28
+ )
29
+
30
+ training_args.remove_unused_columns = False # Important for pairwise dataset
31
+
32
+ # Initialize our Trainer
33
+ trainer = PairwiseTrainerForChatGLM(
34
+ finetuning_args=finetuning_args,
35
+ model=model,
36
+ args=training_args,
37
+ train_dataset=dataset if training_args.do_train else None,
38
+ eval_dataset=dataset if training_args.do_eval else None,
39
+ tokenizer=tokenizer,
40
+ data_collator=data_collator
41
+ )
42
+
43
+ # Training
44
+ if training_args.do_train:
45
+ train_result = trainer.train()
46
+ trainer.log_metrics("train", train_result.metrics)
47
+ trainer.save_metrics("train", train_result.metrics)
48
+ trainer.save_state()
49
+ trainer.save_model()
50
+ if trainer.is_world_process_zero() and finetuning_args.plot_loss:
51
+ plot_loss(training_args)
52
+
53
+
54
+ def _mp_fn(index):
55
+ # For xla_spawn (TPUs)
56
+ main()
57
+
58
+
59
+ if __name__ == "__main__":
60
+ main()
src/utils/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,26 @@
1
+ from .common import (
2
+ load_pretrained,
3
+ prepare_args,
4
+ prepare_data,
5
+ preprocess_data
6
+ )
7
+
8
+ from .seq2seq import (
9
+ Seq2SeqDataCollatorForChatGLM,
10
+ ComputeMetrics,
11
+ Seq2SeqTrainerForChatGLM
12
+ )
13
+
14
+ from .pairwise import (
15
+ PairwiseDataCollatorForChatGLM,
16
+ PairwiseTrainerForChatGLM
17
+ )
18
+
19
+ from .ppo import (
20
+ PPODataCollatorForChatGLM,
21
+ PPOTrainerForChatGLM
22
+ )
23
+
24
+ from .config import ModelArguments
25
+
26
+ from .other import plot_loss