Instructions to use susnato/ernie-m-base_pytorch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- paddlenlp
How to use susnato/ernie-m-base_pytorch with paddlenlp:
```python
from paddlenlp.transformers import AutoTokenizer, ErnieMModel

tokenizer = AutoTokenizer.from_pretrained("susnato/ernie-m-base_pytorch", from_hf_hub=True)
model = ErnieMModel.from_pretrained("susnato/ernie-m-base_pytorch", from_hf_hub=True)
```
- Notebooks
- Google Colab
- Kaggle
| # Copied from https://github.com/nghuyong/ERNIE-Pytorch/blob/master/convert.py | |
| # with some modifications for ernie-m | |
| #!/usr/bin/env python | |
| # encoding: utf-8 | |
| """ | |
| File Description: | |
| ernie3.0 series model conversion based on paddlenlp repository | |
| ernie2.0 series model conversion based on paddlenlp repository | |
| official repo: https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo | |
| Author: nghuyong liushu | |
| Mail: nghuyong@163.com 1554987494@qq.com | |
| Created Time: 2022/8/17 | |
| """ | |
| import collections | |
| import os | |
| import json | |
| import paddle.fluid.dygraph as D | |
| import torch | |
| from paddle import fluid | |
| import numpy as np | |
def build_params_map(attention_num=12):
    """
    Build the parameter-name map from the Paddle ERNIE-M checkpoint to the
    PyTorch checkpoint.

    Every key maps to an identical value; the map therefore acts as the
    whitelist (and ordering) of parameters that get converted. Token-type
    embeddings, task-type embeddings and the ``cls.predictions`` head are
    deliberately not part of the map.

    :param attention_num: number of encoder layers to generate entries for
    :return: ``collections.OrderedDict`` of source name -> target name, ordered
        as embeddings, encoder layers (ascending index), pooler
    """
    embedding_names = (
        'embeddings.word_embeddings.weight',
        'embeddings.position_embeddings.weight',
        'embeddings.layer_norm.weight',
        'embeddings.layer_norm.bias',
    )
    # Sub-modules of one encoder layer, in the order their entries are emitted;
    # each contributes a ``weight`` followed by a ``bias``.
    layer_modules = (
        'self_attn.q_proj',
        'self_attn.k_proj',
        'self_attn.v_proj',
        'self_attn.out_proj',
        'norm1',
        'linear1',
        'linear2',
        'norm2',
    )
    pooler_names = ('pooler.dense.weight', 'pooler.dense.bias')

    ordered_names = list(embedding_names)
    for layer_idx in range(attention_num):
        for module in layer_modules:
            for suffix in ('weight', 'bias'):
                ordered_names.append(f'encoder.layers.{layer_idx}.{module}.{suffix}')
    ordered_names.extend(pooler_names)

    return collections.OrderedDict((name, name) for name in ordered_names)
def extract_and_convert(input_dir, output_dir):
    """
    Extract a Paddle ERNIE-M checkpoint and convert it to PyTorch files.

    Reads ``config.json``, ``vocab.txt`` and ``model_state.pdparams`` from
    ``input_dir`` and writes ``config.json``, ``vocab.txt`` and
    ``pytorch_model.bin`` into ``output_dir``. Progress is printed to stdout.

    :param input_dir: directory containing the Paddle checkpoint files
    :param output_dir: destination directory; created if it does not exist
    :return: None
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    print('=' * 20 + 'save config file' + '=' * 20)
    # Context managers guarantee both handles are closed (and the output is
    # flushed) before the function moves on.
    with open(os.path.join(input_dir, 'config.json'), 'rt', encoding='utf-8') as f:
        config = json.load(f)
    # NOTE: the upstream script's other config rewrites (init_args unwrapping,
    # model_type, architectures, intermediate_size) are disabled here; only
    # layer_norm_eps is overridden.
    config['layer_norm_eps'] = 1e-5
    with open(os.path.join(output_dir, 'config.json'), 'wt', encoding='utf-8') as f:
        json.dump(config, f, indent=4)

    print('=' * 20 + 'save vocab file' + '=' * 20)
    with open(os.path.join(input_dir, 'vocab.txt'), 'rt', encoding='utf-8') as f:
        # Keep only the first tab-separated field of every line.
        words = [line.split('\t')[0] for line in f.read().splitlines()]
    with open(os.path.join(output_dir, 'vocab.txt'), 'wt', encoding='utf-8') as f:
        f.writelines(word + "\n" for word in words)

    print('=' * 20 + 'extract weights' + '=' * 20)
    state_dict = collections.OrderedDict()
    weight_map = build_params_map(attention_num=config['num_hidden_layers'])
    with fluid.dygraph.guard():
        paddle_paddle_params, _ = D.load_dygraph(os.path.join(input_dir, 'model_state.pdparams'))
    for weight_name, weight_value in paddle_paddle_params.items():
        # Parameters absent from the map are reported and left out.
        if weight_name not in weight_map:
            print('=' * 20, '[SKIP]', weight_name, '=' * 20)
            continue
        # Presumably needed because Paddle Linear weights are stored as
        # (in, out) while torch.nn.Linear expects (out, in) -- TODO confirm.
        # Assumes the loaded values are numpy arrays, for which transpose()
        # leaves 1-D tensors (e.g. layer-norm weights) unchanged.
        if 'weight' in weight_name and any(
                tag in weight_name for tag in ('encoder', 'pooler', 'cls.')):
            weight_value = weight_value.transpose()
        target_name = weight_map[weight_name]
        state_dict[target_name] = torch.FloatTensor(weight_value)
        print(weight_name, '->', target_name, weight_value.shape)
    torch.save(state_dict, os.path.join(output_dir, "pytorch_model.bin"))
if __name__ == '__main__':
    # Script entry point: convert the Paddle checkpoint in the local
    # ./ernie_m_base_paddle/ directory and write the PyTorch files to
    # ./ernie_m_base_torch/.
    extract_and_convert(
        input_dir="./ernie_m_base_paddle/",
        output_dir="./ernie_m_base_torch/",
    )