update checkpoint and remove trust_remote_code
Browse files
README.md
CHANGED
@@ -51,15 +51,15 @@ pipeline_tag: text-generation
|
|
51 |
<a href="https://github.com/MiniMax-AI/MiniMax-01" target="_blank" style="margin: 2px;">
|
52 |
<img alt="GitHub" src="https://img.shields.io/badge/_GitHub-MiniMax-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
53 |
</a>
|
54 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/LICENSE-MODEL" style="margin: 2px;">
|
55 |
<img alt="Model License" src="https://img.shields.io/badge/_Model_License-Model_Agreement-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
56 |
</a>
|
57 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/LICENSE-CODE" style="margin: 2px;">
|
58 |
<img alt="Code License" src="https://img.shields.io/badge/_Code_License-MIT-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
59 |
</a>
|
60 |
</div>
|
61 |
<div align="center" style="line-height: 1;">
|
62 |
-
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01/blob/main/figures/wechat-qrcode.jpeg" target="_blank" style="margin: 2px;">
|
63 |
WeChat
|
64 |
</a>
|
65 |
</div>
|
@@ -174,7 +174,7 @@ Here we provide a simple example of loading the tokenizer and model to generate
|
|
174 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, QuantoConfig, GenerationConfig
|
175 |
|
176 |
# load hf config
|
177 |
-
hf_config = AutoConfig.from_pretrained("MiniMaxAI/MiniMax-Text-01")
|
178 |
|
179 |
# quantization config, int8 is recommended
|
180 |
quantization_config = QuantoConfig(
|
@@ -200,7 +200,7 @@ for i in range(world_size):
|
|
200 |
device_map[f'model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
|
201 |
|
202 |
# load tokenizer
|
203 |
-
tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-Text-01")
|
204 |
prompt = "Hello!"
|
205 |
messages = [
|
206 |
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model."}]},
|
@@ -216,11 +216,10 @@ model_inputs = tokenizer(text, return_tensors="pt").to("cuda")
|
|
216 |
|
217 |
# load bfloat16 model, move to device, and apply quantization
|
218 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
219 |
-
"MiniMaxAI/MiniMax-Text-01",
|
220 |
torch_dtype="bfloat16",
|
221 |
device_map=device_map,
|
222 |
quantization_config=quantization_config,
|
223 |
-
trust_remote_code=True,
|
224 |
offload_buffers=True,
|
225 |
)
|
226 |
|
|
|
51 |
<a href="https://github.com/MiniMax-AI/MiniMax-01" target="_blank" style="margin: 2px;">
|
52 |
<img alt="GitHub" src="https://img.shields.io/badge/_GitHub-MiniMax-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
53 |
</a>
|
54 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/LICENSE-MODEL" style="margin: 2px;">
|
55 |
<img alt="Model License" src="https://img.shields.io/badge/_Model_License-Model_Agreement-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
56 |
</a>
|
57 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/LICENSE-CODE" style="margin: 2px;">
|
58 |
<img alt="Code License" src="https://img.shields.io/badge/_Code_License-MIT-FF4040?style=flat-square&labelColor=2C3E50" style="display: inline-block; vertical-align: middle;"/>
|
59 |
</a>
|
60 |
</div>
|
61 |
<div align="center" style="line-height: 1;">
|
62 |
+
<a href="https://huggingface.co/MiniMaxAI/MiniMax-Text-01-hf/blob/main/figures/wechat-qrcode.jpeg" target="_blank" style="margin: 2px;">
|
63 |
WeChat
|
64 |
</a>
|
65 |
</div>
|
|
|
174 |
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, QuantoConfig, GenerationConfig
|
175 |
|
176 |
# load hf config
|
177 |
+
hf_config = AutoConfig.from_pretrained("MiniMaxAI/MiniMax-Text-01-hf")
|
178 |
|
179 |
# quantization config, int8 is recommended
|
180 |
quantization_config = QuantoConfig(
|
|
|
200 |
device_map[f'model.layers.{i * layers_per_device + j}'] = f'cuda:{i}'
|
201 |
|
202 |
# load tokenizer
|
203 |
+
tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-Text-01-hf")
|
204 |
prompt = "Hello!"
|
205 |
messages = [
|
206 |
{"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant created by MiniMax based on MiniMax-Text-01 model."}]},
|
|
|
216 |
|
217 |
# load bfloat16 model, move to device, and apply quantization
|
218 |
quantized_model = AutoModelForCausalLM.from_pretrained(
|
219 |
+
"MiniMaxAI/MiniMax-Text-01-hf",
|
220 |
torch_dtype="bfloat16",
|
221 |
device_map=device_map,
|
222 |
quantization_config=quantization_config,
|
|
|
223 |
offload_buffers=True,
|
224 |
)
|
225 |
|