Upload model
Browse files- .gitattributes +3 -0
- config.json +20 -0
- nanollm/_CHECKPOINT_METADATA +1 -0
- nanollm/_METADATA +1 -0
- nanollm/_sharding +1 -0
- nanollm/array_metadatas/process_0 +1 -0
- nanollm/d/aa5ea6004bb91c89583446f305a9d24a +0 -0
- nanollm/manifest.ocdbt +0 -0
- nanollm/ocdbt.process_0/d/201f0220d41e2e18b9eb3459dca830b4 +0 -0
- nanollm/ocdbt.process_0/d/53ead41b1941e97d49e509f965dd40d4 +3 -0
- nanollm/ocdbt.process_0/d/980458ef4d4a7947885fdb80ccea521d +0 -0
- nanollm/ocdbt.process_0/d/9a2e1116e0972ee337ef3bf63bdd4628 +0 -0
- nanollm/ocdbt.process_0/d/9ae2cc01447d1ae6c19ec97a48b644e1 +0 -0
- nanollm/ocdbt.process_0/d/ac4b2e92cd77495f80237f872a4513b2 +0 -0
- nanollm/ocdbt.process_0/d/b4d2ae2c4c9d18de233f906151971c5e +3 -0
- nanollm/ocdbt.process_0/d/c2a43d6da6cb634141b2669378970f40 +3 -0
- nanollm/ocdbt.process_0/d/f6b1faabf7876fb75969550e05d0f359 +0 -0
- nanollm/ocdbt.process_0/manifest.ocdbt +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
nanollm/ocdbt.process_0/d/53ead41b1941e97d49e509f965dd40d4 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
nanollm/ocdbt.process_0/d/b4d2ae2c4c9d18de233f906151971c5e filter=lfs diff=lfs merge=lfs -text
|
38 |
+
nanollm/ocdbt.process_0/d/c2a43d6da6cb634141b2669378970f40 filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"batch_size": 128,
|
3 |
+
"sequence_length": 64,
|
4 |
+
"num_epochs": 1,
|
5 |
+
"learning_rate": 0.001,
|
6 |
+
"seed": 42,
|
7 |
+
"n_iterations": 10,
|
8 |
+
"n_freq_train": 1,
|
9 |
+
"dropout_rate": 0.2,
|
10 |
+
"num_layers": 6,
|
11 |
+
"embed_size": 256,
|
12 |
+
"num_heads": 8,
|
13 |
+
"head_size": 32,
|
14 |
+
"use_wandb": false,
|
15 |
+
"wandb_project": "nanollm",
|
16 |
+
"wandb_entity": null,
|
17 |
+
"push_to_hub": true,
|
18 |
+
"repo_id": "SauravMaheshkar/nanollm",
|
19 |
+
"vocab_size": 65
|
20 |
+
}
|
nanollm/_CHECKPOINT_METADATA
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1749393247688697000, "commit_timestamp_nsecs": 1749393247762204000, "custom_metadata": {}}
|
nanollm/_METADATA
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"tree_metadata": {"('blocks', '0', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '0', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '0', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '0', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '0', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '0', 'attn', 'rngs', 'default', 'count', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "default", "key_type": 2}, {"key": "count", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('blocks', '0', 'attn', 'rngs', 'default', 'key', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "rngs", "key_type": 2}, {"key": "default", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('blocks', '0', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '0', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '0', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '0', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '0', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '0', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '0', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "0", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '1', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '1', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '1', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '1', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '1', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '1', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '1', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '1', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '1', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '1', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '1', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "1", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '2', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '2', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '2', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '2', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '2', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '2', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '2', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '2', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '2', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '2', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '2', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "2", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '3', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '3', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '3', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '3', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '3', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '3', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '3', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '3', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '3', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '3', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '3', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "3", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '4', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '4', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '4', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '4', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '4', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '4', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '4', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '4', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '4', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '4', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '4', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "4", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'attn', 'key', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '5', 'attn', 'key', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "key", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '5', 'attn', 'out', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'attn', 'out', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "out", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32, 256]}}, "('blocks', '5', 'attn', 'query', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '5', 'attn', 'query', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "query", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '5', 'attn', 'value', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [8, 32]}}, "('blocks', '5', 'attn', 'value', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "attn", "key_type": 2}, {"key": "value", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 8, 32]}}, "('blocks', '5', 'mlp', 'input_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024]}}, "('blocks', '5', 'mlp', 'input_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "input_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 1024]}}, "('blocks', '5', 'mlp', 'output_linear', 'bias', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'mlp', 'output_linear', 'kernel', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "mlp", "key_type": 2}, {"key": "output_linear", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1024, 256]}}, "('blocks', '5', 'post_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "post_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('blocks', '5', 'pre_norm', 'scale', 'value')": {"key_metadata": [{"key": "blocks", "key_type": 2}, {"key": "5", "key_type": 2}, {"key": "pre_norm", "key_type": 2}, {"key": "scale", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256]}}, "('output_layer', 'bias', 'value')": {"key_metadata": [{"key": "output_layer", "key_type": 2}, {"key": "bias", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [65]}}, "('output_layer', 'kernel', 'value')": {"key_metadata": [{"key": "output_layer", "key_type": 2}, {"key": "kernel", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [256, 65]}}, "('pos_emb', 'embedding', 'value')": {"key_metadata": [{"key": "pos_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 256]}}, "('token_emb', 'embedding', 'value')": {"key_metadata": [{"key": "token_emb", "key_type": 2}, {"key": "embedding", "key_type": 2}, {"key": "value", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [65, 256]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
nanollm/_sharding
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"YmxvY2tzLjAuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5ybmdzLmRlZmF1bHQuY291bnQudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAuYXR0bi5ybmdzLmRlZmF1bHQua2V5LnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjAucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjEucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjIucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjMucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjQucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi52YWx1ZS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi52YWx1ZS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5rZXkuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5rZXkua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5vdXQuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5vdXQua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5xdWVyeS5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUuYXR0bi5xdWVyeS5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUubWxwLm91dHB1dF9saW5lYXIuYmlhcy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUubWxwLm91dHB1dF9saW5lYXIua2VybmVsLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUubWxwLmlucHV0X2xpbmVhci5iaWFzLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUubWxwLmlucHV0X2xpbmVhci5rZXJuZWwudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUucG9zdF9ub3JtLnNjYWxlLnZhbHVl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","YmxvY2tzLjUucHJlX25vcm0uc2NhbGUudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","b3V0cHV0X2xheWVyLmJpYXMudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","b3V0cHV0X2xheWVyLmtlcm5lbC52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","cG9zX2VtYi5lbWJlZGRpbmcudmFsdWU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}","dG9rZW5fZW1iLmVtYmVkZGluZy52YWx1ZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"TFRT_CPU_0\"}"}
|
nanollm/array_metadatas/process_0
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "blocks.0.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.rngs.default.count.value", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.rngs.default.key.value", "write_shape": [2], "chunk_shape": [2], "ext_metadata": {"random_key_impl": "threefry2x32"}}}, {"array_metadata": {"param_name": "blocks.0.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.0.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.1.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.2.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.3.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.4.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.key.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.key.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.out.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.out.kernel.value", "write_shape": [8, 32, 256], "chunk_shape": [8, 32, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.query.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.query.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.value.bias.value", "write_shape": [8, 32], "chunk_shape": [8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.attn.value.kernel.value", "write_shape": [256, 8, 32], "chunk_shape": [256, 8, 32], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.input_linear.bias.value", "write_shape": [1024], "chunk_shape": [1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.input_linear.kernel.value", "write_shape": [256, 1024], "chunk_shape": [256, 1024], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.output_linear.bias.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.mlp.output_linear.kernel.value", "write_shape": [1024, 256], "chunk_shape": [1024, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.post_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "blocks.5.pre_norm.scale.value", "write_shape": [256], "chunk_shape": [256], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_layer.bias.value", "write_shape": [65], "chunk_shape": [65], "ext_metadata": null}}, {"array_metadata": {"param_name": "output_layer.kernel.value", "write_shape": [256, 65], "chunk_shape": [256, 65], "ext_metadata": null}}, {"array_metadata": {"param_name": "pos_emb.embedding.value", "write_shape": [64, 256], "chunk_shape": [64, 256], "ext_metadata": null}}, {"array_metadata": {"param_name": "token_emb.embedding.value", "write_shape": [65, 256], "chunk_shape": [65, 256], "ext_metadata": null}}]}
|
nanollm/d/aa5ea6004bb91c89583446f305a9d24a
ADDED
Binary file (40.2 kB). View file
|
|
nanollm/manifest.ocdbt
ADDED
Binary file (119 Bytes). View file
|
|
nanollm/ocdbt.process_0/d/201f0220d41e2e18b9eb3459dca830b4
ADDED
Binary file (448 Bytes). View file
|
|
nanollm/ocdbt.process_0/d/53ead41b1941e97d49e509f965dd40d4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e500db59ccb0eef09f8434684ec2316fc18103974b5acbe71b57fcd94f7399c3
|
3 |
+
size 4164132
|
nanollm/ocdbt.process_0/d/980458ef4d4a7947885fdb80ccea521d
ADDED
Binary file (195 Bytes). View file
|
|
nanollm/ocdbt.process_0/d/9a2e1116e0972ee337ef3bf63bdd4628
ADDED
Binary file (281 Bytes). View file
|
|
nanollm/ocdbt.process_0/d/9ae2cc01447d1ae6c19ec97a48b644e1
ADDED
Binary file (620 Bytes). View file
|
|
nanollm/ocdbt.process_0/d/ac4b2e92cd77495f80237f872a4513b2
ADDED
Binary file (1.72 kB). View file
|
|
nanollm/ocdbt.process_0/d/b4d2ae2c4c9d18de233f906151971c5e
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b11e2157d7ebc8645f44715257330828d1d9201a44cd3c0ab039cc625b09446
|
3 |
+
size 2919391
|
nanollm/ocdbt.process_0/d/c2a43d6da6cb634141b2669378970f40
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c8863c42cb161dff0428de2effe60b2863a69369b379735c0ed6d5469f0c991
|
3 |
+
size 10634511
|
nanollm/ocdbt.process_0/d/f6b1faabf7876fb75969550e05d0f359
ADDED
Binary file (527 Bytes). View file
|
|
nanollm/ocdbt.process_0/manifest.ocdbt
ADDED
Binary file (449 Bytes). View file
|
|