Upload optimized ONNX model (#1)
Browse files
- Upload optimized ONNX model (0941cb4f762681534a7aff7fd3ac3113c47dc2cc)
- Upload 4 files (cfc4725b96a4d1fa5cf535bcb0695df76b501907)
- Update README.md (117c4c0baf2f9c8f571a06d34b016179f4ef8c9c)
- Upload README.md (88635c7ea4f88689e90129cd613c961f81dc0090)
Co-authored-by: Joshua <[email protected]>
- README.md +2 -1
- onnx/model.onnx +1 -1
- onnx/model_fp16.onnx +2 -2
- onnx/model_q4.onnx +1 -1
- onnx/model_q4f16.onnx +2 -2
- onnx/model_quantized.onnx +1 -1
README.md
CHANGED
@@ -9,6 +9,7 @@ tags:
|
|
9 |
- text-embeddings-inference
|
10 |
- information-retrieval
|
11 |
- knowledge-distillation
|
|
|
12 |
language:
|
13 |
- en
|
14 |
---
|
@@ -133,7 +134,7 @@ import { AutoModel, AutoTokenizer, matmul } from "@huggingface/transformers";
|
|
133 |
const model_id = "MongoDB/mdbr-leaf-mt";
|
134 |
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
|
135 |
const model = await AutoModel.from_pretrained(model_id, {
|
136 |
-
dtype: "fp32", // Options: "fp32" | "q8" | "q4"
|
137 |
});
|
138 |
|
139 |
// Prepare queries and documents
|
|
|
9 |
- text-embeddings-inference
|
10 |
- information-retrieval
|
11 |
- knowledge-distillation
|
12 |
+
- transformers.js
|
13 |
language:
|
14 |
- en
|
15 |
---
|
|
|
134 |
const model_id = "MongoDB/mdbr-leaf-mt";
|
135 |
const tokenizer = await AutoTokenizer.from_pretrained(model_id);
|
136 |
const model = await AutoModel.from_pretrained(model_id, {
|
137 |
+
dtype: "fp32", // Options: "fp32" | "fp16" | "q8" | "q4" | "q4f16"
|
138 |
});
|
139 |
|
140 |
// Prepare queries and documents
|
onnx/model.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 59258
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd50c1704d358868c81e57a958761a10ceb7b04e500983d35f444c453b56da41
|
3 |
size 59258
|
onnx/model_fp16.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1c822ace17d1b3b8f0e89f7efec54d9026c767e9800abf8dba4598142fe4bb8
|
3 |
+
size 99304
|
onnx/model_q4.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 72432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bfd24efe834f56831dec6dabb7bd9223a3068dccf63e37e11253e36f8f7e06e
|
3 |
size 72432
|
onnx/model_q4f16.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36829f30c55082a468853a5d1107cbcdebc59a3c3f5e8cc712b902a8b7459a42
|
3 |
+
size 112319
|
onnx/model_quantized.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 220168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:679b937c5d3941987a762a4ed52427c40a9e3d51b29492c865e6eac90f1b5b86
|
3 |
size 220168
|