Spaces:
Running
on
T4
Running
on
T4
File size: 1,310 Bytes
28bdc3c b224afc 850b2c1 28bdc3c 178c634 28bdc3c 178c634 28bdc3c c41415e 28bdc3c c41415e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
# Hugging Face Hub repo id used by export_model both as the model to load
# and as the repository the exported files are pushed to.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_6"
def export_model(backend="onnx", use_qint8=False):
    """Load MODEL_NAME with the given backend and push the export to the Hub.

    The model is loaded via ``SentenceTransformer(MODEL_NAME, backend=...)``
    and then uploaded to the ``MODEL_NAME`` repository. Every upload is made
    with ``create_pr=True``.

    Args:
        backend: Either ``"onnx"`` or ``"openvino"``.
        use_qint8: If true, call the backend's quantized export helper
            (``export_static_quantized_openvino_model`` for OpenVINO,
            ``export_dynamic_quantized_onnx_model`` with the
            ``"avx512_vnni"`` configuration for ONNX), which also performs
            the upload. If false, push the loaded model unchanged.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before any model loading is attempted.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        # create_pr=True was missing on this branch only; it is passed here
        # so the quantized ONNX upload goes through a pull request like the
        # other three export paths.
        export_dynamic_quantized_onnx_model(
            model,
            quantization_config="avx512_vnni",
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
# Export one combination at a time: set backend / use_qint8 below and uncomment to run.
# backend = "onnx"
# use_qint8 = False
# print(f"Exporting {backend} model with QINT8={use_qint8}")
# export_model(backend=backend, use_qint8=use_qint8)
# import tensorrt as trt
# print(trt.__version__)
|