Spaces:
Sleeping
Sleeping
from sentence_transformers import ( | |
SentenceTransformer, | |
export_static_quantized_openvino_model, | |
export_dynamic_quantized_onnx_model, | |
) | |
# Hub repository id used both to load the model and as the push/export target.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_6"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the given backend and push it to the Hub as a PR.

    Args:
        backend: Either ``"onnx"`` or ``"openvino"``; passed through to
            ``SentenceTransformer(..., backend=...)``.
        use_qint8: When true, export a quantized variant instead of pushing
            the plain backend model. OpenVINO uses the static-quantization
            exporter with ``quantization_config=None``; ONNX uses the
            dynamic-quantization exporter with the ``"avx512_vnni"`` config.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before anything is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        # create_pr=True keeps this path consistent with every other push in
        # this function: changes are proposed as a PR rather than committed
        # directly to the repository.
        export_dynamic_quantized_onnx_model(
            model,
            "avx512_vnni",
            MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
# Export all combinations
# backend = "onnx"
# use_qint8 = False
# print(f"Exporting {backend} model with QINT8={use_qint8}")
# export_model(backend=backend, use_qint8=use_qint8)
# import tensorrt as trt
# print(trt.__version__)