# meisaicheck-api / prepare.py
# vumichien's picture
# update GPU config
# c41415e
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
# Model identifier: passed to SentenceTransformer to load the model, and to
# push_to_hub / the quantized export helpers as the destination name.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_6"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the chosen backend and push the export to the Hub.

    Args:
        backend: Either ``"onnx"`` or ``"openvino"``.
        use_qint8: When true, run the backend's quantized export helper
            instead of pushing the loaded model as-is.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends up front, before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Non-quantized export is identical for both backends: push via a PR.
    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        # NOTE(review): unlike every other path here, this call does not pass
        # create_pr=True -- confirm whether that is intentional.
        export_dynamic_quantized_onnx_model(
            model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )
# Example (disabled): export one backend / quantization combination
# backend = "onnx"
# use_qint8 = False
# print(f"Exporting {backend} model with QINT8={use_qint8}")
# export_model(backend=backend, use_qint8=use_qint8)
# import tensorrt as trt
# print(trt.__version__)