meisaicheck-api / prepare.py
vumichien's picture
Update model configuration to use OpenVINO, modify cached embeddings with new sizes, and enhance .gitignore to exclude additional data files for improved project management.
58b07a2
raw
history blame
1.31 kB
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
# Hugging Face Hub repo id. It is used both as the source checkpoint that
# SentenceTransformer loads and as the destination repo for every push below.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v1_0_8_5"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the given backend and push the export to the Hub.

    Every push is opened as a pull request against ``MODEL_NAME`` rather than
    committed directly, so all backend/quantization combinations behave the
    same way on the Hub side.

    Args:
        backend: ``"onnx"`` or ``"openvino"``.
        use_qint8: If true, push a quantized export produced by the matching
            ``export_*_quantized_*_model`` helper; otherwise push the model
            as loaded with the chosen backend.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
    elif backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # NOTE(review): None presumably selects the library's default
            # quantization settings - confirm against the installed version.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model,
            # NOTE(review): presumably a preset name for AVX512-VNNI CPUs -
            # confirm it matches the deployment hardware.
            "avx512_vnni",
            MODEL_NAME,
            push_to_hub=True,
            # Previously omitted: without it this path pushed straight to the
            # repo instead of opening a PR like the other three paths.
            create_pr=True,
        )
# Export configuration for this run. Exactly one backend/quantization
# combination is exported per invocation; edit the two constants below to
# produce a different one.
BACKEND = "openvino"
USE_QINT8 = False

print(f"Exporting {BACKEND} model with QINT8={USE_QINT8}")
export_model(use_qint8=USE_QINT8, backend=BACKEND)
# import tensorrt as trt
# print(trt.__version__)