from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)

MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_6"


def export_model(backend: str = "onnx", use_qint8: bool = False) -> None:
    """Load ``MODEL_NAME`` with the given backend and upload it to the Hub.

    Every upload path passes ``create_pr=True`` so the exported files are
    proposed as a pull request against ``MODEL_NAME`` instead of being
    committed directly.

    Args:
        backend: Either ``"onnx"`` or ``"openvino"``; forwarded to
            ``SentenceTransformer(..., backend=backend)``.
        use_qint8: When false, the loaded model is uploaded as-is via
            ``model.push_to_hub``. When true, the backend-specific
            quantization export helper is called instead:
            ``export_static_quantized_openvino_model`` for OpenVINO and
            ``export_dynamic_quantized_onnx_model`` for ONNX.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("onnx", "openvino"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            # No explicit config is supplied; presumably the library falls
            # back to its default quantization settings -- confirm in docs.
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        export_dynamic_quantized_onnx_model(
            model,
            "avx512_vnni",  # quantization configuration passed by name
            MODEL_NAME,
            push_to_hub=True,
            # Matches the other upload paths, which all open a pull request.
            create_pr=True,
        )


# Export all combinations
# backend = "onnx"
# use_qint8 = False
# print(f"Exporting {backend} model with QINT8={use_qint8}")
# export_model(backend=backend, use_qint8=use_qint8)

# import tensorrt as trt
# print(trt.__version__)