File size: 1,310 Bytes
28bdc3c
 
 
 
 
b224afc
850b2c1
28bdc3c
 
 
 
 
 
 
 
 
 
 
178c634
28bdc3c
 
178c634
28bdc3c
 
 
 
 
 
 
c41415e
28bdc3c
 
 
 
 
c41415e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)

# Model identifier used both to load the model and as the target passed to
# push_to_hub / model_name_or_path when exporting.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_6"


def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the given backend and push it to the Hub as a PR.

    Args:
        backend: ``"onnx"`` or ``"openvino"``; forwarded unchanged to
            ``SentenceTransformer(MODEL_NAME, backend=...)``.
        use_qint8: When truthy, a quantized export is pushed instead of the
            model as loaded: ``export_dynamic_quantized_onnx_model`` with the
            ``"avx512_vnni"`` preset for ONNX, or
            ``export_static_quantized_openvino_model`` with
            ``quantization_config=None`` for OpenVINO.

    Raises:
        ValueError: If ``backend`` is neither ``"onnx"`` nor ``"openvino"``.
    """
    # Reject unknown backends before loading anything.
    if backend not in ("onnx", "openvino"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    if not use_qint8:
        model.push_to_hub(MODEL_NAME, create_pr=True)
        return

    if backend == "openvino":
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )
    else:
        # create_pr=True keeps this path consistent with every other export
        # here: changes are proposed as a pull request rather than pushed
        # straight to the repository.
        export_dynamic_quantized_onnx_model(
            model,
            "avx512_vnni",
            MODEL_NAME,
            push_to_hub=True,
            create_pr=True,
        )


# Export one combination at a time: set the values below, uncomment, and run.
# backend = "onnx"
# use_qint8 = False
# print(f"Exporting {backend} model with QINT8={use_qint8}")
# export_model(backend=backend, use_qint8=use_qint8)

# import tensorrt as trt
# print(trt.__version__)