Update onnx/builder.py
Browse files
- onnx/builder.py +4 -4
onnx/builder.py
CHANGED
|
@@ -105,7 +105,7 @@ def build_vision(args):
|
|
| 105 |
# ORT 4-bits quantizer
|
| 106 |
fpath_4 = os.path.join(args.output, filename)
|
| 107 |
cmd = [
|
| 108 |
-
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
|
| 109 |
"--input_model", fpath_3,
|
| 110 |
"--output_model", fpath_4,
|
| 111 |
"--block_size", str(32),
|
|
@@ -227,7 +227,7 @@ def build_speech(args):
|
|
| 227 |
# ORT 4-bits quantizer
|
| 228 |
fpath_5 = os.path.join(args.output, filename)
|
| 229 |
cmd = [
|
| 230 |
-
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
|
| 231 |
"--input_model", fpath_4,
|
| 232 |
"--output_model", fpath_5,
|
| 233 |
"--block_size", str(32),
|
|
@@ -528,7 +528,7 @@ def build_quantized_adapters(args):
|
|
| 528 |
filename = "phi-4-mm-qlora-vision.onnx"
|
| 529 |
fpath_3 = os.path.join(args.output, filename)
|
| 530 |
cmd = [
|
| 531 |
-
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
|
| 532 |
"--input_model", fpath_1,
|
| 533 |
"--output_model", fpath_3,
|
| 534 |
"--block_size", str(32),
|
|
@@ -539,7 +539,7 @@ def build_quantized_adapters(args):
|
|
| 539 |
filename = "phi-4-mm-qlora-speech.onnx"
|
| 540 |
fpath_4 = os.path.join(args.output, filename)
|
| 541 |
cmd = [
|
| 542 |
-
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
|
| 543 |
"--input_model", fpath_2,
|
| 544 |
"--output_model", fpath_4,
|
| 545 |
"--block_size", str(32),
|
|
|
|
| 105 |
# ORT 4-bits quantizer
|
| 106 |
fpath_4 = os.path.join(args.output, filename)
|
| 107 |
cmd = [
|
| 108 |
+
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
|
| 109 |
"--input_model", fpath_3,
|
| 110 |
"--output_model", fpath_4,
|
| 111 |
"--block_size", str(32),
|
|
|
|
| 227 |
# ORT 4-bits quantizer
|
| 228 |
fpath_5 = os.path.join(args.output, filename)
|
| 229 |
cmd = [
|
| 230 |
+
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
|
| 231 |
"--input_model", fpath_4,
|
| 232 |
"--output_model", fpath_5,
|
| 233 |
"--block_size", str(32),
|
|
|
|
| 528 |
filename = "phi-4-mm-qlora-vision.onnx"
|
| 529 |
fpath_3 = os.path.join(args.output, filename)
|
| 530 |
cmd = [
|
| 531 |
+
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
|
| 532 |
"--input_model", fpath_1,
|
| 533 |
"--output_model", fpath_3,
|
| 534 |
"--block_size", str(32),
|
|
|
|
| 539 |
filename = "phi-4-mm-qlora-speech.onnx"
|
| 540 |
fpath_4 = os.path.join(args.output, filename)
|
| 541 |
cmd = [
|
| 542 |
+
f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
|
| 543 |
"--input_model", fpath_2,
|
| 544 |
"--output_model", fpath_4,
|
| 545 |
"--block_size", str(32),
|