microsoft
/

Phi-4-multimodal-instruct-onnx

Automatic Speech Recognition

speech-summarization

speech-translation

visual-question-answering

phi-4-multimodal

Model card · Files and versions

kvaishnavi committed on Jun 13

Commit

5c58d1e

·

verified ·

1 Parent(s): a0ecd43

Update onnx/builder.py

Files changed (1) hide show

onnx/builder.py +4 -4

onnx/builder.py CHANGED Viewed

@@ -105,7 +105,7 @@ def build_vision(args):
     # ORT 4-bits quantizer
     fpath_4 = os.path.join(args.output, filename)
     cmd = [
-        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
         "--input_model", fpath_3,
         "--output_model", fpath_4,
         "--block_size", str(32),
@@ -227,7 +227,7 @@ def build_speech(args):
     # ORT 4-bits quantizer
     fpath_5 = os.path.join(args.output, filename)
     cmd = [
-        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
         "--input_model", fpath_4,
         "--output_model", fpath_5,
         "--block_size", str(32),
@@ -528,7 +528,7 @@ def build_quantized_adapters(args):
     filename = "phi-4-mm-qlora-vision.onnx"
     fpath_3 = os.path.join(args.output, filename)
     cmd = [
-        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
         "--input_model", fpath_1,
         "--output_model", fpath_3,
         "--block_size", str(32),
@@ -539,7 +539,7 @@ def build_quantized_adapters(args):
     filename = "phi-4-mm-qlora-speech.onnx"
     fpath_4 = os.path.join(args.output, filename)
     cmd = [
-        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_4bits_quantizer",
         "--input_model", fpath_2,
         "--output_model", fpath_4,
         "--block_size", str(32),

     # ORT 4-bits quantizer
     fpath_4 = os.path.join(args.output, filename)
     cmd = [
+        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
         "--input_model", fpath_3,
         "--output_model", fpath_4,
         "--block_size", str(32),
     # ORT 4-bits quantizer
     fpath_5 = os.path.join(args.output, filename)
     cmd = [
+        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
         "--input_model", fpath_4,
         "--output_model", fpath_5,
         "--block_size", str(32),
     filename = "phi-4-mm-qlora-vision.onnx"
     fpath_3 = os.path.join(args.output, filename)
     cmd = [
+        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
         "--input_model", fpath_1,
         "--output_model", fpath_3,
         "--block_size", str(32),
     filename = "phi-4-mm-qlora-speech.onnx"
     fpath_4 = os.path.join(args.output, filename)
     cmd = [
+        f"{sys.executable}", "-m", "onnxruntime.quantization.matmul_nbits_quantizer",
         "--input_model", fpath_2,
         "--output_model", fpath_4,
         "--block_size", str(32),