david-thrower committed · Commit 94ccb47 · verified · 1 Parent(s): e102b08

Update app.py

Files changed (1): app.py +20 -20
app.py CHANGED
@@ -41,30 +41,30 @@ MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, export=True)

-print("Creating quant config")
-qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
-print("Creating quant config successful")
-
-print("Creating quantizer")
-quantizer = ORTQuantizer.from_pretrained(model)
-print("Creating quantizer successful")
-# Step 4: Perform quantization saving output in a new directory
-quantized_model_dir = "./quantized_model"
-print("Starting quantization...")
-quantizer.quantize(save_dir=quantized_model_dir, quantization_config=qconfig)
-print("Quantization was successful. Garbage collecting...")
-
-del(quantizer)
-del(qconfig)
-del(model)
+# print("Creating quant config")
+# qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
+# print("Creating quant config successful")
+
+# print("Creating quantizer")
+# quantizer = ORTQuantizer.from_pretrained(model)
+# print("Creating quantizer successful")
+# # Step 4: Perform quantization saving output in a new directory
+# quantized_model_dir = "./quantized_model"
+# print("Starting quantization...")
+# quantizer.quantize(save_dir=quantized_model_dir, quantization_config=qconfig)
+# print("Quantization was successful. Garbage collecting...")
+
+# del(quantizer)
+# del(qconfig)
+# del(model)

 # Run garbage collection again to release memory from quantizer objects
 gc.collect()

-# Step 5: Load the quantized ONNX model for inference
-print("Loading quantized ONNX model for inference...")
-model = ORTModelForCausalLM.from_pretrained(quantized_model_dir)
-print("Loading model was succcessful. Garbage collecting.")
+# # Step 5: Load the quantized ONNX model for inference
+# print("Loading quantized ONNX model for inference...")
+# model = ORTModelForCausalLM.from_pretrained(quantized_model_dir)
+# print("Loading model was succcessful. Garbage collecting.")

 # Garbage collection again after final loading
 gc.collect()
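
For context, the block being commented out implements dynamic int8 quantization of the ONNX export via Optimum's ONNX Runtime integration. Below is a minimal standalone sketch of that path, assembled from the removed lines; it assumes optimum is installed with the onnxruntime extra (pip install "optimum[onnxruntime]"), and the save directory name is carried over from the diff.

import gc

from optimum.onnxruntime import ORTModelForCausalLM, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
from transformers import AutoTokenizer

MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"
quantized_model_dir = "./quantized_model"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# export=True converts the PyTorch checkpoint to ONNX on the fly
model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, export=True)

# Dynamic quantization: weights stored as int8, activations quantized
# at runtime; this preset targets CPUs with AVX512-VNNI support
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
quantizer = ORTQuantizer.from_pretrained(model)
quantizer.quantize(save_dir=quantized_model_dir, quantization_config=qconfig)

# Release the full-precision model before loading the quantized copy
del quantizer, qconfig, model
gc.collect()

model = ORTModelForCausalLM.from_pretrained(quantized_model_dir)

The del/gc.collect() pairs mirror the diff's approach of holding only one copy of the weights in memory at a time, which appears aimed at memory-constrained hosting hardware.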
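
After this change, app.py serves the un-quantized ONNX export directly. A quick smoke test of that remaining path might look like the following sketch; the prompt and generation settings are illustrative, not taken from the source.

from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer

MODEL_NAME = "HuggingFaceTB/SmolLM3-3B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, export=True)

# ORTModelForCausalLM exposes the usual generate() API
inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))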