hsuwill000 committed
Commit 678e02e · verified · 1 Parent(s): 457e586

Update app.py

Files changed (1):
  app.py  +16 -3
app.py CHANGED
@@ -3,10 +3,14 @@ import openvino_genai as ov_genai
 import queue
 import threading
 import time
+from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
+import nncf
 
 import huggingface_hub as hf_hub
 # Initialize the OpenVINO model
-
+model_id = "Qwen/Qwen3-1.7B"
+optimized_model_path = "ov"
+'''
 import subprocess
 
 subprocess.run([
@@ -17,7 +21,7 @@ subprocess.run([
     "--quant-mode", "int4_f8e4m3",
     "ov"
 ])
-
+'''
 
 '''
 model_id = "OpenVINO/Qwen2.5-7B-Instruct-int4-ov"
@@ -30,9 +34,18 @@ config.top_k = 30;
 
 hf_hub.snapshot_download(model_id, local_dir=model_path)
 '''
-
+'''
 model_path = "ov"
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
+'''
+model = OVModelForCausalLM.from_pretrained(
+    model_path,
+    quantization_config=OVWeightQuantizationConfig(bits=4, asym=True, ratio=0.8, dataset="ptb"),
+)
+
+# save the model after optimization
+model.save_pretrained(optimized_model_path)
+
 #pipe.start_chat()
 
 def start_chat():
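
For reference, the added lines replace the pre-exported ov_genai.LLMPipeline with 4-bit weight compression done at load time through optimum.intel. Two details in the committed code look off: model_path is now assigned only inside the triple-quoted block above the call, and the OVWeightQuantizationConfig versions I know expose a sym flag rather than asym. The snippet below is therefore only a minimal sketch of the presumable intent, assuming the Hub id Qwen/Qwen3-1.7B is the source, sym=False stands in for asym=True, and the tokenizer should be saved next to the weights; none of that is confirmed by this commit.

from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
from transformers import AutoTokenizer

model_id = "Qwen/Qwen3-1.7B"        # source checkpoint on the Hugging Face Hub
optimized_model_path = "ov"         # local directory for the quantized OpenVINO export

# Export to OpenVINO IR with 4-bit asymmetric weight compression on 80% of the
# layers, calibrated on the "ptb" dataset, mirroring the settings in the commit.
model = OVModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=OVWeightQuantizationConfig(bits=4, sym=False, ratio=0.8, dataset="ptb"),
)
model.save_pretrained(optimized_model_path)

# Save the tokenizer too, so the directory is self-contained for later reloads.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.save_pretrained(optimized_model_path)

# Reload the compressed model and run a short generation as a smoke test.
model = OVModelForCausalLM.from_pretrained(optimized_model_path)
inputs = tokenizer("Hello, who are you?", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))

Note that ov_genai.LLMPipeline (left commented out here) also expects OpenVINO tokenizer/detokenizer IR files in the model directory; as far as I know, save_pretrained alone does not generate those, so re-enabling that path would need an extra tokenizer conversion step.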