Michael Natanael committed on
Commit
1b96985
·
1 Parent(s): feae468

add optimum

Browse files
Files changed (3) hide show
  1. Dockerfile +7 -0
  2. app.py +41 -27
  3. requirements.txt +4 -1
Dockerfile CHANGED
@@ -7,10 +7,17 @@ FROM python:3.9
7
  ENV OMP_NUM_THREADS=4
8
  ENV NUMEXPR_NUM_THREADS=4
9
  ENV MKL_NUM_THREADS=4
 
10
 
11
  RUN apt update
12
  RUN apt --yes install ffmpeg
13
 
 
 
 
 
 
 
14
  RUN useradd -m -u 1000 user
15
  USER user
16
  ENV PATH="/home/user/.local/bin:$PATH"
 
7
  ENV OMP_NUM_THREADS=4
8
  ENV NUMEXPR_NUM_THREADS=4
9
  ENV MKL_NUM_THREADS=4
10
+ ENV OV_CPU_THROUGHPUT_NUM_STREAMS=1
11
 
12
  RUN apt update
13
  RUN apt --yes install ffmpeg
14
 
15
+ # Install OpenVINO runtime dependencies
16
+ RUN apt-get update && apt-get install -y \
17
+ libgl1 \
18
+ libgomp1 \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
  RUN useradd -m -u 1000 user
22
  USER user
23
  ENV PATH="/home/user/.local/bin:$PATH"
app.py CHANGED
@@ -9,6 +9,7 @@ import requests
9
  from tqdm import tqdm
10
  from transformers import BertTokenizer, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
11
  from model.multi_class_model import MultiClassModel # Adjust if needed
 
12
 
13
  app = Flask(__name__)
14
 
@@ -49,35 +50,48 @@ model = MultiClassModel.load_from_checkpoint(
49
  )
50
  model.eval()
51
 
52
- # === INITIAL SETUP: Whisper Pipeline ===
53
- # https://huggingface.co/openai/whisper-large-v3
54
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
55
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
56
-
57
- model_id = "openai/whisper-large-v3"
58
-
59
- whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
60
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
61
- )
62
- whisper_model.to(device)
63
-
64
- processor = AutoProcessor.from_pretrained(model_id)
65
-
66
- pipe = pipeline(
67
- "automatic-speech-recognition",
68
- model=whisper_model,
69
- tokenizer=processor.tokenizer,
70
- feature_extractor=processor.feature_extractor,
71
- chunk_length_s=10,
72
- batch_size=4, # batch size for inference - set based on your device
73
- torch_dtype=torch_dtype,
74
- device=device,
75
- )
76
-
 
 
 
 
 
 
 
77
 
78
  def whisper_api(temp_audio_path):
79
- result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
80
- print(result["text"])
 
 
 
 
 
 
81
  return result
82
 
83
 
 
9
  from tqdm import tqdm
10
  from transformers import BertTokenizer, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
11
  from model.multi_class_model import MultiClassModel # Adjust if needed
12
+ from optimum.intel import OVModelForSpeechSeq2Seq
13
 
14
  app = Flask(__name__)
15
 
 
50
  )
51
  model.eval()
52
 
53
+ # === OPENVINO WHISPER INIT ===
54
def init_whisper_openvino():
    """Build the speech-recognition pipeline backed by an OpenVINO Whisper model.

    Loads ``openai/whisper-large-v3`` through ``OVModelForSpeechSeq2Seq``
    (converted to OpenVINO format at load time via ``export=True``), compiles
    it, loads the matching processor, and wraps both in a ``transformers``
    ``automatic-speech-recognition`` pipeline.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    device = "cpu"  # Force CPU for OpenVINO
    model_id = "openai/whisper-large-v3"

    # compile=False defers compilation to the explicit compile() call below.
    # NOTE(review): trust_remote_code=True is probably unnecessary for this
    # official checkpoint - confirm and drop it if so.
    ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        export=True,  # Auto-convert to OpenVINO format
        compile=False,
        trust_remote_code=True,
    )

    # compile() accepts no keyword arguments (there is no `use_auto` option),
    # so it is called bare; it compiles the model for the current device.
    ov_model.compile()

    processor = AutoProcessor.from_pretrained(model_id)

    return pipeline(
        "automatic-speech-recognition",
        model=ov_model,
        feature_extractor=processor.feature_extractor,
        tokenizer=processor.tokenizer,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=1,  # Optimized for OpenVINO on CPU
        device=device,
        torch_dtype="float32",
    )
82
+
83
+ # Initialize once at startup
84
+ whisper_pipe = init_whisper_openvino()
85
 
86
  def whisper_api(temp_audio_path):
87
+ result = whisper_pipe(
88
+ temp_audio_path,
89
+ return_timestamps=False,
90
+ generate_kwargs={
91
+ "language": "indonesian",
92
+ "task": "transcribe",
93
+ }
94
+ )
95
  return result
96
 
97
 
requirements.txt CHANGED
@@ -18,4 +18,7 @@ pytorch-lightning==2.2.1
18
  lightning==2.4.0
19
  torch==2.2.0
20
  transformers==4.42.4
21
- torchmetrics==0.11.0
 
 
 
 
18
  lightning==2.4.0
19
  torch==2.2.0
20
  transformers==4.42.4
21
+ torchmetrics==0.11.0
22
+
23
+ openvino==2023.2.0
24
+ optimum[openvino]