Spaces:

nenafem
/

flask_whisper

Runtime error

Michael Natanael committed on Apr 27

Commit

1b96985

1 Parent(s): feae468

add optimum

Files changed (3) hide show

Dockerfile CHANGED Viewed

@@ -7,10 +7,17 @@ FROM python:3.9
 ENV OMP_NUM_THREADS=4
 ENV NUMEXPR_NUM_THREADS=4
 ENV MKL_NUM_THREADS=4
 RUN apt update
 RUN apt --yes install ffmpeg
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"

 ENV OMP_NUM_THREADS=4
 ENV NUMEXPR_NUM_THREADS=4
 ENV MKL_NUM_THREADS=4
+ENV OV_CPU_THROUGHPUT_NUM_STREAMS=1
 RUN apt update
 RUN apt --yes install ffmpeg
+# Install OpenVINO runtime dependencies
+RUN apt-get update && apt-get install -y \
+    libgl1 \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
 RUN useradd -m -u 1000 user
 USER user
 ENV PATH="/home/user/.local/bin:$PATH"

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import requests
 from tqdm import tqdm
 from transformers import BertTokenizer, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from model.multi_class_model import MultiClassModel  # Adjust if needed
 app = Flask(__name__)
@@ -49,35 +50,48 @@ model = MultiClassModel.load_from_checkpoint(
 )
 model.eval()
-# === INITIAL SETUP: Whisper Pipeline ===
-# https://huggingface.co/openai/whisper-large-v3
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-model_id = "openai/whisper-large-v3"
-whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
-)
-whisper_model.to(device)
-processor = AutoProcessor.from_pretrained(model_id)
-pipe = pipeline(
-    "automatic-speech-recognition",
-    model=whisper_model,
-    tokenizer=processor.tokenizer,
-    feature_extractor=processor.feature_extractor,
-    chunk_length_s=10,
-    batch_size=4,  # batch size for inference - set based on your device
-    torch_dtype=torch_dtype,
-    device=device,
-)
 def whisper_api(temp_audio_path):
-    result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
-    print(result["text"])
     return result

 from tqdm import tqdm
 from transformers import BertTokenizer, AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from model.multi_class_model import MultiClassModel  # Adjust if needed
+from optimum.intel import OVModelForSpeechSeq2Seq
 app = Flask(__name__)
 )
 model.eval()
+# === OPENVINO WHISPER INIT ===
+def init_whisper_openvino():
+    device = "cpu"  # Force CPU for OpenVINO
+    model_id = "openai/whisper-large-v3"
+    # Load OpenVINO-optimized model
+    ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
+        model_id,
+        export=True,  # Auto-convert to OpenVINO format
+        compile=False,
+        trust_remote_code=True,
+    )
+    # Compile for specific input shapes
+    ov_model.compile(use_auto=True)
+    processor = AutoProcessor.from_pretrained(model_id)
+    return pipeline(
+        "automatic-speech-recognition",
+        model=ov_model,
+        feature_extractor=processor.feature_extractor,
+        tokenizer=processor.tokenizer,
+        max_new_tokens=128,
+        chunk_length_s=30,
+        batch_size=1,  # Optimized for OpenVINO on CPU
+        device=device,
+        torch_dtype="float32",
+    )
+# Initialize once at startup
+whisper_pipe = init_whisper_openvino()
 def whisper_api(temp_audio_path):
+    # Run the module-level ``whisper_pipe`` on the audio file at
+    # ``temp_audio_path`` and hand its result back to the caller unchanged.
+    result = whisper_pipe(
+        temp_audio_path,
+        return_timestamps=False,
+        # Forwarded to generation: fixes the language and task instead of
+        # leaving them to the model.
+        generate_kwargs={
+            "language": "indonesian",
+            "task": "transcribe",
+        }
+    )
     return result

requirements.txt CHANGED Viewed

@@ -18,4 +18,7 @@ pytorch-lightning==2.2.1
 lightning==2.4.0
 torch==2.2.0
 transformers==4.42.4
-torchmetrics==0.11.0

 lightning==2.4.0
 torch==2.2.0
 transformers==4.42.4
+torchmetrics==0.11.0
+openvino==2023.2.0
+# Written without quotes: pip does not strip shell-style quoting from requirement lines.
+optimum[openvino]