husseinelsaadi commited on
Commit
57a37ae
·
1 Parent(s): 8255e28
backend/services/interview_engine.py CHANGED
@@ -20,6 +20,14 @@ groq_llm = ChatGroq(
20
  )
21
 
22
  # Initialize Whisper model
 
 
 
 
 
 
 
 
23
  whisper_model = None
24
 
25
  def load_whisper_model():
@@ -28,12 +36,17 @@ def load_whisper_model():
28
  try:
29
  device = "cuda" if torch.cuda.is_available() else "cpu"
30
  compute_type = "float16" if device == "cuda" else "int8"
31
- whisper_model = WhisperModel("base", device=device, compute_type=compute_type)
32
- logging.info(f"Whisper model loaded on {device} with {compute_type}")
 
 
 
 
 
33
  except Exception as e:
34
  logging.error(f"Error loading Whisper model: {e}")
35
  # Fallback to CPU
36
- whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
37
  return whisper_model
38
 
39
  def generate_first_question(profile, job):
 
20
  )
21
 
22
  # Initialize Whisper model
23
+ #
24
+ # Loading the Whisper model can take several seconds on first use because the
25
+ # model weights must be downloaded from Hugging Face. This delay can cause
26
+ # the API call to ``/api/transcribe_audio`` to appear stuck while the model
27
+ # downloads. To mitigate this, we allow the model size to be configured via
28
+ # the ``WHISPER_MODEL_NAME`` environment variable and preload the model when
29
+ # this module is imported. Using a smaller model (e.g. "tiny" or "base.en")
30
+ # reduces download size and inference time considerably.
31
  whisper_model = None
32
 
33
  def load_whisper_model():
 
36
  try:
37
  device = "cuda" if torch.cuda.is_available() else "cpu"
38
  compute_type = "float16" if device == "cuda" else "int8"
39
+ # Allow overriding the model size via environment. Default to a
40
+ # lightweight model to improve startup times. Available options
41
+ # include: tiny, base, base.en, small, medium, large. See
42
+ # https://github.com/SYSTRAN/faster-whisper for details.
43
+ model_name = os.getenv("WHISPER_MODEL_NAME", "tiny")
44
+ whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
45
+ logging.info(f"Whisper model '{model_name}' loaded on {device} with {compute_type}")
46
  except Exception as e:
47
  logging.error(f"Error loading Whisper model: {e}")
48
  # Fallback to CPU
49
+ whisper_model = WhisperModel(model_name if 'model_name' in locals() else "tiny", device="cpu", compute_type="int8")
50
  return whisper_model
51
 
52
  def generate_first_question(profile, job):