husseinelsaadi commited on
Commit
57a37ae
·
1 Parent(s): 8255e28
backend/services/interview_engine.py CHANGED
@@ -20,6 +20,14 @@ groq_llm = ChatGroq(
20
  )
21
 
22
  # Initialize Whisper model
 
 
 
 
 
 
 
 
23
  whisper_model = None
24
 
25
  def load_whisper_model():
@@ -28,12 +36,17 @@ def load_whisper_model():
28
  try:
29
  device = "cuda" if torch.cuda.is_available() else "cpu"
30
  compute_type = "float16" if device == "cuda" else "int8"
31
- whisper_model = WhisperModel("base", device=device, compute_type=compute_type)
32
- logging.info(f"Whisper model loaded on {device} with {compute_type}")
 
 
 
 
 
33
  except Exception as e:
34
  logging.error(f"Error loading Whisper model: {e}")
35
  # Fallback to CPU
36
- whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
37
  return whisper_model
38
 
39
  def generate_first_question(profile, job):
 
20
  )
21
 
22
  # Initialize Whisper model
23
+ #
24
+ # Loading the Whisper model can take several seconds on first use because the
25
+ # model weights must be downloaded from Hugging Face. This delay can cause
26
+ # the API call to ``/api/transcribe_audio`` to appear stuck while the model
27
+ # downloads. To mitigate this, we allow the model size to be configured via
28
+ # the ``WHISPER_MODEL_NAME`` environment variable and preload the model when
29
+ # this module is imported. Using a smaller model (e.g. "tiny" or "base.en")
30
+ # reduces download size and inference time considerably.
31
  whisper_model = None
32
 
33
  def load_whisper_model():
 
36
  try:
37
  device = "cuda" if torch.cuda.is_available() else "cpu"
38
  compute_type = "float16" if device == "cuda" else "int8"
39
+ # Allow overriding the model size via environment. Default to a
40
+ # lightweight model to improve startup times. Available options
41
+ # include: tiny, base, base.en, small, medium, large. See
42
+ # https://github.com/SYSTRAN/faster-whisper for details.
43
+ model_name = os.getenv("WHISPER_MODEL_NAME", "tiny")
44
+ whisper_model = WhisperModel(model_name, device=device, compute_type=compute_type)
45
+ logging.info(f"Whisper model '{model_name}' loaded on {device} with {compute_type}")
46
  except Exception as e:
47
  logging.error(f"Error loading Whisper model: {e}")
48
  # Fallback to CPU
49
+ whisper_model = WhisperModel(model_name if 'model_name' in locals() else "tiny", device="cpu", compute_type="int8")
50
  return whisper_model
51
 
52
  def generate_first_question(profile, job):