Michael Natanael committed on
Commit
feae468
·
1 Parent(s): 1630f77

Change the transcription mechanism for uploaded audio: build the Whisper pipeline once at module load instead of inside every whisper_api call

Browse files
Files changed (2) hide show
  1. Dockerfile +6 -1
  2. app.py +25 -24
Dockerfile CHANGED
@@ -3,6 +3,11 @@
3
 
4
  FROM python:3.9
5
 
 
 
 
 
 
6
  RUN apt update
7
  RUN apt --yes install ffmpeg
8
 
@@ -17,4 +22,4 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
17
 
18
  COPY --chown=user . /app
19
  # CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
20
- CMD ["gunicorn", "--timeout", "60", "-b", "0.0.0.0:7860", "app:app"]
 
3
 
4
  FROM python:3.9
5
 
6
+ # Cap the native math-library thread pools (OpenMP, NumExpr, MKL) at 4 threads each
7
+ ENV OMP_NUM_THREADS=4
8
+ ENV NUMEXPR_NUM_THREADS=4
9
+ ENV MKL_NUM_THREADS=4
10
+
11
  RUN apt update
12
  RUN apt --yes install ffmpeg
13
 
 
22
 
23
  COPY --chown=user . /app
24
  # CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
25
+ CMD ["gunicorn", "--timeout", "120", "--workers", "2", "-b", "0.0.0.0:7860", "app:app"]
app.py CHANGED
@@ -49,32 +49,33 @@ model = MultiClassModel.load_from_checkpoint(
49
  )
50
  model.eval()
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- def whisper_api(temp_audio_path):
54
- # https://huggingface.co/openai/whisper-large-v3
55
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
56
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
57
-
58
- model_id = "openai/whisper-large-v3"
59
-
60
- model = AutoModelForSpeechSeq2Seq.from_pretrained(
61
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
62
- )
63
- model.to(device)
64
-
65
- processor = AutoProcessor.from_pretrained(model_id)
66
-
67
- pipe = pipeline(
68
- "automatic-speech-recognition",
69
- model=model,
70
- tokenizer=processor.tokenizer,
71
- feature_extractor=processor.feature_extractor,
72
- chunk_length_s=10,
73
- batch_size=4, # batch size for inference - set based on your device
74
- torch_dtype=torch_dtype,
75
- device=device,
76
- )
77
 
 
78
  result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
79
  print(result["text"])
80
  return result
 
49
  )
50
  model.eval()
51
 
52
+ # === INITIAL SETUP: Whisper Pipeline ===
53
+ # https://huggingface.co/openai/whisper-large-v3
54
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
55
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
56
+
57
+ model_id = "openai/whisper-large-v3"
58
+
59
+ whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
60
+ model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
61
+ )
62
+ whisper_model.to(device)
63
+
64
+ processor = AutoProcessor.from_pretrained(model_id)
65
+
66
+ pipe = pipeline(
67
+ "automatic-speech-recognition",
68
+ model=whisper_model,
69
+ tokenizer=processor.tokenizer,
70
+ feature_extractor=processor.feature_extractor,
71
+ chunk_length_s=10,
72
+ batch_size=4, # batch size for inference - set based on your device
73
+ torch_dtype=torch_dtype,
74
+ device=device,
75
+ )
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ def whisper_api(temp_audio_path):
79
  result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
80
  print(result["text"])
81
  return result