Spaces:
Running
Running
Michael Natanael
committed on
Commit
·
feae468
1
Parent(s):
1630f77
change transcribe mechanism when uploading audio
Browse files
- Dockerfile +6 -1
- app.py +25 -24
Dockerfile
CHANGED
@@ -3,6 +3,11 @@
|
|
3 |
|
4 |
FROM python:3.9
|
5 |
|
|
|
|
|
|
|
|
|
|
|
6 |
RUN apt update
|
7 |
RUN apt --yes install ffmpeg
|
8 |
|
@@ -17,4 +22,4 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
|
17 |
|
18 |
COPY --chown=user . /app
|
19 |
# CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
|
20 |
-
CMD ["gunicorn", "--timeout", "
|
|
|
3 |
|
4 |
FROM python:3.9
|
5 |
|
6 |
+
# Set proper Python threading configuration
|
7 |
+
ENV OMP_NUM_THREADS=4
|
8 |
+
ENV NUMEXPR_NUM_THREADS=4
|
9 |
+
ENV MKL_NUM_THREADS=4
|
10 |
+
|
11 |
RUN apt update
|
12 |
RUN apt --yes install ffmpeg
|
13 |
|
|
|
22 |
|
23 |
COPY --chown=user . /app
|
24 |
# CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
|
25 |
+
CMD ["gunicorn", "--timeout", "120", "--workers", "2", "-b", "0.0.0.0:7860", "app:app"]
|
app.py
CHANGED
@@ -49,32 +49,33 @@ model = MultiClassModel.load_from_checkpoint(
|
|
49 |
)
|
50 |
model.eval()
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
def whisper_api(temp_audio_path):
|
54 |
-
# https://huggingface.co/openai/whisper-large-v3
|
55 |
-
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
56 |
-
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
57 |
-
|
58 |
-
model_id = "openai/whisper-large-v3"
|
59 |
-
|
60 |
-
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
61 |
-
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
62 |
-
)
|
63 |
-
model.to(device)
|
64 |
-
|
65 |
-
processor = AutoProcessor.from_pretrained(model_id)
|
66 |
-
|
67 |
-
pipe = pipeline(
|
68 |
-
"automatic-speech-recognition",
|
69 |
-
model=model,
|
70 |
-
tokenizer=processor.tokenizer,
|
71 |
-
feature_extractor=processor.feature_extractor,
|
72 |
-
chunk_length_s=10,
|
73 |
-
batch_size=4, # batch size for inference - set based on your device
|
74 |
-
torch_dtype=torch_dtype,
|
75 |
-
device=device,
|
76 |
-
)
|
77 |
|
|
|
78 |
result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
|
79 |
print(result["text"])
|
80 |
return result
|
|
|
49 |
)
|
50 |
model.eval()
|
51 |
|
52 |
+
# === INITIAL SETUP: Whisper Pipeline ===
|
53 |
+
# https://huggingface.co/openai/whisper-large-v3
|
54 |
+
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
55 |
+
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
56 |
+
|
57 |
+
model_id = "openai/whisper-large-v3"
|
58 |
+
|
59 |
+
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
60 |
+
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
61 |
+
)
|
62 |
+
whisper_model.to(device)
|
63 |
+
|
64 |
+
processor = AutoProcessor.from_pretrained(model_id)
|
65 |
+
|
66 |
+
pipe = pipeline(
|
67 |
+
"automatic-speech-recognition",
|
68 |
+
model=whisper_model,
|
69 |
+
tokenizer=processor.tokenizer,
|
70 |
+
feature_extractor=processor.feature_extractor,
|
71 |
+
chunk_length_s=10,
|
72 |
+
batch_size=4, # batch size for inference - set based on your device
|
73 |
+
torch_dtype=torch_dtype,
|
74 |
+
device=device,
|
75 |
+
)
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
+
def whisper_api(temp_audio_path):
|
79 |
result = pipe(temp_audio_path, return_timestamps=False, generate_kwargs={"language": "indonesian"})
|
80 |
print(result["text"])
|
81 |
return result
|