Spaces:
Build error
Build error
Make a separate process timeout for diarization
Browse files
- app.py +12 -2
- config.json5 +3 -1
- src/config.py +3 -1
- src/diarization/diarizationContainer.py +2 -1
app.py
CHANGED
|
@@ -33,7 +33,7 @@ import ffmpeg
|
|
| 33 |
import gradio as gr
|
| 34 |
|
| 35 |
from src.download import ExceededMaximumDuration, download_url
|
| 36 |
-
from src.utils import optional_int, slugify, write_srt, write_vtt
|
| 37 |
from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
| 38 |
from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
|
| 39 |
from src.whisper.whisperFactory import create_whisper_container
|
|
@@ -95,7 +95,8 @@ class WhisperTranscriber:
|
|
| 95 |
def set_diarization(self, auth_token: str, enable_daemon_process: bool = True, **kwargs):
|
| 96 |
if self.diarization is None:
|
| 97 |
self.diarization = DiarizationContainer(auth_token=auth_token, enable_daemon_process=enable_daemon_process,
|
| 98 |
-
auto_cleanup_timeout_seconds=self.
|
|
|
|
| 99 |
# Set parameters
|
| 100 |
self.diarization_kwargs = kwargs
|
| 101 |
|
|
@@ -688,6 +689,15 @@ if __name__ == '__main__':
|
|
| 688 |
help="the compute type to use for inference")
|
| 689 |
parser.add_argument("--threads", type=optional_int, default=0,
|
| 690 |
help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 691 |
|
| 692 |
args = parser.parse_args().__dict__
|
| 693 |
|
|
|
|
| 33 |
import gradio as gr
|
| 34 |
|
| 35 |
from src.download import ExceededMaximumDuration, download_url
|
| 36 |
+
from src.utils import optional_int, slugify, str2bool, write_srt, write_vtt
|
| 37 |
from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscriptionConfig, TranscriptionConfig, VadPeriodicTranscription, VadSileroTranscription
|
| 38 |
from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
|
| 39 |
from src.whisper.whisperFactory import create_whisper_container
|
|
|
|
| 95 |
def set_diarization(self, auth_token: str, enable_daemon_process: bool = True, **kwargs):
|
| 96 |
if self.diarization is None:
|
| 97 |
self.diarization = DiarizationContainer(auth_token=auth_token, enable_daemon_process=enable_daemon_process,
|
| 98 |
+
auto_cleanup_timeout_seconds=self.app_config.diarization_process_timeout,
|
| 99 |
+
cache=self.model_cache)
|
| 100 |
# Set parameters
|
| 101 |
self.diarization_kwargs = kwargs
|
| 102 |
|
|
|
|
| 689 |
help="the compute type to use for inference")
|
| 690 |
parser.add_argument("--threads", type=optional_int, default=0,
|
| 691 |
help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
|
| 692 |
+
|
| 693 |
+
parser.add_argument('--auth_token', type=str, default=default_app_config.auth_token, help='HuggingFace API Token (optional)')
|
| 694 |
+
parser.add_argument("--diarization", type=str2bool, default=default_app_config.diarization, \
|
| 695 |
+
help="whether to perform speaker diarization")
|
| 696 |
+
parser.add_argument("--diarization_num_speakers", type=int, default=default_app_config.diarization_speakers, help="Number of speakers")
|
| 697 |
+
parser.add_argument("--diarization_min_speakers", type=int, default=default_app_config.diarization_min_speakers, help="Minimum number of speakers")
|
| 698 |
+
parser.add_argument("--diarization_max_speakers", type=int, default=default_app_config.diarization_max_speakers, help="Maximum number of speakers")
|
| 699 |
+
parser.add_argument("--diarization_process_timeout", type=int, default=default_app_config.diarization_process_timeout, \
|
| 700 |
+
help="Number of seconds before inactivate diarization processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
|
| 701 |
|
| 702 |
args = parser.parse_args().__dict__
|
| 703 |
|
config.json5
CHANGED
|
@@ -148,5 +148,7 @@
|
|
| 148 |
// The minimum number of speakers to detect
|
| 149 |
"diarization_min_speakers": 1,
|
| 150 |
// The maximum number of speakers to detect
|
| 151 |
-
"diarization_max_speakers":
|
|
|
|
|
|
|
| 152 |
}
|
|
|
|
| 148 |
// The minimum number of speakers to detect
|
| 149 |
"diarization_min_speakers": 1,
|
| 150 |
// The maximum number of speakers to detect
|
| 151 |
+
"diarization_max_speakers": 8,
|
| 152 |
+
// The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or null for no timeout.
|
| 153 |
+
"diarization_process_timeout": 60,
|
| 154 |
}
|
src/config.py
CHANGED
|
@@ -72,7 +72,8 @@ class ApplicationConfig:
|
|
| 72 |
highlight_words: bool = False,
|
| 73 |
# Diarization
|
| 74 |
auth_token: str = None, diarization: bool = False, diarization_speakers: int = 2,
|
| 75 |
-
diarization_min_speakers: int = 1, diarization_max_speakers: int = 5
|
|
|
|
| 76 |
|
| 77 |
self.models = models
|
| 78 |
|
|
@@ -130,6 +131,7 @@ class ApplicationConfig:
|
|
| 130 |
self.diarization_speakers = diarization_speakers
|
| 131 |
self.diarization_min_speakers = diarization_min_speakers
|
| 132 |
self.diarization_max_speakers = diarization_max_speakers
|
|
|
|
| 133 |
|
| 134 |
def get_model_names(self):
|
| 135 |
return [ x.name for x in self.models ]
|
|
|
|
| 72 |
highlight_words: bool = False,
|
| 73 |
# Diarization
|
| 74 |
auth_token: str = None, diarization: bool = False, diarization_speakers: int = 2,
|
| 75 |
+
diarization_min_speakers: int = 1, diarization_max_speakers: int = 5,
|
| 76 |
+
diarization_process_timeout: int = 60):
|
| 77 |
|
| 78 |
self.models = models
|
| 79 |
|
|
|
|
| 131 |
self.diarization_speakers = diarization_speakers
|
| 132 |
self.diarization_min_speakers = diarization_min_speakers
|
| 133 |
self.diarization_max_speakers = diarization_max_speakers
|
| 134 |
+
self.diarization_process_timeout = diarization_process_timeout
|
| 135 |
|
| 136 |
def get_model_names(self):
|
| 137 |
return [ x.name for x in self.models ]
|
src/diarization/diarizationContainer.py
CHANGED
|
@@ -16,7 +16,8 @@ class DiarizationContainer:
|
|
| 16 |
# Create parallel context if needed
|
| 17 |
if self.diarization_context is None and self.enable_daemon_process:
|
| 18 |
# Number of processes is set to 1 as we mainly use this in order to clean up GPU memory
|
| 19 |
-
self.diarization_context = ParallelContext(num_processes=1)
|
|
|
|
| 20 |
|
| 21 |
# Run directly
|
| 22 |
if self.diarization_context is None:
|
|
|
|
| 16 |
# Create parallel context if needed
|
| 17 |
if self.diarization_context is None and self.enable_daemon_process:
|
| 18 |
# Number of processes is set to 1 as we mainly use this in order to clean up GPU memory
|
| 19 |
+
self.diarization_context = ParallelContext(num_processes=1, auto_cleanup_timeout_seconds=self.auto_cleanup_timeout_seconds)
|
| 20 |
+
print("Created diarization context with auto cleanup timeout of %d seconds" % self.auto_cleanup_timeout_seconds)
|
| 21 |
|
| 22 |
# Run directly
|
| 23 |
if self.diarization_context is None:
|