Tomtom84 committed on
Commit 629c68f · verified · 1 Parent(s): 2187970

Delete engines

engines/__init__.py DELETED
@@ -1,30 +0,0 @@
- from RealtimeTTS.engines import BaseEngine, TimingInfo
- from .orpheus_engine import OrpheusEngine
-
- __all__ = [
-     "BaseEngine", "TimingInfo",
-     "OrpheusEngine", "OrpheusVoice",
- ]
-
-
- # Lazy loader functions for the engines in this subpackage.
-
-
- def _load_orpheus_engine():
-     from .orpheus_engine import OrpheusEngine, OrpheusVoice
-     globals()["OrpheusEngine"] = OrpheusEngine
-     globals()["OrpheusVoice"] = OrpheusVoice
-     return OrpheusEngine
-
-
- # Map attribute names to lazy loader functions.
- _lazy_imports = {
-     "OrpheusEngine": _load_orpheus_engine,
-     "OrpheusVoice": _load_orpheus_engine,
- }
-
-
- def __getattr__(name):
-     if name in _lazy_imports:
-         return _lazy_imports[name]()
-     raise AttributeError(f"module {__name__} has no attribute {name}")
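
Side note: the lazy loading in the deleted __init__.py relies on module-level __getattr__ (PEP 562), so an attribute that is not bound at import time gets resolved, and cached, on first access. A minimal self-contained sketch of the same pattern, with json.JSONDecoder standing in for the heavyweight OrpheusEngine import:

# lazy_demo.py, a sketch of the PEP 562 lazy-import pattern used above
__all__ = ["JSONDecoder"]

def _load_json_decoder():
    from json import JSONDecoder             # deferred until first attribute access
    globals()["JSONDecoder"] = JSONDecoder    # cache so __getattr__ runs only once
    return JSONDecoder

_lazy_imports = {"JSONDecoder": _load_json_decoder}

def __getattr__(name):
    if name in _lazy_imports:
        return _lazy_imports[name]()
    raise AttributeError(f"module {__name__} has no attribute {name}")

Importing lazy_demo and touching lazy_demo.JSONDecoder for the first time triggers the loader; later accesses hit the cached global directly.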
 
engines/base_engines.py DELETED
@@ -1,299 +0,0 @@
- """
- This module defines a base framework for speech synthesis engines. It includes:
- - A TimingInfo class to capture timing details (start, end, and word) of audio segments.
- - A BaseEngine abstract class (using a custom metaclass) that sets up default properties and common audio processing methods (such as applying fade-ins/outs and trimming silence) along with abstract methods for voice management and synthesis.
- """
-
- import torch.multiprocessing as mp
- from abc import ABCMeta, ABC
- from typing import Union
- import numpy as np
- import shutil
- import queue
-
- class TimingInfo:
-     def __init__(self, start_time, end_time, word):
-         self.start_time = start_time
-         self.end_time = end_time
-         self.word = word
-
-     def __str__(self):
-         return f"Word: {self.word}, Start Time: {self.start_time}, End Time: {self.end_time}"
-
- # Define a meta class that will automatically call the BaseEngine's __init__ method
- # and also the post_init method if it exists.
- class BaseInitMeta(ABCMeta):
-     def __call__(cls, *args, **kwargs):
-         # Create an instance of the class that this meta class is used on.
-         instance = super().__call__(*args, **kwargs)
-
-         # Call the __init__ method of BaseEngine to set default properties.
-         BaseEngine.__init__(instance)
-
-         # If the instance has a post_init method, call it.
-         # This allows subclasses to define additional initialization steps.
-         if hasattr(instance, "post_init"):
-             instance.post_init()
-
-         return instance
-
-
- # Define a base class for engines with the custom meta class.
- class BaseEngine(ABC, metaclass=BaseInitMeta):
-     def __init__(self):
-         self.engine_name = "unknown"
-
-         # Indicates if the engine can handle generators.
-         self.can_consume_generators = False
-
-         # Queue to manage audio chunks for the engine.
-         self.queue = queue.Queue()
-
-         # Queue to manage word level timings for the engine.
-         self.timings = queue.Queue()
-
-         # Callback to be called when an audio chunk is available.
-         self.on_audio_chunk = None
-
-         # Callback to be called when the engine is starting to synthesize audio.
-         self.on_playback_start = None
-
-         self.stop_synthesis_event = mp.Event()
-
-         self.reset_audio_duration()
-
-     def reset_audio_duration(self):
-         """
-         Resets the audio duration to 0.
-         """
-         self.audio_duration = 0
-
-     def apply_fade_in(self, audio: np.ndarray, sample_rate: int = -1, fade_duration_ms: int = 15) -> np.ndarray:
-         """
-         Applies a linear fade-in over fade_duration_ms at the start of the audio.
-         """
-         sample_rate = self.verify_sample_rate(sample_rate)
-         audio = audio.copy()
-
-         fade_samples = int(sample_rate * fade_duration_ms / 1000)
-         if fade_samples == 0 or len(audio) < fade_samples:
-             fade_samples = len(audio)
-         fade_in = np.linspace(0.0, 1.0, fade_samples)
-         audio[:fade_samples] *= fade_in
-         return audio
-
-     def apply_fade_out(self, audio: np.ndarray, sample_rate: int = -1, fade_duration_ms: int = 15) -> np.ndarray:
-         """
-         Applies a linear fade-out over fade_duration_ms at the end of the audio.
-         """
-         sample_rate = self.verify_sample_rate(sample_rate)
-         audio = audio.copy()
-
-         fade_samples = int(sample_rate * fade_duration_ms / 1000)
-         if fade_samples == 0 or len(audio) < fade_samples:
-             fade_samples = len(audio)
-         fade_out = np.linspace(1.0, 0.0, fade_samples)
-         audio[-fade_samples:] *= fade_out
-         return audio
-
-     def trim_silence_start(
-         self,
-         audio_data: np.ndarray,
-         sample_rate: int = 24000,
-         silence_threshold: float = 0.01,
-         extra_ms: int = 25,
-         fade_in_ms: int = 15
-     ) -> np.ndarray:
-         """
-         Removes leading silence from audio_data, applies extra trimming, and fades-in if trimming occurred.
-
-         Args:
-             audio_data (np.ndarray): The audio data to process.
-             sample_rate (int): The sample rate of the audio data.
-             silence_threshold (float): The threshold for silence detection.
-             extra_ms (int): Additional milliseconds to trim from the start.
-             fade_in_ms (int): Milliseconds for fade-in effect.
-         """
-         sample_rate = self.verify_sample_rate(sample_rate)
-         trimmed = False
-         audio_data = audio_data.copy()
-         non_silent = np.where(np.abs(audio_data) > silence_threshold)[0]
-         if len(non_silent) > 0:
-             start_index = non_silent[0]
-             if start_index > 0:
-                 trimmed = True
-                 audio_data = audio_data[start_index:]
-
-         extra_samples = int(extra_ms * sample_rate / 1000)
-         if extra_samples > 0 and len(audio_data) > extra_samples:
-             audio_data = audio_data[extra_samples:]
-             trimmed = True
-
-         if trimmed:
-             audio_data = self.apply_fade_in(audio_data, sample_rate, fade_in_ms)
-         return audio_data
-
-     def trim_silence_end(
-         self,
-         audio_data: np.ndarray,
-         sample_rate: int = -1,
-         silence_threshold: float = 0.01,
-         extra_ms: int = 50,
-         fade_out_ms: int = 15
-     ) -> np.ndarray:
-         """
-         Removes trailing silence from audio_data, applies extra trimming, and fades-out if trimming occurred.
-
-         Args:
-             audio_data (np.ndarray): The audio data to be trimmed.
-             sample_rate (int): The sample rate of the audio data. Default is -1.
-             silence_threshold (float): The threshold below which audio is considered silent. Default is 0.01.
-             extra_ms (int): Extra milliseconds to trim from the end of the audio. Default is 50.
-             fade_out_ms (int): Milliseconds for fade-out effect at the end of the audio. Default is 15.
-         """
-         sample_rate = self.verify_sample_rate(sample_rate)
-         trimmed = False
-         audio_data = audio_data.copy()
-         non_silent = np.where(np.abs(audio_data) > silence_threshold)[0]
-         if len(non_silent) > 0:
-             end_index = non_silent[-1] + 1
-             if end_index < len(audio_data):
-                 trimmed = True
-                 audio_data = audio_data[:end_index]
-
-         extra_samples = int(extra_ms * sample_rate / 1000)
-         if extra_samples > 0 and len(audio_data) > extra_samples:
-             audio_data = audio_data[:-extra_samples]
-             trimmed = True
-
-         if trimmed:
-             audio_data = self.apply_fade_out(audio_data, sample_rate, fade_out_ms)
-         return audio_data
-
-     def verify_sample_rate(self, sample_rate: int) -> int:
-         """
-         Verifies and returns the sample rate.
-         If the sample rate is -1, it will be obtained from the engine's configuration.
-         """
-         if sample_rate == -1:
-             _, _, sample_rate = self.get_stream_info()
-             if sample_rate == -1:
-                 raise ValueError("Sample rate must be provided or obtained from get_stream_info.")
-         return sample_rate
-
-     def _trim_silence(
-         self,
-         audio_data: np.ndarray,
-         sample_rate: int = -1,
-         silence_threshold: float = 0.005,
-         extra_start_ms: int = 15,
-         extra_end_ms: int = 15,
-         fade_in_ms: int = 10,
-         fade_out_ms: int = 10
-     ) -> np.ndarray:
-         """
-         Removes silence from both the start and end of audio_data.
-         If trimming occurs on either end, the corresponding fade is applied.
-         """
-         sample_rate = self.verify_sample_rate(sample_rate)
-
-         audio_data = self.trim_silence_start(
-             audio_data, sample_rate, silence_threshold, extra_start_ms, fade_in_ms
-         )
-         audio_data = self.trim_silence_end(
-             audio_data, sample_rate, silence_threshold, extra_end_ms, fade_out_ms
-         )
-         return audio_data
-
-
-     def get_stream_info(self):
-         """
-         Returns the audio stream configuration information suitable for PyAudio.
-
-         Returns:
-             tuple: A tuple containing the audio format, number of channels, and the sample rate.
-                 - Format (int): The format of the audio stream. pyaudio.paInt16 represents 16-bit integers.
-                 - Channels (int): The number of audio channels. 1 represents mono audio.
-                 - Sample Rate (int): The sample rate of the audio in Hz. 16000 represents 16kHz sample rate.
-         """
-         raise NotImplementedError(
-             "The get_stream_info method must be implemented by the derived class."
-         )
-
-     def synthesize(self, text: str) -> bool:
-         """
-         Synthesizes text to audio stream.
-
-         Args:
-             text (str): Text to synthesize.
-         """
-         self.stop_synthesis_event.clear()
-
-     def get_voices(self):
-         """
-         Retrieves the voices available from the specific voice source.
-
-         This method should be overridden by the derived class to fetch the list of available voices.
-
-         Returns:
-             list: A list containing voice objects representing each available voice.
-         """
-         raise NotImplementedError(
-             "The get_voices method must be implemented by the derived class."
-         )
-
-     def set_voice(self, voice: Union[str, object]):
-         """
-         Sets the voice to be used for speech synthesis.
-
-         Args:
-             voice (Union[str, object]): The voice to be used for speech synthesis.
-
-         This method should be overridden by the derived class to set the desired voice.
-         """
-         raise NotImplementedError(
-             "The set_voice method must be implemented by the derived class."
-         )
-
-     def set_voice_parameters(self, **voice_parameters):
-         """
-         Sets the voice parameters to be used for speech synthesis.
-
-         Args:
-             **voice_parameters: The voice parameters to be used for speech synthesis.
-
-         This method should be overridden by the derived class to set the desired voice parameters.
-         """
-         raise NotImplementedError(
-             "The set_voice_parameters method must be implemented by the derived class."
-         )
-
-     def shutdown(self):
-         """
-         Shuts down the engine.
-         """
-         pass
-
-     def is_installed(self, lib_name: str) -> bool:
-         """
-         Check if the given library or software is installed and accessible.
-
-         This method uses shutil.which to determine if the given library or software is
-         installed and available in the system's PATH.
-
-         Args:
-             lib_name (str): Name of the library or software to check.
-
-         Returns:
-             bool: True if the library is installed, otherwise False.
-         """
-         lib = shutil.which(lib_name)
-         if lib is None:
-             return False
-         return True
-
-     def stop(self):
-         """
-         Stops the engine.
-         """
-         self.stop_synthesis_event.set()
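
Reading aid: because of the BaseInitMeta metaclass, a subclass of BaseEngine never has to call super().__init__() itself; BaseEngine.__init__ and an optional post_init() hook run automatically after the subclass constructor returns. A minimal sketch of an engine built on this base (the SilentEngine name and its fixed half second of silence are invented for illustration):

import numpy as np
import pyaudio
from RealtimeTTS.engines import BaseEngine   # same base class as above

class SilentEngine(BaseEngine):
    """Toy engine that emits half a second of silence per call."""

    def post_init(self):
        # Invoked by BaseInitMeta after BaseEngine.__init__ has set the defaults.
        self.engine_name = "silent"

    def get_stream_info(self):
        return pyaudio.paInt16, 1, 24000           # format, channels, sample rate

    def synthesize(self, text: str) -> bool:
        super().synthesize(text)                    # clears stop_synthesis_event
        samples = np.zeros(12000, dtype=np.int16)   # 0.5 s of silence at 24 kHz
        self.queue.put(samples.tobytes())           # queue created by BaseEngine.__init__
        return True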
 
engines/orpheus_decoder.py DELETED
@@ -1,141 +0,0 @@
- from snac import SNAC
- import numpy as np
- import torch
- import asyncio
- import threading
- import queue
-
-
- model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval()
-
- # Check if CUDA is available and set device accordingly
- snac_device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
- model = model.to(snac_device)
-
- def convert_to_audio(multiframe, count):
-     frames = []
-     if len(multiframe) < 7:
-         return
-
-     codes_0 = torch.tensor([], device=snac_device, dtype=torch.int32)
-     codes_1 = torch.tensor([], device=snac_device, dtype=torch.int32)
-     codes_2 = torch.tensor([], device=snac_device, dtype=torch.int32)
-
-     num_frames = len(multiframe) // 7
-     frame = multiframe[:num_frames*7]
-
-     for j in range(num_frames):
-         i = 7*j
-         if codes_0.shape[0] == 0:
-             codes_0 = torch.tensor([frame[i]], device=snac_device, dtype=torch.int32)
-         else:
-             codes_0 = torch.cat([codes_0, torch.tensor([frame[i]], device=snac_device, dtype=torch.int32)])
-
-         if codes_1.shape[0] == 0:
-             codes_1 = torch.tensor([frame[i+1]], device=snac_device, dtype=torch.int32)
-             codes_1 = torch.cat([codes_1, torch.tensor([frame[i+4]], device=snac_device, dtype=torch.int32)])
-         else:
-             codes_1 = torch.cat([codes_1, torch.tensor([frame[i+1]], device=snac_device, dtype=torch.int32)])
-             codes_1 = torch.cat([codes_1, torch.tensor([frame[i+4]], device=snac_device, dtype=torch.int32)])
-
-         if codes_2.shape[0] == 0:
-             codes_2 = torch.tensor([frame[i+2]], device=snac_device, dtype=torch.int32)
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+3]], device=snac_device, dtype=torch.int32)])
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+5]], device=snac_device, dtype=torch.int32)])
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+6]], device=snac_device, dtype=torch.int32)])
-         else:
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+2]], device=snac_device, dtype=torch.int32)])
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+3]], device=snac_device, dtype=torch.int32)])
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+5]], device=snac_device, dtype=torch.int32)])
-             codes_2 = torch.cat([codes_2, torch.tensor([frame[i+6]], device=snac_device, dtype=torch.int32)])
-
-     codes = [codes_0.unsqueeze(0), codes_1.unsqueeze(0), codes_2.unsqueeze(0)]
-     # check that all tokens are between 0 and 4096, otherwise return
-     if torch.any(codes[0] < 0) or torch.any(codes[0] > 4096) or torch.any(codes[1] < 0) or torch.any(codes[1] > 4096) or torch.any(codes[2] < 0) or torch.any(codes[2] > 4096):
-         return
-
-     with torch.inference_mode():
-         audio_hat = model.decode(codes)
-
-     audio_slice = audio_hat[:, :, 2048:4096]
-     detached_audio = audio_slice.detach().cpu()
-     audio_np = detached_audio.numpy()
-     audio_int16 = (audio_np * 32767).astype(np.int16)
-     audio_bytes = audio_int16.tobytes()
-     return audio_bytes
-
- def turn_token_into_id(token_string, index):
-     # Strip whitespace
-     token_string = token_string.strip()
-
-     # Find the last token in the string
-     last_token_start = token_string.rfind("<custom_token_")
-
-     if last_token_start == -1:
-         print("No token found in the string")
-         return None
-
-     # Extract the last token
-     last_token = token_string[last_token_start:]
-
-     # Process the last token
-     if last_token.startswith("<custom_token_") and last_token.endswith(">"):
-         try:
-             number_str = last_token[14:-1]
-             return int(number_str) - 10 - ((index % 7) * 4096)
-         except ValueError:
-             return None
-     else:
-         return None
-
-
- async def tokens_decoder(token_gen):
-     buffer = []
-     count = 0
-     async for token_sim in token_gen:
-         token = turn_token_into_id(token_sim, count)
-         if token is None:
-             pass
-         else:
-             if token > 0:
-                 buffer.append(token)
-                 count += 1
-
-                 if count % 7 == 0 and count > 27:
-                     buffer_to_proc = buffer[-28:]
-                     audio_samples = convert_to_audio(buffer_to_proc, count)
-                     if audio_samples is not None:
-                         yield audio_samples
-
-
- # ------------------ Synchronous Tokens Decoder Wrapper ------------------ #
- def tokens_decoder_sync(syn_token_gen):
-
-     audio_queue = queue.Queue()
-
-     # Convert the synchronous token generator into an async generator.
-     async def async_token_gen():
-         for token in syn_token_gen:
-             yield token
-
-     async def async_producer():
-         # tokens_decoder is an async generator that processes tokens.
-         async for audio_chunk in tokens_decoder(async_token_gen()):
-             audio_queue.put(audio_chunk)
-         audio_queue.put(None)  # Sentinel
-
-     def run_async():
-         asyncio.run(async_producer())
-
-     thread = threading.Thread(target=run_async)
-     thread.start()
-
-     while True:
-         audio = audio_queue.get()
-         if audio is None:
-             break
-         yield audio
-
-     thread.join()
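
The synchronous wrapper above can be driven by any plain generator of "<custom_token_N>" strings; audio only starts once more than 27 accepted tokens (four 7-code frames) have accumulated. A rough usage sketch, assuming the module is still importable under its old path engines.orpheus_decoder (importing it loads the SNAC model), with placeholder token numbers rather than real model output:

from engines.orpheus_decoder import tokens_decoder_sync

def fake_token_stream():
    # Placeholder tokens in the "<custom_token_N>" format the decoder expects;
    # a real stream would come from the Orpheus model, not from this range().
    for i in range(64):
        n = 100 + 10 + (i % 7) * 4096   # inverse of turn_token_into_id's offset math
        yield f"<custom_token_{n}>"

with open("out.pcm", "wb") as f:
    for pcm_chunk in tokens_decoder_sync(fake_token_stream()):
        f.write(pcm_chunk)              # raw 16-bit mono PCM at 24 kHz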
 
engines/orpheus_engine.py DELETED
@@ -1,325 +0,0 @@
- # -*- coding: utf-8 -*-
- """OrpheusEngine
- ~~~~~~~~~~~~~~~~
- A drop‑in replacement for the original ``orpheus_engine.py`` that fixes
- all outstanding token‑streaming issues and eliminates audible clicks by
-
- * streaming **token‑IDs** instead of partial text
- * dynamically sending a *tiny* first audio chunk (3×7 codes) followed by
-   steady blocks (30×7)
- * mapping vLLM/OpenAI token‑IDs → SNAC codes without fragile
-   ``"<custom_token_"`` string parsing
- * adding an optional fade‑in / fade‑out per chunk
- * emitting a proper WAV header as the first element in the queue so that
-   browsers / HTML5 `<audio>` tags start playback immediately.
-
- The API (``get_voices()``, ``set_voice()``, …) is unchanged, so you can
- keep using it from RealTimeTTS.
- """
-
- from __future__ import annotations
- from snac import SNAC, __version__ as snac_version
-
-
- ###############################################################################
- # Standard library & 3rd‑party imports                                        #
- ###############################################################################
- import json
- import logging
- import struct
- import time
- import os
- import torch
- from queue import Queue
- from typing import Generator, Iterable, List, Optional
-
- import numpy as np
- import pyaudio  # provided by RealTimeTTS[system]
- import requests
- from RealtimeTTS.engines import BaseEngine
-
-
- ###############################################################################
- # Constants                                                                   #
- ###############################################################################
- DEFAULT_API_URL = "http://127.0.0.1:1234"
- DEFAULT_MODEL = "SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"
- DEFAULT_HEADERS = {"Content-Type": "application/json"}
- DEFAULT_VOICE = "Martin"
-
- # Audio
- SAMPLE_RATE = 24_000
- BITS_PER_SAMPLE = 16
- AUDIO_CHANNELS = 1
-
- # Token‑ID magic numbers (defined in the model card)
- CODE_START_TOKEN_ID = 128257   # <|audio|>
- CODE_REMOVE_TOKEN_ID = 128258
- CODE_TOKEN_OFFSET = 128266     # <custom_token_?> – first usable code id
-
- # Chunking strategy
- _INITIAL_GROUPS = 3    # 3×7 = 21 codes ≈ 90 ms @24 kHz
- _STEADY_GROUPS = 30    # 30×7 = 210 codes ≈ 900 ms
-
-
- SNAC_MODEL = os.getenv("SNAC_MODEL", "hubertsiuzdak/snac_24khz")
-
-
- ###############################################################################
- # Helper functions                                                            #
- ###############################################################################
-
- def _create_wav_header(sample_rate: int, bits_per_sample: int, channels: int) -> bytes:
-     """Return a 44‑byte WAV/PCM header with unknown data size (0xFFFFFFFF)."""
-     riff_size = 0xFFFFFFFF
-     header = b"RIFF" + struct.pack("<I", riff_size) + b"WAVEfmt "
-     header += struct.pack("<IHHIIHH", 16, 1, channels, sample_rate,
-                           sample_rate * channels * bits_per_sample // 8,
-                           channels * bits_per_sample // 8, bits_per_sample)
-     header += b"data" + struct.pack("<I", 0xFFFFFFFF)
-     return header
-
-
- def _fade_in_out(audio: np.ndarray, fade_ms: int = 50) -> np.ndarray:
-     """Apply linear fade‑in/out to avoid clicks."""
-     if fade_ms <= 0:
-         return audio
-     fade_samples = int(SAMPLE_RATE * fade_ms / 1000)
-     fade_samples -= fade_samples % 2  # keep it even
-     if fade_samples == 0 or audio.size < 2 * fade_samples:
-         return audio
-     ramp = np.linspace(0.0, 1.0, fade_samples, dtype=np.float32)
-     audio[:fade_samples] *= ramp
-     audio[-fade_samples:] *= ramp[::-1]
-     return audio
-
- ###############################################################################
- # SNAC – lightweight wrapper                                                  #
- ###############################################################################
- try:
-     from snac import SNAC
-     _snac_model: Optional[SNAC] = SNAC.from_pretrained("hubertsiuzdak/snac_24khz").eval()
-     _snac_model = _snac_model.to("cuda" if torch.cuda.is_available() else "cpu")
- except Exception as exc:  # pragma: no cover
-     logging.warning("SNAC model could not be loaded – %s", exc)
-     _snac_model = None
-
-
- def _codes_to_audio(codes: List[int]) -> bytes:
-     """Convert a *flat* list of SNAC codes to 16‑bit PCM bytes."""
-     if not _snac_model or not codes:
-         return b""
-
-     # --- redistribute into 3 snac layers (see original paper) --------------
-     groups = len(codes) // 7
-     codes = codes[: groups * 7]  # trim incomplete tail
-     if groups == 0:
-         return b""
-
-     l1, l2, l3 = [], [], []
-     for g in range(groups):
-         base = g * 7
-         l1.append(codes[base])
-         l2.append(codes[base + 1] - 4096)
-         l3.extend([
-             codes[base + 2] - 2 * 4096,
-             codes[base + 3] - 3 * 4096,
-             codes[base + 5] - 5 * 4096,
-             codes[base + 6] - 6 * 4096,
-         ])
-         l2.append(codes[base + 4] - 4 * 4096)
-
-     import torch
-
-     with torch.no_grad():
-         layers = [
-             torch.tensor(l1, device=_snac_model.device).unsqueeze(0),
-             torch.tensor(l2, device=_snac_model.device).unsqueeze(0),
-             torch.tensor(l3, device=_snac_model.device).unsqueeze(0),
-         ]
-         wav = _snac_model.decode(layers).cpu().numpy().squeeze()
-
-     wav = _fade_in_out(wav)
-     pcm = np.clip(wav * 32767, -32768, 32767).astype(np.int16).tobytes()
-     return pcm
-
- ###############################################################################
- # Main class                                                                  #
- ###############################################################################
- class OrpheusVoice:
-     def __init__(self, name: str, gender: str | None = None):
-         self.name = name
-         self.gender = gender
-
-
- class OrpheusEngine(BaseEngine):
-     """Realtime TTS engine using the Orpheus SNAC model via vLLM."""
-
-     _SPEAKERS = [
-         OrpheusVoice("Martin", "m"), OrpheusVoice("Emma", "f"),
-         OrpheusVoice("Luca", "m"), OrpheusVoice("Anna", "f"),
-         OrpheusVoice("Jakob", "m"), OrpheusVoice("Anton", "m"),
-         OrpheusVoice("Julian", "m"), OrpheusVoice("Jan", "m"),
-         OrpheusVoice("Alexander", "m"), OrpheusVoice("Emil", "m"),
-         OrpheusVoice("Ben", "m"), OrpheusVoice("Elias", "m"),
-         OrpheusVoice("Felix", "m"), OrpheusVoice("Jonas", "m"),
-         OrpheusVoice("Noah", "m"), OrpheusVoice("Maximilian", "m"),
-         OrpheusVoice("Sophie", "f"), OrpheusVoice("Marie", "f"),
-         OrpheusVoice("Mia", "f"), OrpheusVoice("Maria", "f"),
-         OrpheusVoice("Sophia", "f"), OrpheusVoice("Lina", "f"),
-         OrpheusVoice("Lea", "f"),
-     ]
-     def _load_snac(self, model_name: str = SNAC_MODEL):
-         """
-         Loads the SNAC decoder onto CPU/GPU.
-         Falls back cleanly to CPU on any error.
-         """
-         device = "cuda" if torch.cuda.is_available() else "cpu"
-         try:
-             snac = SNAC.from_pretrained(model_name).to(device)
-             if device == "cuda":  # half() only on GPU – older SNAC versions have no .half()
-                 snac = snac.half()
-             snac.eval()
-             logging.info(f"SNAC {snac_version} loaded on {device}")
-             return snac
-         except Exception as e:
-             logging.exception("SNAC load failed – running with silent fallback")
-             return None
-     # ---------------------------------------------------------------------
-     def __init__(
-         self,
-         api_url: str = DEFAULT_API_URL,
-         model: str = DEFAULT_MODEL,
-         headers: dict = DEFAULT_HEADERS,
-         voice: Optional[OrpheusVoice] = None,
-         temperature: float = 0.6,
-         top_p: float = 0.9,
-         max_tokens: int = 1200,
-         repetition_penalty: float = 1.1,
-         debug: bool = False,
-     ) -> None:
-         super().__init__()
-         self.api_url = api_url.rstrip("/")
-         self.model = model
-         self.headers = headers
-         self.voice = voice or OrpheusVoice(DEFAULT_VOICE)
-         self.temperature = temperature
-         self.top_p = top_p
-         self.max_tokens = max_tokens
-         self.repetition_penalty = repetition_penalty
-         self.debug = debug
-         self.queue: "Queue[bytes | None]" = Queue()
-         self.snac = self._load_snac()  # load the decoder
-         if self.snac is None:  # fallback notice
-             logging.warning("⚠️ No SNAC – audio generation disabled.")
-         self.engine_name = "orpheus"
-
-     # ------------------------------------------------------------------ API
-     def get_stream_info(self):
-         return pyaudio.paInt16, AUDIO_CHANNELS, SAMPLE_RATE
-
-     def get_voices(self):
-         return self._SPEAKERS
-
-     def set_voice(self, voice_name: str):
-         if voice_name not in {v.name for v in self._SPEAKERS}:
-             raise ValueError(f"Unknown Orpheus speaker '{voice_name}'")
-         self.voice = OrpheusVoice(voice_name)
-
-     # --------------------------------------------------------------- public
-     def synthesize(self, text: str) -> bool:  # noqa: C901 (long)
-         """Start streaming TTS for **text** – blocks until finished."""
-         super().synthesize(text)
-         self.queue.put(_create_wav_header(SAMPLE_RATE, BITS_PER_SAMPLE, AUDIO_CHANNELS))
-
-         try:
-             code_stream = self._stream_snac_codes(text)
-             first_chunk = True
-             buffer: List[int] = []
-             sent = 0
-             groups_needed = _INITIAL_GROUPS
-
-             for code_id in code_stream:
-                 buffer.append(code_id)
-                 available = len(buffer) - sent
-                 if available >= groups_needed * 7:
-                     chunk_codes = buffer[sent : sent + groups_needed * 7]
-                     sent += groups_needed * 7
-                     pcm = _codes_to_audio(chunk_codes)
-                     if pcm:
-                         self.queue.put(pcm)
-                     first_chunk = False
-                     groups_needed = _STEADY_GROUPS
-
-             # flush remaining full groups
-             remaining = len(buffer) - sent
-             final_groups = remaining // 7
-             if final_groups:
-                 pcm = _codes_to_audio(buffer[sent : sent + final_groups * 7])
-                 if pcm:
-                     self.queue.put(pcm)
-
-             return True
-         except Exception as exc:  # pragma: no cover
-             logging.exception("OrpheusEngine: synthesis failed – %s", exc)
-             return False
-         finally:
-             self.queue.put(None)  # close stream
-
-     # ------------------------------------------------------------ internals
-     def _format_prompt(self, prompt: str) -> str:
-         return f"<|audio|>{self.voice.name}: {prompt}<|eot_id|>"
-
-     def _stream_snac_codes(self, prompt: str) -> Generator[int, None, None]:
-         """Yield SNAC code‑IDs as they arrive from the model."""
-         payload = {
-             "model": self.model,
-             "prompt": self._format_prompt(prompt),
-             "max_tokens": self.max_tokens,
-             "temperature": self.temperature,
-             "top_p": self.top_p,
-             "stream": True,
-             "skip_special_tokens": False,
-             "frequency_penalty": self.repetition_penalty,
-         }
-         url = f"{self.api_url}/v1/completions"  # plain completion endpoint
-         with requests.post(url, headers=self.headers, json=payload, stream=True, timeout=600) as r:
-             r.raise_for_status()
-             started = False
-             for line in r.iter_lines():
-                 if not line:
-                     continue
-                 if line.startswith(b"data: "):
-                     data = line[6:].decode()
-                     if data.strip() == "[DONE]":
-                         break
-                     try:
-                         obj = json.loads(data)
-                         delta = obj["choices"][0]
-                         tid: int = delta.get("token_id")  # vLLM ≥0.9 provides this
-                         if tid is None:
-                             # fallback: derive from text
-                             text_piece = delta.get("text", "")
-                             if not text_piece:
-                                 continue
-                             tid = ord(text_piece[-1])  # NOT reliable; skip
-                             continue
-                     except Exception:
-                         continue
-
-                     if not started:
-                         if tid == CODE_START_TOKEN_ID:
-                             started = True
-                         continue
-                     if tid == CODE_REMOVE_TOKEN_ID or tid < CODE_TOKEN_OFFSET:
-                         continue
-                     yield tid - CODE_TOKEN_OFFSET
-
-     # ------------------------------------------------------------------ misc
-     def __del__(self):
-         try:
-             self.queue.put(None)
-         except Exception:
-             pass
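
Because synthesize() pushes a WAV header first, then raw PCM chunks, and finally a None sentinel into self.queue, a consumer can simply concatenate everything it receives into a playable file. A rough sketch of that consumption loop, assuming a local completion server is running at DEFAULT_API_URL and that the module is still importable under its old path engines.orpheus_engine:

from engines.orpheus_engine import OrpheusEngine

engine = OrpheusEngine()            # uses DEFAULT_API_URL / DEFAULT_MODEL from above
engine.set_voice("Martin")

ok = engine.synthesize("Hallo, das ist ein kurzer Test.")  # blocks until finished

with open("orpheus_out.wav", "wb") as f:
    while True:
        chunk = engine.queue.get()
        if chunk is None:           # sentinel pushed in the finally: block
            break
        f.write(chunk)              # first the 44-byte WAV header, then PCM chunks

print("synthesis ok:", ok)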
 
engines/orpheus_engine_BU.py DELETED
@@ -1,374 +0,0 @@
- import json
- import time
- import logging
- import pyaudio
- import requests
- import traceback
- import numpy as np
- from queue import Queue
- from typing import Optional, Union
- from RealtimeTTS.engines import BaseEngine, TimingInfo
-
- # Default configuration values
- DEFAULT_API_URL = "http://127.0.0.1:1234"
- DEFAULT_HEADERS = {"Content-Type": "application/json"}
- DEFAULT_MODEL = "SebastianBodza/Kartoffel_Orpheus-3B_german_synthetic-v0.1"
- DEFAULT_VOICE = "Martin"
- STOP_SEQUENCE = "<custom_token_2>"
- SAMPLE_RATE = 24000  # Specific sample rate for Orpheus
-
- # Special token definitions for prompt formatting and token decoding
- START_TOKEN_ID = 128259
- END_TOKEN_IDS = [128009, 128260, 128261, 128257]
- CUSTOM_TOKEN_PREFIX = "<custom_token_"
-
- class OrpheusVoice:
-     def __init__(self, name: str, gender: str | None = None):
-         self.name = name
-         self.gender = gender  # optional, in case you want to display it
-
-
-
- class OrpheusEngine(BaseEngine):
-     """
-     Real-time Text-to-Speech (TTS) engine for the Orpheus model via LM Studio API.
-
-     This engine supports real-time token generation, audio synthesis, and voice configuration.
-
-     _SPEAKERS = [
-         # male
-         OrpheusVoice("Jakob", "m"),
-         OrpheusVoice("Anton", "m"),
-         OrpheusVoice("Julian", "m"),
-         OrpheusVoice("Jan", "m"),
-         OrpheusVoice("Alexander", "m"),
-         OrpheusVoice("Emil", "m"),
-         OrpheusVoice("Ben", "m"),
-         OrpheusVoice("Elias", "m"),
-         OrpheusVoice("Felix", "m"),
-         OrpheusVoice("Jonas", "m"),
-         OrpheusVoice("Noah", "m"),
-         OrpheusVoice("Maximilian", "m"),
-         # female
-         OrpheusVoice("Sophie", "f"),
-         OrpheusVoice("Marie", "f"),
-         OrpheusVoice("Mia", "f"),
-         OrpheusVoice("Maria", "f"),
-         OrpheusVoice("Sophia", "f"),
-         OrpheusVoice("Lina", "f"),
-         OrpheusVoice("Lea", "f"),
-     ]
-     """
-     _SPEAKERS = [
-         # male
-         OrpheusVoice("Martin", "m"),
-         OrpheusVoice("Luca", "m"),
-         # female
-         OrpheusVoice("Anne", "f"),
-         OrpheusVoice("Emma", "f"),
-     ]
-     def __init__(
-         self,
-         api_url: str = DEFAULT_API_URL,
-         model: str = DEFAULT_MODEL,
-         headers: dict = DEFAULT_HEADERS,
-         voice: Optional[OrpheusVoice] = None,
-         temperature: float = 0.6,
-         top_p: float = 0.9,
-         max_tokens: int = 1200,
-         repetition_penalty: float = 1.1,
-         debug: bool = False
-     ):
-         """
-         Initialize the Orpheus TTS engine with the given parameters.
-
-         Args:
-             api_url (str): Endpoint URL for the LM Studio API.
-             model (str): Model name to use for synthesis.
-             headers (dict): HTTP headers for API requests.
-             voice (Optional[OrpheusVoice]): OrpheusVoice configuration. Defaults to DEFAULT_VOICE.
-             temperature (float): Sampling temperature (0-1) for text generation.
-             top_p (float): Top-p sampling parameter for controlling diversity.
-             max_tokens (int): Maximum tokens to generate per API request.
-             repetition_penalty (float): Penalty factor for repeated phrases.
-             debug (bool): Flag to enable debug output.
-         """
-         super().__init__()
-         self.api_url = api_url
-         self.model = model
-         self.headers = headers
-         self.voice = voice or OrpheusVoice(DEFAULT_VOICE)
-         self.temperature = temperature
-         self.top_p = top_p
-         self.max_tokens = max_tokens
-         self.repetition_penalty = repetition_penalty
-         self.debug = debug
-         self.queue = Queue()
-         self.post_init()
-
-     def post_init(self):
-         """Set up additional engine attributes."""
-         self.engine_name = "orpheus"
-
-     def get_stream_info(self):
-         """
-         Retrieve PyAudio stream configuration.
-
-         Returns:
-             tuple: Format, channel count, and sample rate for PyAudio.
-         """
-         return pyaudio.paInt16, 1, SAMPLE_RATE
-
-     def synthesize(self, text: str) -> bool:
-         """
-         Convert text to speech and stream audio data.
-
-         Args:
-             text (str): The input text to be synthesized.
-
-         Returns:
-             bool: True if synthesis was successful, False otherwise.
-         """
-         super().synthesize(text)
-
-         try:
-             # Process tokens and put generated audio chunks into the queue
-             for audio_chunk in self._token_decoder(self._generate_tokens(text)):
-                 # bail out immediately if someone called .stop()
-                 if self.stop_synthesis_event.is_set():
-                     logging.info("OrpheusEngine: synthesis stopped by user")
-                     return False
-                 print(f"Audio chunk size: {len(audio_chunk)}")
-                 self.queue.put(audio_chunk)
-             return True
-         except Exception as e:
-             traceback.print_exc()
-             logging.error(f"Synthesis error: {e}")
-             return False
-
-     def synthesize(self, text: str) -> bool:
-         """
-         Convert text to speech and stream audio data via Orpheus.
-         Drops initial and trailing near-silent chunks.
-         """
-         super().synthesize(text)
-
-         try:
-             for audio_chunk in self._token_decoder(self._generate_tokens(text)):
-                 # bail out if user called .stop()
-                 if self.stop_synthesis_event.is_set():
-                     logging.info("OrpheusEngine: synthesis stopped by user")
-                     return False
-
-                 # forward this chunk
-                 self.queue.put(audio_chunk)
-
-             return True
-
-         except Exception as e:
-             traceback.print_exc()
-             logging.error(f"Synthesis error: {e}")
-             return False
-
-
-     def _generate_tokens(self, prompt: str):
-         """
-         Generate a token stream using the LM Studio API.
-
-         Args:
-             prompt (str): The input text prompt.
-
-         Yields:
-             str: Each token's text as it is received from the API.
-         """
-         logging.debug(f"Generating tokens for prompt: {prompt}")
-         formatted_prompt = self._format_prompt(prompt)
-
-         payload = {
-             "model": self.model,
-             "messages": [{"role": "user", "content": formatted_prompt}],  # "<|audio|>{voice}: {text}<|eot_id|>"
-             "max_tokens": self.max_tokens,
-             "temperature": self.temperature,
-             "top_p": self.top_p,
-             "frequency_penalty": self.repetition_penalty,  # optional
-             "stream": True,
-             "skip_special_tokens": False
-         }
-
-         try:
-             logging.debug(f"Requesting API URL: {self.api_url} with payload: {payload} and headers: {self.headers}")
-             response = requests.post(
-                 f"{self.api_url}/v1/chat/completions",  # new endpoint path
-                 headers=self.headers,
-                 json=payload,
-                 stream=True
-             )
-             response.raise_for_status()
-
-             token_counter = 0
-             start_time = time.time()  # Start timing token generation
-             for line in response.iter_lines():
-                 # stop on demand
-                 if self.stop_synthesis_event.is_set():
-                     logging.debug("OrpheusEngine: token generation aborted")
-                     break
-                 if line:
-                     line = line.decode('utf-8')
-                     if line.startswith('data: '):
-                         data_str = line[6:]
-                         if data_str.strip() == '[DONE]':
-                             break
-
-                         try:
-                             data = json.loads(data_str)
-                             if 'choices' in data and data['choices']:
-                                 delta = data["choices"][0]["delta"]
-                                 token_text = delta.get("content", "")
-                                 if "<custom_token_" in token_text:
-                                     logging.debug(f"SNAC-frame: {token_text[:40]}")
-                                 if token_text:
-                                     token_counter += 1
-                                     # Print the time it took to get the first token
-                                     if token_counter == 1:
-                                         elapsed = time.time() - start_time
-                                         logging.info(f"Time to first token: {elapsed:.2f} seconds")
-                                     yield token_text
-                         except json.JSONDecodeError as e:
-                             logging.error(f"Error decoding JSON: {e}")
-                             continue
-
-         except requests.RequestException as e:
-             logging.error(f"API request failed: {e}")
-
-     def _format_prompt(self, prompt: str) -> str:
-         """
-         Format the text prompt with special tokens required by Orpheus.
-
-         Args:
-             prompt (str): The raw text prompt.
-
-         Returns:
-             str: The formatted prompt including voice and termination token.
-         """
-         return f"<|audio|>{self.voice.name}: {prompt}<|eot_id|>"
-
-     def _token_decoder(self, token_gen):
-         """
-         Decode tokens from the generator and convert them into audio samples.
-
-         This method aggregates tokens in a buffer and converts them into audio chunks
-         once enough tokens have been collected.
-
-         Args:
-             token_gen: Generator yielding token strings.
-
-         Yields:
-             Audio samples ready to be streamed.
-         """
-         buffer = []
-         count = 0
-
-         logging.debug("Starting token decoding from token generator.")
-         for token_text in token_gen:
-             # bail out if stop was requested
-             if self.stop_synthesis_event.is_set():
-                 logging.debug("OrpheusEngine: token decoding aborted")
-                 break
-             token = self.turn_token_into_id(token_text, count)
-             if token is not None and token > 0:
-                 buffer.append(token)
-                 count += 1
-
-                 # Process every 7 tokens after an initial threshold
-                 if count % 7 == 0 and count > 27:
-                     buffer_to_proc = buffer[-28:]
-                     audio_samples = self._convert_buffer(buffer_to_proc, count)
-                     if audio_samples is not None:
-                         yield audio_samples
-
-     def turn_token_into_id(self, token_string: str, index: int) -> Optional[int]:
-         """
-         Convert a token string to a numeric ID for audio processing.
-
-         The conversion takes into account the custom token prefix and an index-based offset.
-
-         Args:
-             token_string (str): The token text.
-             index (int): The current token index.
-
-         Returns:
-             Optional[int]: The numeric token ID or None if conversion fails.
-         """
-         token_string = token_string.strip()
-         last_token_start = token_string.rfind(CUSTOM_TOKEN_PREFIX)
-
-         if last_token_start == -1:
-             return None
-
-         last_token = token_string[last_token_start:]
-
-         if last_token.startswith(CUSTOM_TOKEN_PREFIX) and last_token.endswith(">"):
-             try:
-                 number_str = last_token[14:-1]
-                 token_id = int(number_str) - 10 - ((index % 7) * 4096)
-                 return token_id
-             except ValueError:
-                 return None
-         else:
-             return None
-
-     def _convert_buffer(self, multiframe, count: int):
-         """
-         Convert a buffer of token frames into audio samples.
-
-         This method uses an external decoder to convert the collected token frames.
-
-         Args:
-             multiframe: List of token IDs to be converted.
-             count (int): The current token count (used for conversion logic).
-
-         Returns:
-             Converted audio samples if successful; otherwise, None.
-         """
-         try:
-             from .orpheus_decoder import convert_to_audio as orpheus_convert_to_audio
-             converted = orpheus_convert_to_audio(multiframe, count)
-             if converted is None:
-                 logging.warning("Conversion returned None.")
-             return converted
-         except Exception as e:
-             logging.error(f"Failed to convert buffer to audio: {e}")
-             logging.info("Returning None after failed conversion.")
-             return None
-
-     def get_voices(self):  # FastAPI /voices route
-         return self._SPEAKERS
-
-     def set_voice(self, voice_name: str) -> None:
-         if voice_name not in [v.name for v in self._SPEAKERS]:
-             raise ValueError(f"Unknown Orpheus speaker '{voice_name}'")
-         self.voice = OrpheusVoice(voice_name)
-
-     def set_voice_parameters(self, **kwargs):
-         """
-         Update voice generation parameters.
-
-         Valid parameters include 'temperature', 'top_p', 'max_tokens', and 'repetition_penalty'.
-
-         Args:
-             **kwargs: Arbitrary keyword arguments for valid voice parameters.
-         """
-         valid_params = ['temperature', 'top_p', 'max_tokens', 'repetition_penalty']
-         for param, value in kwargs.items():
-             if param in valid_params:
-                 setattr(self, param, value)
-             elif self.debug:
-                 logging.warning(f"Ignoring invalid parameter: {param}")
-
-     def __del__(self):
-         """
-         Destructor to clean up resources.
-
-         Puts a None into the queue to signal termination of audio processing.
-         """
-         self.queue.put(None)
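
For reference, turn_token_into_id strips the "<custom_token_" prefix and undoes a per-position offset of 10 + (index % 7) * 4096, so each of the seven positions in a frame maps back into the 0-4095 code range. A small check of that arithmetic, assuming the module and its RealtimeTTS dependency are importable:

from engines.orpheus_engine_BU import OrpheusEngine

engine = OrpheusEngine()   # no network access is needed just to decode token strings

# A token at frame position 3 carrying code 1234 is stored as 1234 + 10 + 3*4096.
token_text = f"<custom_token_{1234 + 10 + 3 * 4096}>"
assert engine.turn_token_into_id(token_text, index=3) == 1234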