Spaces:
Runtime error
Runtime error
Commit
·
5aefb03
1
Parent(s):
efe64f8
add text norm + watermark
Browse files- chatterbox/src/chatterbox/__init__.py +2 -1
- chatterbox/src/chatterbox/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/__pycache__/tts.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/__pycache__/vc.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/const.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/decoder.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/f0_predictor.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/flow.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/flow_matching.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/hifigan.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/s3gen.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/__pycache__/xvector.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/decoder.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/flow_matching.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/transformer.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/activation.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/attention.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/convolution.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/embedding.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/encoder_layer.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/positionwise_feed_forward.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/subsampling.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/upsample_encoder.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/class_utils.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/mask.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/mel.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3tokenizer/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/s3tokenizer/__pycache__/s3tokenizer.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/__pycache__/llama_configs.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/__pycache__/t3.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/inference/__pycache__/alignment_stream_analyzer.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/inference/__pycache__/t3_hf_backend.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/modules/__pycache__/cond_enc.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/modules/__pycache__/learned_pos_emb.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/modules/__pycache__/perceiver.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/t3/modules/__pycache__/t3_config.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/tokenizers/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/tokenizers/__pycache__/tokenizer.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/voice_encoder/__pycache__/__init__.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/voice_encoder/__pycache__/config.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/voice_encoder/__pycache__/melspec.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/models/voice_encoder/__pycache__/voice_encoder.cpython-311.pyc +0 -0
- chatterbox/src/chatterbox/tts.py +44 -0
chatterbox/src/chatterbox/__init__.py
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
from .tts import ChatterboxTTS
|
|
|
|
1 |
+
from .tts import ChatterboxTTS
|
2 |
+
from .vc import ChatterboxVC
|
chatterbox/src/chatterbox/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (275 Bytes). View file
|
|
chatterbox/src/chatterbox/__pycache__/tts.cpython-311.pyc
ADDED
Binary file (12.5 kB). View file
|
|
chatterbox/src/chatterbox/__pycache__/vc.cpython-311.pyc
ADDED
Binary file (4.9 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (294 Bytes). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/const.cpython-311.pyc
ADDED
Binary file (190 Bytes). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/decoder.cpython-311.pyc
ADDED
Binary file (16.9 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/f0_predictor.cpython-311.pyc
ADDED
Binary file (2.7 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/flow.cpython-311.pyc
ADDED
Binary file (13.7 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/flow_matching.cpython-311.pyc
ADDED
Binary file (13.3 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/hifigan.cpython-311.pyc
ADDED
Binary file (26.3 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/s3gen.cpython-311.pyc
ADDED
Binary file (13.7 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/__pycache__/xvector.cpython-311.pyc
ADDED
Binary file (24 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/decoder.cpython-311.pyc
ADDED
Binary file (21.3 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/flow_matching.cpython-311.pyc
ADDED
Binary file (6.46 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/matcha/__pycache__/transformer.cpython-311.pyc
ADDED
Binary file (14.7 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (190 Bytes). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/activation.cpython-311.pyc
ADDED
Binary file (3.58 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/attention.cpython-311.pyc
ADDED
Binary file (15.7 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/convolution.cpython-311.pyc
ADDED
Binary file (5.54 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/embedding.cpython-311.pyc
ADDED
Binary file (17.3 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/encoder_layer.cpython-311.pyc
ADDED
Binary file (11.2 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/positionwise_feed_forward.cpython-311.pyc
ADDED
Binary file (6.24 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/subsampling.cpython-311.pyc
ADDED
Binary file (18.9 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/transformer/__pycache__/upsample_encoder.cpython-311.pyc
ADDED
Binary file (15.6 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/class_utils.cpython-311.pyc
ADDED
Binary file (1.93 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/mask.cpython-311.pyc
ADDED
Binary file (6.25 kB). View file
|
|
chatterbox/src/chatterbox/models/s3gen/utils/__pycache__/mel.cpython-311.pyc
ADDED
Binary file (4.05 kB). View file
|
|
chatterbox/src/chatterbox/models/s3tokenizer/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (806 Bytes). View file
|
|
chatterbox/src/chatterbox/models/s3tokenizer/__pycache__/s3tokenizer.cpython-311.pyc
ADDED
Binary file (7.94 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (218 Bytes). View file
|
|
chatterbox/src/chatterbox/models/t3/__pycache__/llama_configs.cpython-311.pyc
ADDED
Binary file (1.34 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/__pycache__/t3.cpython-311.pyc
ADDED
Binary file (13.4 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/inference/__pycache__/alignment_stream_analyzer.cpython-311.pyc
ADDED
Binary file (7.08 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/inference/__pycache__/t3_hf_backend.cpython-311.pyc
ADDED
Binary file (4.82 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/modules/__pycache__/cond_enc.cpython-311.pyc
ADDED
Binary file (5.37 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/modules/__pycache__/learned_pos_emb.cpython-311.pyc
ADDED
Binary file (2.54 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/modules/__pycache__/perceiver.cpython-311.pyc
ADDED
Binary file (12.6 kB). View file
|
|
chatterbox/src/chatterbox/models/t3/modules/__pycache__/t3_config.cpython-311.pyc
ADDED
Binary file (1.27 kB). View file
|
|
chatterbox/src/chatterbox/models/tokenizers/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (242 Bytes). View file
|
|
chatterbox/src/chatterbox/models/tokenizers/__pycache__/tokenizer.cpython-311.pyc
ADDED
Binary file (3.1 kB). View file
|
|
chatterbox/src/chatterbox/models/voice_encoder/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (281 Bytes). View file
|
|
chatterbox/src/chatterbox/models/voice_encoder/__pycache__/config.cpython-311.pyc
ADDED
Binary file (859 Bytes). View file
|
|
chatterbox/src/chatterbox/models/voice_encoder/__pycache__/melspec.cpython-311.pyc
ADDED
Binary file (3.59 kB). View file
|
|
chatterbox/src/chatterbox/models/voice_encoder/__pycache__/voice_encoder.cpython-311.pyc
ADDED
Binary file (18.7 kB). View file
|
|
chatterbox/src/chatterbox/tts.py
CHANGED
@@ -28,6 +28,48 @@ def change_pace(speech_tokens: torch.Tensor, pace: float):
|
|
28 |
return speech_tokens
|
29 |
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
@dataclass
|
32 |
class Conditionals:
|
33 |
"""
|
@@ -176,6 +218,8 @@ class ChatterboxTTS:
|
|
176 |
emotion_adv=exaggeration * torch.ones(1, 1, 1),
|
177 |
).to(device=self.device)
|
178 |
|
|
|
|
|
179 |
text_tokens = self.tokenizer.text_to_tokens(text).to(self.device)
|
180 |
|
181 |
sot = self.t3.hp.start_text_token
|
|
|
28 |
return speech_tokens
|
29 |
|
30 |
|
31 |
+
def punc_norm(text: str) -> str:
    """
    Quick cleanup func for punctuation from LLMs or
    containing chars not seen often in the dataset.

    Steps, in order:
      1. Collapse every run of whitespace to a single space and strip the ends.
      2. Return a fixed placeholder sentence if nothing is left.
      3. Capitalise the first letter.
      4. Replace uncommon / LLM-style punctuation with plainer equivalents.
      5. Append a full stop when the text does not already end with
         ".", "!", "?", "-" or ",".

    Args:
        text: Raw input text.

    Returns:
        The normalised text, or the placeholder sentence when ``text`` is
        empty or contains only whitespace.
    """
    # Collapse whitespace FIRST so that the emptiness check and the
    # capitalisation below look at the first real character. Doing it later
    # turned whitespace-only input into "." and left " hello" uncapitalised.
    text = " ".join(text.split())

    if not text:
        return "You need to add some text for me to talk."

    # Capitalise first letter
    if text[0].islower():
        text = text[0].upper() + text[1:]

    # Replace uncommon/llm punc. Order matters: e.g. ":" -> "," must run
    # before " ," -> "," so that "a : b" ends up as "a, b".
    punc_to_replace = [
        ("...", ", "),
        ("…", ", "),
        (":", ","),
        (" - ", ", "),
        (";", ", "),
        ("—", "-"),
        ("–", "-"),
        (" ,", ","),
        ("“", "\""),
        ("”", "\""),
        ("‘", "'"),
        ("’", "'"),
    ]
    for old_char_sequence, new_char in punc_to_replace:
        text = text.replace(old_char_sequence, new_char)

    # Add full stop if no ending punc. A replacement above can leave a
    # trailing space (e.g. "Hello..." -> "Hello, "), so strip it first.
    text = text.rstrip(" ")
    sentence_enders = (".", "!", "?", "-", ",")
    if not text.endswith(sentence_enders):
        text += "."

    return text
|
71 |
+
|
72 |
+
|
73 |
@dataclass
|
74 |
class Conditionals:
|
75 |
"""
|
|
|
218 |
emotion_adv=exaggeration * torch.ones(1, 1, 1),
|
219 |
).to(device=self.device)
|
220 |
|
221 |
+
# Norm and tokenize text
|
222 |
+
text = punc_norm(text)
|
223 |
text_tokens = self.tokenizer.text_to_tokens(text).to(self.device)
|
224 |
|
225 |
sot = self.t3.hp.start_text_token
|