Update orpheus-tts/kartoffel_decoder.py
Browse files
- orpheus-tts/kartoffel_decoder.py +12 -15
orpheus-tts/kartoffel_decoder.py
CHANGED
@@ -73,21 +73,16 @@ def convert_to_audio_kartoffel(audio_tensor):
|
|
73 |
return audio_numpy.tobytes()
|
74 |
|
75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
76 |
-
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
77 |
try:
|
78 |
-
print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
|
79 |
|
80 |
-
#
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
|
85 |
-
else:
|
86 |
-
# Fallback: Text zu Token-IDs konvertieren (altes Format)
|
87 |
-
token_ids = tokenizer.encode(token_text)
|
88 |
-
print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
|
89 |
|
90 |
-
# Nach Start-Token suchen
|
91 |
start_idx = -1
|
92 |
for i, token_id in enumerate(token_ids):
|
93 |
if token_id == CODE_START_TOKEN_ID:
|
@@ -95,14 +90,16 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
95 |
break
|
96 |
|
97 |
if start_idx == -1:
|
98 |
-
print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
|
|
|
99 |
return []
|
100 |
|
101 |
-
print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
|
102 |
|
103 |
# Audio-Tokens extrahieren (nach Start-Token)
|
104 |
potential_code_tokens = token_ids[start_idx + 1:]
|
105 |
-
print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens
|
|
|
106 |
|
107 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
108 |
valid_raw_codes = [
|
|
|
73 |
return audio_numpy.tobytes()
|
74 |
|
75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
76 |
+
"""Extrahiert Audio-Token-IDs aus dem von vLLM generierten Text"""
|
77 |
try:
|
78 |
+
print(f"DEBUG KARTOFFEL: Received token_text: {token_text[:100]}...")
|
79 |
|
80 |
+
# Text zu Token-IDs konvertieren (vLLM generiert Text, nicht numerische IDs)
|
81 |
+
token_ids = tokenizer.encode(token_text)
|
82 |
+
print(f"DEBUG KARTOFFEL: Encoded token_ids count: {len(token_ids)}")
|
83 |
+
print(f"DEBUG KARTOFFEL: First 20 token_ids: {token_ids[:20]}")
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
+
# Nach Audio-Start-Token suchen (128257)
|
86 |
start_idx = -1
|
87 |
for i, token_id in enumerate(token_ids):
|
88 |
if token_id == CODE_START_TOKEN_ID:
|
|
|
90 |
break
|
91 |
|
92 |
if start_idx == -1:
|
93 |
+
print(f"DEBUG KARTOFFEL: No audio start token found ({CODE_START_TOKEN_ID})")
|
94 |
+
print(f"DEBUG KARTOFFEL: Available unique tokens: {sorted(set(token_ids))}")
|
95 |
return []
|
96 |
|
97 |
+
print(f"DEBUG KARTOFFEL: Found audio start token at index {start_idx}")
|
98 |
|
99 |
# Audio-Tokens extrahieren (nach Start-Token)
|
100 |
potential_code_tokens = token_ids[start_idx + 1:]
|
101 |
+
print(f"DEBUG KARTOFFEL: Potential code tokens count: {len(potential_code_tokens)}")
|
102 |
+
print(f"DEBUG KARTOFFEL: First 10 potential codes: {potential_code_tokens[:10]}")
|
103 |
|
104 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
105 |
valid_raw_codes = [
|