Update orpheus-tts/kartoffel_decoder.py
Browse files
orpheus-tts/kartoffel_decoder.py
CHANGED
@@ -6,9 +6,9 @@ import threading
|
|
6 |
import queue
|
7 |
import os
|
8 |
|
9 |
-
# Kartoffel-spezifische Konstanten
|
10 |
CODE_TOKEN_OFFSET = 128266
|
11 |
-
CODE_START_TOKEN_ID = 128257
|
12 |
CODE_REMOVE_TOKEN_ID = 128258
|
13 |
|
14 |
print("DEBUG KARTOFFEL: Loading SNAC model...")
|
@@ -75,8 +75,17 @@ def convert_to_audio_kartoffel(audio_tensor):
|
|
75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
76 |
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
77 |
try:
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# Nach Start-Token suchen
|
82 |
start_idx = -1
|
@@ -86,10 +95,14 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
86 |
break
|
87 |
|
88 |
if start_idx == -1:
|
|
|
89 |
return []
|
90 |
|
|
|
|
|
91 |
# Audio-Tokens extrahieren (nach Start-Token)
|
92 |
potential_code_tokens = token_ids[start_idx + 1:]
|
|
|
93 |
|
94 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
95 |
valid_raw_codes = [
|
@@ -97,6 +110,8 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
97 |
if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
|
98 |
]
|
99 |
|
|
|
|
|
100 |
# Offset abziehen
|
101 |
valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
|
102 |
|
|
|
6 |
import queue
|
7 |
import os
|
8 |
|
9 |
+
# Kartoffel-spezifische Konstanten (basierend auf Referenz-Implementierung)
|
10 |
CODE_TOKEN_OFFSET = 128266
|
11 |
+
CODE_START_TOKEN_ID = 128257 # Token für Audio-Code-Start
|
12 |
CODE_REMOVE_TOKEN_ID = 128258
|
13 |
|
14 |
print("DEBUG KARTOFFEL: Loading SNAC model...")
|
|
|
75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
76 |
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
77 |
try:
|
78 |
+
print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
|
79 |
+
|
80 |
+
# Prüfen, ob es sich um numerische Token-IDs handelt (neues Format)
|
81 |
+
if isinstance(token_text, str) and all(c.isdigit() or c.isspace() for c in token_text):
|
82 |
+
# Numerische Token-IDs direkt parsen
|
83 |
+
token_ids = [int(x) for x in token_text.split()]
|
84 |
+
print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
|
85 |
+
else:
|
86 |
+
# Fallback: Text zu Token-IDs konvertieren (altes Format)
|
87 |
+
token_ids = tokenizer.encode(token_text)
|
88 |
+
print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
|
89 |
|
90 |
# Nach Start-Token suchen
|
91 |
start_idx = -1
|
|
|
95 |
break
|
96 |
|
97 |
if start_idx == -1:
|
98 |
+
print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
|
99 |
return []
|
100 |
|
101 |
+
print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
|
102 |
+
|
103 |
# Audio-Tokens extrahieren (nach Start-Token)
|
104 |
potential_code_tokens = token_ids[start_idx + 1:]
|
105 |
+
print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens[:10]}...")
|
106 |
|
107 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
108 |
valid_raw_codes = [
|
|
|
110 |
if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
|
111 |
]
|
112 |
|
113 |
+
print(f"DEBUG KARTOFFEL: Valid raw codes count: {len(valid_raw_codes)}")
|
114 |
+
|
115 |
# Offset abziehen
|
116 |
valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
|
117 |
|