Tomtom84 committed on
Commit
1dac694
·
verified ·
1 Parent(s): 1a347c6

Update orpheus-tts/kartoffel_decoder.py

Browse files
Files changed (1) hide show
  1. orpheus-tts/kartoffel_decoder.py +19 -4
orpheus-tts/kartoffel_decoder.py CHANGED
@@ -6,9 +6,9 @@ import threading
6
  import queue
7
  import os
8
 
9
- # Kartoffel-spezifische Konstanten
10
  CODE_TOKEN_OFFSET = 128266
11
- CODE_START_TOKEN_ID = 128257
12
  CODE_REMOVE_TOKEN_ID = 128258
13
 
14
  print("DEBUG KARTOFFEL: Loading SNAC model...")
@@ -75,8 +75,17 @@ def convert_to_audio_kartoffel(audio_tensor):
75
  def extract_kartoffel_tokens(token_text, tokenizer):
76
  """Extrahiert Audio-Token-IDs aus dem generierten Text"""
77
  try:
78
- # Text zu Token-IDs konvertieren
79
- token_ids = tokenizer.encode(token_text)
 
 
 
 
 
 
 
 
 
80
 
81
  # Nach Start-Token suchen
82
  start_idx = -1
@@ -86,10 +95,14 @@ def extract_kartoffel_tokens(token_text, tokenizer):
86
  break
87
 
88
  if start_idx == -1:
 
89
  return []
90
 
 
 
91
  # Audio-Tokens extrahieren (nach Start-Token)
92
  potential_code_tokens = token_ids[start_idx + 1:]
 
93
 
94
  # Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
95
  valid_raw_codes = [
@@ -97,6 +110,8 @@ def extract_kartoffel_tokens(token_text, tokenizer):
97
  if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
98
  ]
99
 
 
 
100
  # Offset abziehen
101
  valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
102
 
 
6
  import queue
7
  import os
8
 
9
+ # Kartoffel-spezifische Konstanten (basierend auf Referenz-Implementierung)
10
  CODE_TOKEN_OFFSET = 128266
11
+ CODE_START_TOKEN_ID = 128257 # Token für Audio-Code-Start
12
  CODE_REMOVE_TOKEN_ID = 128258
13
 
14
  print("DEBUG KARTOFFEL: Loading SNAC model...")
 
75
  def extract_kartoffel_tokens(token_text, tokenizer):
76
  """Extrahiert Audio-Token-IDs aus dem generierten Text"""
77
  try:
78
+ print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
79
+
80
+ # Prüfen ob es sich um numerische Token-IDs handelt (neues Format)
81
+ if isinstance(token_text, str) and all(c.isdigit() or c.isspace() for c in token_text):
82
+ # Numerische Token-IDs direkt parsen
83
+ token_ids = [int(x) for x in token_text.split()]
84
+ print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
85
+ else:
86
+ # Fallback: Text zu Token-IDs konvertieren (altes Format)
87
+ token_ids = tokenizer.encode(token_text)
88
+ print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
89
 
90
  # Nach Start-Token suchen
91
  start_idx = -1
 
95
  break
96
 
97
  if start_idx == -1:
98
+ print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
99
  return []
100
 
101
+ print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
102
+
103
  # Audio-Tokens extrahieren (nach Start-Token)
104
  potential_code_tokens = token_ids[start_idx + 1:]
105
+ print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens[:10]}...")
106
 
107
  # Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
108
  valid_raw_codes = [
 
110
  if token != CODE_REMOVE_TOKEN_ID and token >= CODE_TOKEN_OFFSET
111
  ]
112
 
113
+ print(f"DEBUG KARTOFFEL: Valid raw codes count: {len(valid_raw_codes)}")
114
+
115
  # Offset abziehen
116
  valid_codes = [token - CODE_TOKEN_OFFSET for token in valid_raw_codes]
117