Tomtom84 committed on
Commit
3c5959f
·
verified ·
1 Parent(s): e43b774

Update orpheus-tts/kartoffel_decoder.py

Browse files
Files changed (1) hide show
  1. orpheus-tts/kartoffel_decoder.py +12 -15
orpheus-tts/kartoffel_decoder.py CHANGED
@@ -73,21 +73,16 @@ def convert_to_audio_kartoffel(audio_tensor):
73
  return audio_numpy.tobytes()
74
 
75
  def extract_kartoffel_tokens(token_text, tokenizer):
76
- """Extrahiert Audio-Token-IDs aus dem generierten Text"""
77
  try:
78
- print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
79
 
80
- # Prüfen ob es sich um numerische Token-IDs handelt (neues Format)
81
- if isinstance(token_text, str) and all(c.isdigit() or c.isspace() for c in token_text):
82
- # Numerische Token-IDs direkt parsen
83
- token_ids = [int(x) for x in token_text.split()]
84
- print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
85
- else:
86
- # Fallback: Text zu Token-IDs konvertieren (altes Format)
87
- token_ids = tokenizer.encode(token_text)
88
- print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
89
 
90
- # Nach Start-Token suchen
91
  start_idx = -1
92
  for i, token_id in enumerate(token_ids):
93
  if token_id == CODE_START_TOKEN_ID:
@@ -95,14 +90,16 @@ def extract_kartoffel_tokens(token_text, tokenizer):
95
  break
96
 
97
  if start_idx == -1:
98
- print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
 
99
  return []
100
 
101
- print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
102
 
103
  # Audio-Tokens extrahieren (nach Start-Token)
104
  potential_code_tokens = token_ids[start_idx + 1:]
105
- print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens[:10]}...")
 
106
 
107
  # Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
108
  valid_raw_codes = [
 
73
  return audio_numpy.tobytes()
74
 
75
  def extract_kartoffel_tokens(token_text, tokenizer):
76
+ """Extrahiert Audio-Token-IDs aus dem von vLLM generierten Text"""
77
  try:
78
+ print(f"DEBUG KARTOFFEL: Received token_text: {token_text[:100]}...")
79
 
80
+ # Text zu Token-IDs konvertieren (vLLM generiert Text, nicht numerische IDs)
81
+ token_ids = tokenizer.encode(token_text)
82
+ print(f"DEBUG KARTOFFEL: Encoded token_ids count: {len(token_ids)}")
83
+ print(f"DEBUG KARTOFFEL: First 20 token_ids: {token_ids[:20]}")
 
 
 
 
 
84
 
85
+ # Nach Audio-Start-Token suchen (128257)
86
  start_idx = -1
87
  for i, token_id in enumerate(token_ids):
88
  if token_id == CODE_START_TOKEN_ID:
 
90
  break
91
 
92
  if start_idx == -1:
93
+ print(f"DEBUG KARTOFFEL: No audio start token found ({CODE_START_TOKEN_ID})")
94
+ print(f"DEBUG KARTOFFEL: Available unique tokens: {sorted(set(token_ids))}")
95
  return []
96
 
97
+ print(f"DEBUG KARTOFFEL: Found audio start token at index {start_idx}")
98
 
99
  # Audio-Tokens extrahieren (nach Start-Token)
100
  potential_code_tokens = token_ids[start_idx + 1:]
101
+ print(f"DEBUG KARTOFFEL: Potential code tokens count: {len(potential_code_tokens)}")
102
+ print(f"DEBUG KARTOFFEL: First 10 potential codes: {potential_code_tokens[:10]}")
103
 
104
  # Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
105
  valid_raw_codes = [