Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -37,27 +37,31 @@ def preprocess_audio_to_npz(audio_path):
|
|
37 |
# Load and resample audio to Bark's SAMPLE_RATE (24kHz)
|
38 |
audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
|
39 |
|
40 |
-
# Ensure audio is a float32 array
|
41 |
audio = audio.astype(np.float32)
|
42 |
|
43 |
with torch.device("cpu"):
|
44 |
-
# Generate
|
45 |
dummy_text = "Dummy text for history prompt generation."
|
46 |
semantic_tokens = generate_text_semantic(
|
47 |
text=dummy_text,
|
|
|
48 |
temp=0.7,
|
49 |
silent=True
|
50 |
)
|
51 |
|
52 |
-
# Ensure semantic_tokens is a numpy array
|
53 |
semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
|
54 |
-
if semantic_tokens.ndim
|
55 |
-
semantic_tokens = semantic_tokens.
|
56 |
|
57 |
-
#
|
58 |
-
|
59 |
-
coarse_tokens =
|
60 |
-
|
|
|
|
|
|
|
61 |
|
62 |
# Create history prompt dictionary
|
63 |
history_prompt = {
|
|
|
37 |
# Load and resample audio to Bark's SAMPLE_RATE (24kHz)
|
38 |
audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
|
39 |
|
40 |
+
# Ensure audio is a float32 array (for potential future use)
|
41 |
audio = audio.astype(np.float32)
|
42 |
|
43 |
with torch.device("cpu"):
|
44 |
+
# Generate semantic tokens using generate_text_semantic
|
45 |
dummy_text = "Dummy text for history prompt generation."
|
46 |
semantic_tokens = generate_text_semantic(
|
47 |
text=dummy_text,
|
48 |
+
max_gen_len=512,
|
49 |
temp=0.7,
|
50 |
silent=True
|
51 |
)
|
52 |
|
53 |
+
# Ensure semantic_tokens is a 1D numpy array of int64
|
54 |
semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
|
55 |
+
if semantic_tokens.ndim != 1:
|
56 |
+
semantic_tokens = semantic_tokens.flatten()
|
57 |
|
58 |
+
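# Placeholder coarse tokens: the first 256 semantic tokens reused as-is (NOTE(review): real Bark coarse tokens are presumably produced by generate_coarse, not sliced from semantic tokens — confirm)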
|
59 |
+
coarse_tokens = semantic_tokens[:256] # Truncate to simulate coarse quantization
|
60 |
+
coarse_tokens = np.array(coarse_tokens, dtype=np.int64)
|
61 |
+
|
62 |
+
# Placeholder fine tokens: a plain copy of coarse_tokens (NOTE(review): real Bark fine tokens are presumably produced by generate_fine — confirm)
|
63 |
+
fine_tokens = coarse_tokens.copy() # Simplified assumption
|
64 |
+
fine_tokens = np.array(fine_tokens, dtype=np.int64)
|
65 |
|
66 |
# Create history prompt dictionary
|
67 |
history_prompt = {
|