Spaces:

shukdevdatta123
/

VocalForge-AI

Running

shukdevdatta123 committed 27 days ago

Commit

99ef324

verified ·

1 Parent(s): 4ee577e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,27 +37,31 @@ def preprocess_audio_to_npz(audio_path):
     # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
     audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
-    # Ensure audio is a float32 array
     audio = audio.astype(np.float32)
     with torch.device("cpu"):
-        # Generate dummy semantic tokens using generate_text_semantic
         dummy_text = "Dummy text for history prompt generation."
         semantic_tokens = generate_text_semantic(
             text=dummy_text,
             temp=0.7,
             silent=True
         )
-        # Ensure semantic_tokens is a numpy array with correct shape
         semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
-        if semantic_tokens.ndim == 0:
-            semantic_tokens = semantic_tokens.reshape(-1)
-        # Coarse and fine prompts are derived from semantic tokens
-        # Bark often uses similar tokens for coarse and fine prompts
-        coarse_tokens = semantic_tokens  # Simplified assumption
-        fine_tokens = semantic_tokens    # Simplified assumption
         # Create history prompt dictionary
         history_prompt = {

     # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
     audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
+    # Ensure audio is a float32 array (for potential future use)
     audio = audio.astype(np.float32)
     with torch.device("cpu"):
+        # Generate semantic tokens using generate_text_semantic
         dummy_text = "Dummy text for history prompt generation."
         semantic_tokens = generate_text_semantic(
             text=dummy_text,
+            max_gen_len=512,
             temp=0.7,
             silent=True
         )
+        # Ensure semantic_tokens is a 1D numpy array of int64
         semantic_tokens = np.array(semantic_tokens, dtype=np.int64)
+        if semantic_tokens.ndim != 1:
+            semantic_tokens = semantic_tokens.flatten()
+        # Simulate coarse tokens (typically shorter or quantized version of semantic tokens)
+        coarse_tokens = semantic_tokens[:256]  # Truncate to simulate coarse quantization
+        coarse_tokens = np.array(coarse_tokens, dtype=np.int64)
+        # Simulate fine tokens (often similar to coarse tokens in Bark)
+        fine_tokens = coarse_tokens.copy()  # Simplified assumption
+        fine_tokens = np.array(fine_tokens, dtype=np.int64)
         # Create history prompt dictionary
         history_prompt = {