Spaces:

shukdevdatta123
/

VocalForge-AI

Running

shukdevdatta123 committed on Jul 2

Commit

7666acf

verified ·

1 Parent(s): e25f277

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,40 +34,42 @@ def preprocess_audio_to_npz(audio_path):
     Returns:
     str: Path to the generated .npz file.
     """
-    # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
-    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
-    # Ensure audio is a float32 array
-    audio = audio.astype(np.float32)
-    # Tokenize and process through HuBERT for semantic tokens
-    hubert_manager = load_model(model_type="hubert", device="cpu")
-    hubert_tokenizer = load_model(model_type="hubert_tokenizer", device="cpu")
-    # Generate semantic tokens
-    tokens = _tokenize(audio, hubert_manager, hubert_tokenizer)
-    semantic_tokens = tokens[0]  # Extract semantic tokens
-    # Load coarse model for coarse tokens
-    coarse_model = load_model(model_type="coarse", device="cpu")
-    # Generate coarse tokens
-    coarse_tokens = generate_text_semantic(
-        semantic_tokens=semantic_tokens,
-        model=coarse_model,
-        max_gen_len=512
-    )
-    # Create history prompt dictionary
-    history_prompt = {
-        "semantic_prompt": semantic_tokens,
-        "coarse_prompt": coarse_tokens
-    }
-    # Save to temporary .npz file
-    with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
-        np.savez(temp_file.name, **history_prompt)
-        npz_path = temp_file.name
     return npz_path

     Returns:
     str: Path to the generated .npz file.
     """
+    # Set device to CPU
+    with torch.device("cpu"):
+        # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
+        audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
+        # Ensure audio is a float32 array
+        audio = audio.astype(np.float32)
+        # Load HuBERT models for semantic token extraction
+        hubert_manager = load_model(model_type="hubert")
+        hubert_tokenizer = load_model(model_type="hubert_tokenizer")
+        # Generate semantic tokens
+        tokens = _tokenize(audio, hubert_manager, hubert_tokenizer)
+        semantic_tokens = tokens[0]  # Extract semantic tokens
+        # Load coarse model for coarse tokens
+        coarse_model = load_model(model_type="coarse")
+        # Generate coarse tokens
+        coarse_tokens = generate_text_semantic(
+            semantic_tokens=semantic_tokens,
+            model=coarse_model,
+            max_gen_len=512
+        )
+        # Create history prompt dictionary
+        history_prompt = {
+            "semantic_prompt": semantic_tokens,
+            "coarse_prompt": coarse_tokens
+        }
+        # Save to temporary .npz file
+        with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
+            np.savez(temp_file.name, **history_prompt)
+            npz_path = temp_file.name
     return npz_path