Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Files Community

shukdevdatta123 commited on Jul 2

Commit

0a51a48

verified ·

1 Parent(s): 8ed43f7

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -30

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
-from bark.generation import load_model, generate_text_semantic, _tokenize
 from scipy.io.wavfile import write as write_wav
 import tempfile
-import torch
 import librosa
 import numpy as np
 # Save the original torch.load function
 original_load = torch.load
@@ -34,39 +33,26 @@ def preprocess_audio_to_npz(audio_path):
     Returns:
     str: Path to the generated .npz file.
     """
-    # Set device to CPU
     with torch.device("cpu"):
-        # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
-        audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
-        # Ensure audio is a float32 array
-        audio = audio.astype(np.float32)
-        # Load HuBERT models for semantic token extraction
-        hubert_manager = load_model(model_type="hubert")
-        hubert_tokenizer = load_model(model_type="hubert_tokenizer")
-        # Generate semantic tokens
-        tokens = _tokenize(audio, hubert_manager, hubert_tokenizer)
-        semantic_tokens = tokens[0]  # Extract semantic tokens
-        # Load coarse model for coarse tokens
-        coarse_model = load_model(model_type="coarse")
-        # Generate coarse tokens
-        coarse_tokens = generate_text_semantic(
-            semantic_tokens=semantic_tokens,
-            model=coarse_model,
-            max_gen_len=512
-        )
-        # Create history prompt dictionary
         history_prompt = {
-            "semantic_prompt": semantic_tokens,
-            "coarse_prompt": coarse_tokens
         }
-        # Save to temporary .npz file
         with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
             np.savez(temp_file.name, **history_prompt)
             npz_path = temp_file.name

 import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
 from scipy.io.wavfile import write as write_wav
 import tempfile
 import librosa
 import numpy as np
+import torch
 # Save the original torch.load function
 original_load = torch.load
     Returns:
     str: Path to the generated .npz file.
     """
+    # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
+    audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
+    # Ensure audio is a float32 array
+    audio = audio.astype(np.float32)
+    # Generate semantic tokens directly using Bark's internal processing
+    # Since HuBERT models are not implemented, we rely on generate_audio's history prompt
+    # This is a simplified approach assuming Bark can handle raw audio for history prompt
     with torch.device("cpu"):
+        # Generate audio tokens to create a history prompt
+        # We use a dummy text to generate a history prompt from the audio
+        dummy_text = "Dummy text for history prompt generation."
+        audio_array = generate_audio(dummy_text, history_prompt=audio_path)
+        # Save the audio array as a temporary .npz file
         history_prompt = {
+            "audio": audio_array
         }
         with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
             np.savez(temp_file.name, **history_prompt)
             npz_path = temp_file.name