Spaces:

shukdevdatta123
/

VocalForge-AI

Running

App Files Files Community

shukdevdatta123 committed 29 days ago

Commit

4c9d3f6

verified ·

1 Parent(s): a8f539e

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -31

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
-from bark.api import text_to_semantic
-from bark.generation import generate_text_semantic
 from scipy.io.wavfile import write as write_wav
 import tempfile
 import librosa
@@ -33,7 +31,7 @@ def preprocess_audio_to_npz(audio_path):
     audio_path (str): Path to the input audio file.
     Returns:
-    str: Path to the generated .npz file.
     """
     # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
     audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
@@ -41,33 +39,29 @@ def preprocess_audio_to_npz(audio_path):
     # Ensure audio is a float32 array
     audio = audio.astype(np.float32)
-    with torch.device("cpu"):
-        # Generate dummy semantic and coarse tokens
-        # Since HuBERT is not implemented, use text_to_semantic with dummy text
-        dummy_text = "Dummy text for history prompt generation."
-        semantic_tokens = text_to_semantic(dummy_text, temp=0.7, silent=True)
-        # Generate coarse tokens from semantic tokens
-        coarse_tokens = generate_text_semantic(
-            semantic_tokens=semantic_tokens,
-            max_gen_len=512,
-            temp=0.7,
-            silent=True
-        )
-        # Create history prompt dictionary with minimal structure
-        history_prompt = {
-            "semantic_prompt": semantic_tokens,
-            "coarse_prompt": coarse_tokens,
-            "fine_prompt": coarse_tokens  # Fine prompt often mirrors coarse in Bark
-        }
-        # Save to temporary .npz file
-        with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
-            np.savez(temp_file.name, **history_prompt)
-            npz_path = temp_file.name
-    return npz_path
 def generate_speech(reference_audio, text):
     """
@@ -85,7 +79,7 @@ def generate_speech(reference_audio, text):
     if not text:
         raise ValueError("Please enter text to convert.")
-    # Preprocess audio to create .npz history prompt
     history_prompt = preprocess_audio_to_npz(reference_audio)
     # Generate speech using the processed history prompt
@@ -116,7 +110,6 @@ with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
     generate_btn = gr.Button("Generate Speech")
     audio_output = gr.Audio(label="Generated Speech", interactive=False)
     # Connect the button to the generation function
     generate_btn.click(
         fn=generate_speech,

 import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
 from scipy.io.wavfile import write as write_wav
 import tempfile
 import librosa
     audio_path (str): Path to the input audio file.
     Returns:
+    str: Path to the input audio file or generated .npz file.
     """
     # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
     audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
     # Ensure audio is a float32 array
     audio = audio.astype(np.float32)
+    try:
+        # Attempt to use the audio file directly as history_prompt
+        # Bark may support raw audio files as history_prompt in some versions
+        return audio_path
+    except:
+        # Fallback: Create a minimal .npz file with dummy tokens
+        with torch.device("cpu"):
+            # Generate dummy tokens (minimal structure to avoid errors)
+            dummy_tokens = np.zeros((512,), dtype=np.int32)  # Placeholder tokens
+            # Create history prompt dictionary
+            history_prompt = {
+                "semantic_prompt": dummy_tokens,
+                "coarse_prompt": dummy_tokens,
+                "fine_prompt": dummy_tokens
+            }
+            # Save to temporary .npz file
+            with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
+                np.savez(temp_file.name, **history_prompt)
+                npz_path = temp_file.name
+        return npz_path
 def generate_speech(reference_audio, text):
     """
     if not text:
         raise ValueError("Please enter text to convert.")
+    # Preprocess audio to get history prompt (audio file or .npz)
     history_prompt = preprocess_audio_to_npz(reference_audio)
     # Generate speech using the processed history prompt
     generate_btn = gr.Button("Generate Speech")
     audio_output = gr.Audio(label="Generated Speech", interactive=False)
     # Connect the button to the generation function
     generate_btn.click(
         fn=generate_speech,