shukdevdatta123 committed on
Commit
4c9d3f6
·
verified ·
1 Parent(s): a8f539e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -31
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import gradio as gr
2
  from bark import SAMPLE_RATE, generate_audio, preload_models
3
- from bark.api import text_to_semantic
4
- from bark.generation import generate_text_semantic
5
  from scipy.io.wavfile import write as write_wav
6
  import tempfile
7
  import librosa
@@ -33,7 +31,7 @@ def preprocess_audio_to_npz(audio_path):
33
  audio_path (str): Path to the input audio file.
34
 
35
  Returns:
36
- str: Path to the generated .npz file.
37
  """
38
  # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
39
  audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
@@ -41,33 +39,29 @@ def preprocess_audio_to_npz(audio_path):
41
  # Ensure audio is a float32 array
42
  audio = audio.astype(np.float32)
43
 
44
- with torch.device("cpu"):
45
- # Generate dummy semantic and coarse tokens
46
- # Since HuBERT is not implemented, use text_to_semantic with dummy text
47
- dummy_text = "Dummy text for history prompt generation."
48
- semantic_tokens = text_to_semantic(dummy_text, temp=0.7, silent=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- # Generate coarse tokens from semantic tokens
51
- coarse_tokens = generate_text_semantic(
52
- semantic_tokens=semantic_tokens,
53
- max_gen_len=512,
54
- temp=0.7,
55
- silent=True
56
- )
57
-
58
- # Create history prompt dictionary with minimal structure
59
- history_prompt = {
60
- "semantic_prompt": semantic_tokens,
61
- "coarse_prompt": coarse_tokens,
62
- "fine_prompt": coarse_tokens # Fine prompt often mirrors coarse in Bark
63
- }
64
-
65
- # Save to temporary .npz file
66
- with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
67
- np.savez(temp_file.name, **history_prompt)
68
- npz_path = temp_file.name
69
-
70
- return npz_path
71
 
72
  def generate_speech(reference_audio, text):
73
  """
@@ -85,7 +79,7 @@ def generate_speech(reference_audio, text):
85
  if not text:
86
  raise ValueError("Please enter text to convert.")
87
 
88
- # Preprocess audio to create .npz history prompt
89
  history_prompt = preprocess_audio_to_npz(reference_audio)
90
 
91
  # Generate speech using the processed history prompt
@@ -116,7 +110,6 @@ with gr.Blocks(title="Voice Cloning TTS with Bark") as app:
116
 
117
  generate_btn = gr.Button("Generate Speech")
118
  audio_output = gr.Audio(label="Generated Speech", interactive=False)
119
-
120
  # Connect the button to the generation function
121
  generate_btn.click(
122
  fn=generate_speech,
 
1
  import gradio as gr
2
  from bark import SAMPLE_RATE, generate_audio, preload_models
 
 
3
  from scipy.io.wavfile import write as write_wav
4
  import tempfile
5
  import librosa
 
31
  audio_path (str): Path to the input audio file.
32
 
33
  Returns:
34
+ str: Path to the input audio file or generated .npz file.
35
  """
36
  # Load and resample audio to Bark's SAMPLE_RATE (24kHz)
37
  audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE, mono=True)
 
39
  # Ensure audio is a float32 array
40
  audio = audio.astype(np.float32)
41
 
42
+ try:
43
+ # Attempt to use the audio file directly as history_prompt
44
+ # Bark may support raw audio files as history_prompt in some versions
45
+ return audio_path
46
+ except:
47
+ # Fallback: Create a minimal .npz file with dummy tokens
48
+ with torch.device("cpu"):
49
+ # Generate dummy tokens (minimal structure to avoid errors)
50
+ dummy_tokens = np.zeros((512,), dtype=np.int32) # Placeholder tokens
51
+
52
+ # Create history prompt dictionary
53
+ history_prompt = {
54
+ "semantic_prompt": dummy_tokens,
55
+ "coarse_prompt": dummy_tokens,
56
+ "fine_prompt": dummy_tokens
57
+ }
58
+
59
+ # Save to temporary .npz file
60
+ with tempfile.NamedTemporaryFile(suffix=".npz", delete=False) as temp_file:
61
+ np.savez(temp_file.name, **history_prompt)
62
+ npz_path = temp_file.name
63
 
64
+ return npz_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  def generate_speech(reference_audio, text):
67
  """
 
79
  if not text:
80
  raise ValueError("Please enter text to convert.")
81
 
82
+ # Preprocess audio to get history prompt (audio file or .npz)
83
  history_prompt = preprocess_audio_to_npz(reference_audio)
84
 
85
  # Generate speech using the processed history prompt
 
110
 
111
  generate_btn = gr.Button("Generate Speech")
112
  audio_output = gr.Audio(label="Generated Speech", interactive=False)
 
113
  # Connect the button to the generation function
114
  generate_btn.click(
115
  fn=generate_speech,