shukdevdatta123 committed on
Commit
8d69e71
·
verified ·
1 Parent(s): 9c4336f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -6,7 +6,7 @@ from tortoise.api import TextToSpeech
6
  from tortoise.utils.audio import load_audio
7
 
8
  # 1) Initialize the Tortoise TTS engine at startup
9
- tts = TextToSpeech() # downloads and caches models automatically
10
 
11
  # 2) Define a helper to generate speech from a reference clip + text
12
  def generate_speech(reference_audio_path, text):
@@ -15,22 +15,20 @@ def generate_speech(reference_audio_path, text):
15
  text: the string to synthesize
16
  returns: path to a 24 kHz WAV file with your cloned voice
17
  """
18
- # Load and resample the reference clip to 22 050 Hz as a torch tensor
19
- # (load_audio handles mono conversion)
20
- ref_waveform = load_audio(reference_audio_path, sr=22050)
21
 
22
- # Synthesize: one clip, use the 'fast' preset for decent speed/quality tradeoff
23
- # returns a Tensor of shape (1, S) at 24 kHz :contentReference[oaicite:1]{index=1}
24
  output_tensor = tts.tts_with_preset(
25
  text,
26
  voice_samples=[ref_waveform],
27
  preset="fast"
28
  )
29
 
30
- # Convert to NumPy and save to a temporary WAV (float32, 24 kHz)
31
  wav_np = output_tensor.squeeze().cpu().numpy()
32
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
33
- sf.write(tmp.name, wav_np, samplerate=24000) # sample rate is 24 kHz :contentReference[oaicite:2]{index=2}
34
  return tmp.name
35
 
36
  # 3) Build the Gradio interface
@@ -55,4 +53,4 @@ with gr.Blocks(title="Tortoise Voice Cloning TTS") as app:
55
  )
56
 
57
  if __name__ == "__main__":
58
- app.launch()
 
6
  from tortoise.utils.audio import load_audio
7
 
8
  # 1) Initialize the Tortoise TTS engine at startup
9
+ tts = TextToSpeech() # Downloads and caches models automatically
10
 
11
  # 2) Define a helper to generate speech from a reference clip + text
12
  def generate_speech(reference_audio_path, text):
 
15
  text: the string to synthesize
16
  returns: path to a 24 kHz WAV file with your cloned voice
17
  """
18
+ # Load the reference clip (Tortoise auto-resamples to 22 050 Hz)
19
+ ref_waveform = load_audio(reference_audio_path)
 
20
 
21
+ # Generate speech using 'fast' preset
 
22
  output_tensor = tts.tts_with_preset(
23
  text,
24
  voice_samples=[ref_waveform],
25
  preset="fast"
26
  )
27
 
28
+ # Save to temp WAV (float32, 24 kHz)
29
  wav_np = output_tensor.squeeze().cpu().numpy()
30
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
31
+ sf.write(tmp.name, wav_np, samplerate=24000)
32
  return tmp.name
33
 
34
  # 3) Build the Gradio interface
 
53
  )
54
 
55
  if __name__ == "__main__":
56
+ app.launch()