SmilingTree committed on
Commit
5ac9f0f
·
verified ·
1 Parent(s): 24c142b

Change text-to-speech from the Replicate InferenceClient to the local Facebook MMS-TTS pipeline (facebook/mms-tts-eng)

Browse files
Files changed (1) hide show
  1. app.py +11 -12
app.py CHANGED
@@ -7,12 +7,9 @@ import gradio as gr
7
  # Specify the source and target languages for translation
8
  translation_pipeline = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="zh", tgt_lang="en")
9
 
10
- # Initialize the text-to-speech client
11
- # Make sure you have HF_TOKEN set as a secret in your Hugging Face Space
12
- client = InferenceClient(
13
- provider="replicate",
14
- api_key=os.environ.get("HF_TOKEN"),
15
- )
16
 
17
  def translate_and_speak(chinese_text):
18
  """
@@ -25,13 +22,15 @@ def translate_and_speak(chinese_text):
25
  # Generate speech from the translated text
26
  # Use a try-except block to handle potential errors during speech generation
27
  try:
28
- audio_bytes = client.text_to_speech(
29
- translated_text,
30
- model="nari-labs/Dia-1.6B",
31
- )
32
- return translated_text, audio_bytes
 
 
33
  except Exception as e:
34
- return translated_text, f"Error generating speech: {e}"
35
 
36
 
37
  # Create the Gradio interface
 
7
  # Specify the source and target languages for translation
8
  translation_pipeline = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="zh", tgt_lang="en")
9
 
10
+ # Initialize the text-to-speech pipeline
11
+ tts_pipeline = pipeline("text-to-speech", model="facebook/mms-tts-eng")
12
+
 
 
 
13
 
14
  def translate_and_speak(chinese_text):
15
  """
 
22
  # Generate speech from the translated text
23
  # Use a try-except block to handle potential errors during speech generation
24
  try:
25
+ # The transformers text-to-speech pipeline returns a dictionary
26
+ audio_output = tts_pipeline(translated_text)
27
+ # The audio data is in the 'audio' key and the sampling rate in 'sampling_rate'
28
+ audio_bytes = audio_output['audio']
29
+ sampling_rate = audio_output['sampling_rate']
30
+ return translated_text, (sampling_rate, audio_bytes) # Gradio expects a tuple for audio
31
+
32
  except Exception as e:
33
+ return translated_text, None, f"Error generating speech: {e}"
34
 
35
 
36
  # Create the Gradio interface