Spaces:

SmilingTree
/

Chinese-to-English-translation-speaker

Sleeping

SmilingTree committed on Jul 20

Commit

5ac9f0f

verified ·

1 Parent(s): 24c142b

change text-to-speech pipeline to facebook tts

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,12 +7,9 @@ import gradio as gr
 # Specify the source and target languages for translation
 translation_pipeline = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="zh", tgt_lang="en")
-# Initialize the text-to-speech client
-# Make sure you have HF_TOKEN set as a secret in your Hugging Face Space
-client = InferenceClient(
-    provider="replicate",
-    api_key=os.environ.get("HF_TOKEN"),
-)
 def translate_and_speak(chinese_text):
     """
@@ -25,13 +22,15 @@ def translate_and_speak(chinese_text):
     # Generate speech from the translated text
     # Use a try-except block to handle potential errors during speech generation
     try:
-        audio_bytes = client.text_to_speech(
-            translated_text,
-            model="nari-labs/Dia-1.6B",
-        )
-        return translated_text, audio_bytes
     except Exception as e:
-        return translated_text, f"Error generating speech: {e}"
 # Create the Gradio interface

 # Specify the source and target languages for translation
 translation_pipeline = pipeline("translation", model="facebook/nllb-200-distilled-600M", src_lang="zh", tgt_lang="en")
+# Initialize the text-to-speech pipeline
+tts_pipeline = pipeline("text-to-speech", model="facebook/mms-tts-eng")
 def translate_and_speak(chinese_text):
     """
     # Generate speech from the translated text
     # Use a try-except block to handle potential errors during speech generation
     try:
+        # The transformers text-to-speech pipeline returns a dictionary
+        audio_output = tts_pipeline(translated_text)
+        # The audio data is in the 'audio' key and the sampling rate in 'sampling_rate'
+        audio_bytes = audio_output['audio']
+        sampling_rate = audio_output['sampling_rate']
+        return translated_text, (sampling_rate, audio_bytes) # Gradio expects a tuple for audio
     except Exception as e:
+        return translated_text, None, f"Error generating speech: {e}"
 # Create the Gradio interface