File size: 1,848 Bytes
a01445b 4101904 a01445b 5ac9f0f a01445b 4101904 a01445b 5ac9f0f a01445b 5ac9f0f a01445b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
from transformers import pipeline
from huggingface_hub import InferenceClient
import gradio as gr
# Initialize the translation pipeline.
# BUG FIX: NLLB-200 models use FLORES-200 language codes, not ISO-639-1.
# "zh"/"en" are not recognized by the NLLB tokenizer; the correct codes are
# "zho_Hans" (Simplified Chinese) and "eng_Latn" (English, Latin script).
translation_pipeline = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    src_lang="zho_Hans",
    tgt_lang="eng_Latn",
)

# Initialize the text-to-speech pipeline (MMS English TTS voice).
tts_pipeline = pipeline("text-to-speech", model="facebook/mms-tts-eng")
def translate_and_speak(chinese_text):
    """Translate Chinese text to English and synthesize English speech.

    Args:
        chinese_text: The Chinese input text to translate.

    Returns:
        A 2-tuple ``(translated_text, audio)`` where ``audio`` is either a
        ``(sampling_rate, audio_array)`` pair, as expected by ``gr.Audio``,
        or ``None`` if speech generation failed (or the input was empty).
    """
    # Robustness: skip both pipelines on empty/whitespace-only input.
    if not chinese_text or not chinese_text.strip():
        return "", None

    # The translation pipeline accepts a list of texts; we pass one item
    # and read back the single result's 'translation_text' field.
    translated_text = translation_pipeline([chinese_text])[0]['translation_text']

    try:
        # The transformers text-to-speech pipeline returns a dict with
        # the raw samples under 'audio' and the rate under 'sampling_rate'.
        audio_output = tts_pipeline(translated_text)
        # Gradio's Audio component expects a (sampling_rate, data) tuple.
        return translated_text, (audio_output['sampling_rate'], audio_output['audio'])
    except Exception as e:
        # BUG FIX: the original returned a 3-tuple here, but the Gradio
        # interface declares exactly 2 outputs, so any TTS failure broke
        # the UI. Return 2 values and report the error on stdout instead.
        print(f"Error generating speech: {e}")
        return translated_text, None
# Gradio UI: one Chinese-text input mapped to translated text plus audio.
_input_box = gr.Textbox(label="Enter Chinese Text")
_output_text = gr.Textbox(label="Translated English Text")
_output_audio = gr.Audio(label="Generated Speech", format="wav")

iface = gr.Interface(
    fn=translate_and_speak,
    inputs=_input_box,
    outputs=[_output_text, _output_audio],
    title="Chinese to English Translation and Text-to-Speech",
    description="Translate Chinese text to English and listen to the English translation.",
)

if __name__ == "__main__":
    # Start the local demo server only when executed as a script.
    iface.launch()