import os  # NOTE(review): unused here — kept in case another chunk of the project relies on it
from transformers import pipeline
from huggingface_hub import InferenceClient  # NOTE(review): unused here — kept for the same reason
import gradio as gr

# Initialize the translation pipeline.
# NLLB-200 expects FLORES-200 language codes ("zho_Hans" / "eng_Latn"),
# not ISO-639-1 "zh" / "en" — the short codes are not in its tokenizer's
# language list and break target-language selection.
translation_pipeline = pipeline(
    "translation",
    model="facebook/nllb-200-distilled-600M",
    src_lang="zho_Hans",
    tgt_lang="eng_Latn",
)

# Initialize the text-to-speech pipeline (English MMS-TTS voice).
tts_pipeline = pipeline("text-to-speech", model="facebook/mms-tts-eng")


def translate_and_speak(chinese_text):
    """Translate Chinese text to English and synthesize English speech.

    Args:
        chinese_text: Source text in (Simplified) Chinese.

    Returns:
        A 2-tuple matching the two Gradio outputs:
        - the translated English text (with an appended error note if TTS failed),
        - either ``(sampling_rate, waveform)`` for ``gr.Audio`` or ``None``
          when no audio could be produced.
    """
    # Guard against empty input so we don't invoke both models on nothing.
    if not chinese_text or not chinese_text.strip():
        return "", None

    # Translate; the pipeline accepts a list and returns a list of dicts.
    translated_text = translation_pipeline([chinese_text])[0]["translation_text"]

    # Speech synthesis is best-effort: on failure we still return the
    # translation. The function MUST return exactly two values because the
    # Gradio interface declares exactly two outputs.
    try:
        audio_output = tts_pipeline(translated_text)
        sampling_rate = audio_output["sampling_rate"]
        # MMS-TTS may return the waveform with a leading channel axis of
        # shape (1, n); flatten so gr.Audio receives a 1-D signal.
        waveform = audio_output["audio"].squeeze()
        return translated_text, (sampling_rate, waveform)
    except Exception as e:
        # Fold the error into the text output instead of returning a third
        # value (which would crash the 2-output Gradio interface).
        return f"{translated_text}\n[Error generating speech: {e}]", None


# Create the Gradio interface: one text input, two outputs (text + audio).
iface = gr.Interface(
    fn=translate_and_speak,
    inputs=gr.Textbox(label="Enter Chinese Text"),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Audio(label="Generated Speech", format="wav"),
    ],
    title="Chinese to English Translation and Text-to-Speech",
    description="Translate Chinese text to English and listen to the English translation.",
)

# Launch the Gradio app only when run as a script.
if __name__ == "__main__":
    iface.launch()