File size: 1,549 Bytes
a01445b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
from transformers import pipeline
from huggingface_hub import InferenceClient
import gradio as gr

# Initialize the translation pipeline.
# NLLB-200 (distilled, 600M params) handles Chinese -> English; loading the
# model happens once at import time, so the first request is not delayed.
translation_pipeline = pipeline("translation", model="facebook/nllb-200-distilled-600M")

# Initialize the text-to-speech client.
# Make sure you have HF_TOKEN set as a secret in your Hugging Face Space.
# NOTE(review): os.environ.get returns None when HF_TOKEN is unset — the
# client is still constructed, but TTS calls will fail at request time.
client = InferenceClient(
    provider="fal-ai",
    api_key=os.environ.get("HF_TOKEN"),
)

def translate_and_speak(chinese_text):
    """
    Translate Chinese text to English and synthesize speech for the result.

    Args:
        chinese_text: Input text in Chinese (str), as typed into the UI.

    Returns:
        tuple[str, bytes | None]: (translated English text, audio bytes).
        The second element is None when speech generation fails or the
        input is empty, so the gr.Audio output always receives a value
        it can render.
    """
    # Guard: don't run the pipeline on empty/whitespace-only input.
    if not chinese_text or not chinese_text.strip():
        return "", None

    # Translate the text; the pipeline returns a list of dicts like
    # [{'translation_text': ...}].
    translated_text = translation_pipeline(chinese_text)[0]['translation_text']

    # Generate speech from the translated text. TTS is a remote call and
    # can fail (missing HF_TOKEN, provider outage, rate limits).
    try:
        audio_bytes = client.text_to_speech(
            translated_text,
            model="hexgrad/Kokoro-82M",
        )
        return translated_text, audio_bytes
    except Exception as e:
        # Bug fix: the original returned the error *string* as the second
        # output, but that output is bound to a gr.Audio component, which
        # cannot render a string. Return None for the audio instead and
        # log the error for the Space console.
        print(f"Error generating speech: {e}")
        return translated_text, None


# Build the web UI: one Chinese-text input, paired text + audio outputs.
_output_components = [
    gr.Textbox(label="Translated English Text"),
    gr.Audio(label="Generated Speech", format="wav"),
]

iface = gr.Interface(
    translate_and_speak,
    gr.Textbox(label="Enter Chinese Text"),
    _output_components,
    title="Chinese to English Translation and Text-to-Speech",
    description="Translate Chinese text to English and listen to the English translation.",
)

# Launch the Gradio app only when run as a script (not when imported,
# e.g. by the Hugging Face Spaces runner or tests).
if __name__ == "__main__":
    iface.launch()