Spaces:

Ryanus
/

fastspeech2

Running

File size: 2,856 Bytes

# app.py
import os
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.TTS import FastSpeech2
from speechbrain.inference.vocoders import HIFIGAN

# Load the FastSpeech2 model (used below to turn text into a mel spectrogram).
# Pinned to the CPU via run_opts.
fastspeech2 = FastSpeech2.from_hparams(
    run_opts=dict(device="cpu"),
    savedir="pretrained_models/tts-fastspeech2-ljspeech",
    source="speechbrain/tts-fastspeech2-ljspeech",
)

# Load the vocoder (used below to turn the mel spectrogram into a waveform).
# Also pinned to the CPU.
hifi_gan = HIFIGAN.from_hparams(
    run_opts=dict(device="cpu"),
    savedir="pretrained_models/tts-hifigan-ljspeech",
    source="speechbrain/tts-hifigan-ljspeech",
)

# Sample rate (Hz) used when writing the synthesized waveform. The SpeechBrain
# model card for the LJSpeech FastSpeech2 / HiFi-GAN checkpoints saves output at
# 22050 Hz; writing the same samples at another rate (e.g. 16000) makes
# playback slower and lower-pitched.
_LJSPEECH_SAMPLE_RATE = 22050


def synthesize_speech(text):
    """Convert the input text to speech.

    Args:
        text: The text to synthesize. ``None``, an empty string, or a
            whitespace-only string is rejected without running the models.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the generated WAV file, or ``None`` when the input is blank or
        synthesis fails. ``status_message`` is a user-facing description of
        the outcome. Exceptions raised during synthesis are not propagated;
        they are reported through ``status_message`` instead.
    """
    # Reject empty / whitespace-only input up front and report it through the
    # status message instead of running the models.
    if not text or not text.strip():
        return None, "請輸入有效的文字進行語音合成。"

    try:
        # Encode the text into a mel spectrogram. Only the spectrogram is
        # needed; the other returned values are ignored.
        mel_outputs, _durations, _pitch, _energy = fastspeech2.encode_text(
            [text], pace=1.0
        )

        # Vocode the mel spectrogram into a waveform. Dropping dim 1 leaves a
        # 2-D tensor that torchaudio.save can write directly (for the single
        # input sentence this is one row of samples), so no NumPy round-trip
        # is needed.
        waveform = hifi_gan.decode_batch(mel_outputs).squeeze(1).detach().cpu()

        # Write each result to its own temporary file so that concurrent
        # requests do not overwrite one another's audio. The handle is closed
        # before torchaudio reopens the path by name.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name

        torchaudio.save(output_file, waveform, _LJSPEECH_SAMPLE_RATE)

        return output_file, "語音合成成功！"

    except IndexError as e:
        # Report IndexError separately with a more specific message that
        # suggests trying different text.
        return None, f"語音合成失敗：處理文字時發生錯誤 (IndexError)。請嘗試不同的文字。錯誤詳情: {e}"
    except Exception as e:
        # UI boundary: surface any other failure as a status message rather
        # than raising.
        return None, f"語音合成失敗：發生未知錯誤。錯誤詳情: {e}"

# Build the Gradio interface: one text input and two outputs (the synthesized
# audio file and a status message), matching the pair returned by
# synthesize_speech.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(lines=2, placeholder="請輸入您想要合成的文字..."),
    outputs=[
        gr.Audio(type="filepath", label="合成語音"),
        gr.Textbox(label="狀態訊息"),  # text box that shows the status or error message
    ],
    title="FastSpeech2 文字轉語音 (CPU)",
    description="這是一個使用 SpeechBrain 的 FastSpeech2 模型在 Hugging Face Spaces 的 CPU 上進行文字轉語音的演示。由於在 CPU 上運行，合成速度可能會較慢。",
    # The loaded checkpoints are the LJSpeech (English) models, so the
    # clickable examples are English sentences the model can actually read.
    examples=[
        ["Hello, this is an example of speech synthesis using FastSpeech2."],
        ["Hugging Face Spaces makes deploying machine learning applications very simple."],
        ["Running large models on a CPU may take some time."],
    ],
)

# Launch the Gradio application.
iface.launch()