Project Beatrice
Initial commit
5124d5d
import gradio as gr
import librosa
import torch
from src import CandidateGenerator
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single generator instance, constructed once when the module is loaded.
candidate_generator = CandidateGenerator(device)
def process_audio_text(audio_file, text, num_candidates):
    """Estimate readings for ``text`` using the accompanying audio file.

    Args:
        audio_file: Path of the audio file to analyse, or ``None`` when no
            audio was supplied.
        text: Text whose reading should be estimated. ``None`` and
            whitespace-only strings are treated as missing input.
        num_candidates: Number of candidates to request from the generator;
            converted with ``int()`` before it is passed on.

    Returns:
        A 3-tuple of strings ``(candidates, hubert, debug)``:

        * ``candidates`` -- one line per generated candidate with its MeCab
          cost, CTC loss (3 decimals) and space-separated phonemes.
        * ``hubert`` -- CTC loss and phonemes of the HuBERT prediction.
        * ``debug`` -- ``repr()`` of the complete result object.

        When either input is missing, the first element is a validation
        message and the other two are empty strings.
    """
    # Treat a missing text like an empty one: calling ``.strip()`` on ``None``
    # would raise AttributeError instead of showing the validation message
    # (which reads "Both audio and text are required.").
    if audio_file is None or text is None or not text.strip():
        return "音声とテキストの両方が必要です。", "", ""

    # Load the audio at a 16 kHz sample rate; the returned rate is not needed.
    audio_16khz, _ = librosa.load(audio_file, sr=16000)
    results = candidate_generator.generate(text, audio_16khz, int(num_candidates))

    # One formatted line per candidate, in the order the generator returned them.
    candidates_str = "\n".join(
        f"mecab_cost: {candidate['mecab_cost']}, "
        f"ctc_loss: {candidate['ctc_loss']:.3f}, "
        f"phonemes: {' '.join(candidate['phonemes'])}"
        for candidate in results["candidates"]
    )

    hubert_pred = results["hubert_prediction"]
    hubert_output = (
        f"ctc_loss: {hubert_pred['ctc_loss']:.3f}, "
        f"phonemes: {' '.join(hubert_pred['phonemes'])}"
    )

    # Raw dump of everything the generator returned, for inspection.
    debug_output = repr(results)
    return candidates_str, hubert_output, debug_output
# UI definition: an audio upload, a text box and a candidate-count slider are
# passed to process_audio_text, whose three return values fill the three
# output text boxes in order.
interface = gr.Interface(
    title="音声と漢字仮名交じりテキストからふりがなを推定するツール v2 (工事中)",
    description="音素認識モデルと MeCab による読みの推定を行います。",
    fn=process_audio_text,
    inputs=[
        # The audio is handed to the callback as a file path.
        gr.Audio(label="音声ファイル", type="filepath"),
        gr.Textbox(
            placeholder="漢字仮名交じりのテキストを入力してください",
            label="テキスト",
        ),
        # Candidate count: 1 to 20 in steps of 1, starting at 10.
        gr.Slider(
            label="MeCab の候補数",
            minimum=1,
            maximum=20,
            step=1,
            value=10,
        ),
    ],
    outputs=[
        gr.Textbox(lines=10, label="候補"),
        gr.Textbox(lines=1, label="HuBERT による予測"),
        gr.Textbox(lines=20, label="全ての結果"),
    ],
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()