Spaces:

soiz
/

voice-to-pth

Running

soiz committed on Nov 11, 2024

Commit

7fffcc0

verified ·

1 Parent(s): fddc52d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,13 +1,20 @@
 import gradio as gr
 import torch
 import librosa
 def audio_to_pth(audio):
     # 音声ファイル（ファイルパス）を読み込む
     y, sr = librosa.load(audio, sr=None)
-    # 音声データをテンソルに変換
-    tensor = torch.tensor(y)
     # テンソルを .pth ファイルに保存
     output_path = "audio_features.pth"
@@ -18,10 +25,10 @@ def audio_to_pth(audio):
 # Gradio インターフェースの設定
 iface = gr.Interface(
     fn=audio_to_pth,
-    inputs=gr.Audio(type="filepath"),  # `source="upload"` は不要です
     outputs="file",
     title="Audio to .PTH Converter",
-    description="Upload an audio file to convert it to a .pth file containing audio features."
 )
 iface.launch()

 import gradio as gr
 import torch
 import librosa
+import numpy as np
 def audio_to_pth(audio):
     # 音声ファイル（ファイルパス）を読み込む
     y, sr = librosa.load(audio, sr=None)
+    # メルスペクトログラムに変換
+    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
+    # メルスペクトログラムを対数スケールに変換（TTSモデルに適した形式）
+    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
+    # メルスペクトログラムをテンソルに変換
+    tensor = torch.tensor(mel_spectrogram_db)
     # テンソルを .pth ファイルに保存
     output_path = "audio_features.pth"
 # Gradio インターフェースの設定
 iface = gr.Interface(
     fn=audio_to_pth,
+    inputs=gr.Audio(type="filepath"),
     outputs="file",
     title="Audio to .PTH Converter",
+    description="Upload an audio file to convert it to a .pth file containing audio features in mel spectrogram format."
 )
 iface.launch()