Spaces:

DeepLearning101
/

Speech-Quality-Inspection_Meta-Denoiser

Running

File size: 1,633 Bytes

06fc5c8
 
00885ea
 
 
 
 
 
 
0075f67
 
00885ea
 
 
0075f67
 
 
 
00885ea
0075f67
00885ea
 
e34c54b
00885ea
0075f67
00885ea
 
06fc5c8
 
e34c54b
 
06fc5c8
0075f67
00885ea
 
06fc5c8
00885ea
 
 
 
 
8581ee6
06fc5c8
 
0075f67

import os
import time
import json
import gradio as gr
import torch
import torchaudio
import numpy as np
from denoiser.demucs import Demucs
from pydub import AudioSegment

# Path to the pretrained Demucs weights; read with torch.load and applied
# to the model via load_state_dict in transcribe().
modelpath = './denoiser/master64.th'

_demucs_model = None  # lazily-built Demucs instance, shared by every request


def _get_demucs():
    """Build the Demucs denoiser once and reuse it for later requests."""
    global _demucs_model
    if _demucs_model is None:
        model = Demucs(hidden=64)
        # NOTE: torch.load unpickles the file, so modelpath must only ever
        # point at a trusted checkpoint.
        state_dict = torch.load(modelpath, map_location='cpu')
        model.load_state_dict(state_dict)
        model.eval()  # inference only
        _demucs_model = model
    return _demucs_model


def transcribe(file_upload, microphone):
    """Denoise an audio file with Demucs and return the enhanced file path.

    Args:
        file_upload: Path of an uploaded audio file, or None.
        microphone: Path of a microphone recording, or None. Takes
            precedence over ``file_upload`` when both are given.

    Returns:
        The path ``"enhanced.wav"`` of the denoised audio written to the
        current working directory.

    Raises:
        ValueError: If neither input is provided.
    """
    file = microphone if microphone is not None else file_upload
    if file is None:
        raise ValueError(
            "No audio provided: upload a file or record from the microphone."
        )

    demucs = _get_demucs()
    x, sr = torchaudio.load(file)

    # The model is built with its default (single) input channel, so mix
    # multi-channel recordings down to mono before inference.
    if x.shape[0] > 1:
        x = x.mean(dim=0, keepdim=True)

    # Align the input with the sample rate the model declares, if it exposes
    # one; otherwise keep the input rate untouched.
    target_sr = getattr(demucs, "sample_rate", sr)
    if sr != target_sr:
        x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)(x)
        sr = target_sr

    # No gradients are needed for inference; skipping autograd saves memory.
    with torch.no_grad():
        out = demucs(x[None])[0]

    # Scale down only when the peak exceeds 1 so the saved audio does not clip.
    out = out / max(out.abs().max().item(), 1)
    torchaudio.save('enhanced.wav', out, sr)

    # Only the denoised audio needs its bitrate lowered before it is passed
    # on to speech recognition, hence the pydub re-export.
    enhanced = AudioSegment.from_wav('enhanced.wav')
    # export() returns the open output file handle; close it explicitly.
    enhanced.export('enhanced.wav', format="wav", bitrate="256k").close()
    return "enhanced.wav"

# Gradio UI for the denoiser. Inputs are passed to transcribe() positionally,
# so the first component is the uploaded file (it also receives the example
# files below) and the second is the microphone recording.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="語音質檢原始音檔"),
        gr.Audio(source="microphone", type="filepath", label="語音質檢麥克風實時錄音"),
    ],
    outputs=gr.Audio(type="filepath", label="Output"),
    title="<p style='text-align: center'><a href='https://www.twman.org/AI' target='_blank'>語音質檢噪音去除 (語音增強)：Meta Denoiser</a></p>",
    description="為了提升語音識別的效果，可以在識別前先進行噪音去除",
    allow_flagging="never",
    examples=[
        ["exampleAudio/15s_2020-03-27_sep1.wav"],
        ["exampleAudio/13s_2020-03-27_sep2.wav"],
        ["exampleAudio/30s_2020-04-23_sep1.wav"],
        ["exampleAudio/15s_2020-04-23_sep2.wav"],
    ],
)

# Start the app with request queueing enabled and debug output on.
demo.launch(enable_queue=True, debug=True)