DeepLearning101 committed on
Commit
631422a
·
verified ·
1 Parent(s): 2e23895

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -12,16 +12,28 @@ modelpath = './denoiser/master64.th'
12
 
13
  def transcribe(file_upload, microphone):
14
  file = microphone if microphone is not None else file_upload
 
 
15
  model = Demucs(hidden=64)
16
  state_dict = torch.load(modelpath, map_location='cpu')
17
  model.load_state_dict(state_dict)
18
- demucs = model
19
- x, sr = torchaudio.load(file)
20
- out = demucs(x[None])[0]
 
 
 
 
 
 
 
21
  out = out / max(out.abs().max().item(), 1)
22
  torchaudio.save('enhanced.wav', out, sr)
23
- enhanced = AudioSegment.from_wav('enhanced.wav') # 只有去完噪的需要降 bitrate 再做語音識別
 
 
24
  enhanced.export('enhanced.wav', format="wav", bitrate="256k")
 
25
  return "enhanced.wav"
26
 
27
  # import os
 
12
 
13
  def transcribe(file_upload, microphone):
14
  file = microphone if microphone is not None else file_upload
15
+
16
+ # 載入模型
17
  model = Demucs(hidden=64)
18
  state_dict = torch.load(modelpath, map_location='cpu')
19
  model.load_state_dict(state_dict)
20
+
21
+ # 載入音訊並強制轉單聲道
22
+ x, sr = torchaudio.load(file, channels_first=True) # 確保通道優先格式
23
+ if x.shape[0] > 1:
24
+ x = torch.mean(x, dim=0, keepdim=True) # 平均所有通道轉單聲道
25
+
26
+ # 執行降噪
27
+ out = model(x[None])[0] # 增加batch維度
28
+
29
+ # 後處理
30
  out = out / max(out.abs().max().item(), 1)
31
  torchaudio.save('enhanced.wav', out, sr)
32
+
33
+ # 降低位元率(僅供語音辨識使用)
34
+ enhanced = AudioSegment.from_wav('enhanced.wav')
35
  enhanced.export('enhanced.wav', format="wav", bitrate="256k")
36
+
37
  return "enhanced.wav"
38
 
39
  # import os