Update app.py
Browse files
app.py
CHANGED
@@ -1,79 +1,96 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
import
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
"
|
11 |
-
"Choose Light / Medium / Strong (1× / 2× / 3× passes)."
|
12 |
)
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
y
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
-
with gr.Blocks(title=APP_TITLE, theme=THEME) as demo:
|
65 |
-
gr.Markdown(f"## {APP_TITLE}\n{APP_DESC}")
|
66 |
with gr.Row():
|
67 |
-
|
68 |
-
strength = gr.Radio(["Light","Medium","Strong"], value="Medium",
|
69 |
-
label="Noise
|
70 |
-
|
|
|
71 |
|
72 |
with gr.Row():
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
run.click(
|
77 |
|
78 |
-
|
79 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
+
import torch
|
4 |
+
import torchaudio
|
5 |
+
from speechbrain.pretrained import SpectralMaskEnhancement
|
6 |
+
|
7 |
+
# Sample rate (Hz) used for all audio handed to the enhancement model.
TARGET_SR = 16000

# Load the pretrained MetricGAN+ enhancer a single time at import; the
# checkpoint is fetched from `source` and kept under `savedir` in the Space.
ENHANCER = SpectralMaskEnhancement.from_hparams(
    savedir="pretrained/metricgan-plus-voicebank",
    source="speechbrain/metricgan-plus-voicebank",
)
|
14 |
+
|
15 |
+
def _to_tensor(mono_np: np.ndarray) -> torch.Tensor:
|
16 |
+
# ensure float32 [-1,1]
|
17 |
+
t = torch.from_numpy(mono_np.astype(np.float32))
|
18 |
+
peak = t.abs().max().clamp(min=1e-8)
|
19 |
+
return (t / peak)
|
20 |
+
|
21 |
+
def _enhance_channel(wav_np: np.ndarray, in_sr: int, mix: float) -> np.ndarray:
    """Enhance one audio channel and blend it with the unprocessed signal.

    The channel is converted with ``_to_tensor`` (peak-normalised float32),
    resampled to ``TARGET_SR`` when ``in_sr`` differs, run through
    ``ENHANCER``, resampled back to ``in_sr``, and trimmed or zero-padded to
    the input length before the wet/dry mix.

    Args:
        wav_np: 1-D array holding the samples of a single channel.
        in_sr: Sample rate of ``wav_np`` in Hz.
        mix: Wet/dry ratio; 0.0 keeps only the normalised input, 1.0 keeps
            only the enhanced signal.

    Returns:
        1-D NumPy array with the same number of samples as ``wav_np``.
    """
    dry = _to_tensor(wav_np)
    needs_resample = in_sr != TARGET_SR

    model_in = (
        torchaudio.functional.resample(dry, in_sr, TARGET_SR)
        if needs_resample
        else dry
    )

    with torch.no_grad():
        # enhance_batch takes a [B, T] batch plus *relative* lengths (one
        # fraction per item), not a sample rate. A single full-length item
        # therefore has length 1.0.
        enhanced = ENHANCER.enhance_batch(
            model_in.unsqueeze(0), lengths=torch.tensor([1.0])
        )
    # Take the only batch item; move to CPU so it can be mixed with `dry`
    # (a no-op when the enhancer already runs on CPU).
    wet = enhanced[0].cpu()

    if needs_resample:
        # Back to the caller's sample rate.
        wet = torchaudio.functional.resample(wet, TARGET_SR, in_sr)

    # Resampling round-trips can change the length by a few samples; force
    # the enhanced signal to exactly the input length before mixing.
    n_samples = dry.shape[0]
    if wet.shape[0] >= n_samples:
        wet = wet[:n_samples]
    else:
        wet = torch.nn.functional.pad(wet, (0, n_samples - wet.shape[0]))

    blended = (1.0 - mix) * dry + mix * wet
    return blended.cpu().numpy()
|
48 |
+
|
49 |
+
def denoise(audio, strength):
    """Run noise reduction on a Gradio audio value and return an A/B pair.

    Args:
        audio: ``(sample_rate, samples)`` tuple as passed by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was provided.
            ``samples`` is shaped ``[T]`` (mono) or ``[T, C]`` (one column
            per channel).
        strength: "Light", "Medium" or "Strong"; selects the wet/dry mix
            (0.5 / 0.75 / 1.0). Any other value falls back to 0.75.

    Returns:
        ``((sr, original), (sr, processed))`` so both clips can be compared,
        or ``(None, None)`` when ``audio`` is ``None``. ``processed`` has the
        same layout (``[T]`` or ``[T, C]``) as the input samples.
    """
    if audio is None:
        return None, None

    sr, data = audio
    if data.size == 0:
        # Zero-length clip: nothing to enhance, hand it back unchanged.
        return (sr, data), (sr, data)

    mix = {"Light": 0.5, "Medium": 0.75, "Strong": 1.0}.get(strength, 0.75)

    if data.ndim == 1:
        processed = _enhance_channel(data, sr, mix)
    else:
        # Process every column independently instead of assuming exactly
        # two channels, then restore the [T, C] layout.
        processed = np.stack(
            [_enhance_channel(data[:, ch], sr, mix) for ch in range(data.shape[1])],
            axis=1,
        )

    # Return both so users can A/B.
    return (sr, data), (sr, processed)
|
78 |
+
|
79 |
+
# -------- UI --------
# Layout: an input row (upload + strength), the action button, then an output
# row with the original and processed clips side by side.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown(
        "## Zack’s Audio Outpost — AI Noise Reducer\n"
        "Upload a file and compare **Original** vs **Processed**."
    )

    with gr.Row():
        audio_in = gr.Audio(type="numpy", label="Upload Audio")
        strength = gr.Radio(
            choices=["Light", "Medium", "Strong"],
            value="Medium",
            label="Noise Reduction Strength",
        )

    run = gr.Button(value="Run Noise Reduction", variant="primary")

    with gr.Row():
        out_orig = gr.Audio(label="Original Audio")
        out_proc = gr.Audio(label="Processed Audio")

    # Wire the button to the denoiser; it fills both players for A/B listening.
    run.click(
        fn=denoise,
        inputs=[audio_in, strength],
        outputs=[out_orig, out_proc],
    )

demo.launch()
|
|