Update app.py
Browse files
app.py
CHANGED
@@ -1,116 +1,92 @@
|
|
1 |
-
import os
|
2 |
import numpy as np
|
3 |
import gradio as gr
|
4 |
import soundfile as sf
|
|
|
5 |
import torch
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
import torchaudio
|
11 |
-
import torchaudio.functional as AF
|
12 |
-
except Exception:
|
13 |
-
USE_TORCHAUDIO = False
|
14 |
-
from scipy.signal import resample_poly
|
15 |
-
|
16 |
-
# SpeechBrain MetricGAN+ enhancement (CPU)
|
17 |
-
from speechbrain.pretrained import SpectralMaskEnhancement
|
18 |
-
|
19 |
-
torch.set_num_threads(1)
|
20 |
DEVICE = "cpu"
|
21 |
-
|
22 |
-
|
23 |
-
# Load the enhancer once
|
24 |
-
ENHANCER = SpectralMaskEnhancement.from_hparams(
|
25 |
-
source=MODEL_ID,
|
26 |
-
savedir="pretrained_metricganp",
|
27 |
-
run_opts={"device": DEVICE}
|
28 |
-
)
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
def _to_mono(x: np.ndarray) -> np.ndarray:
|
33 |
-
# x shape: (
|
34 |
if x.ndim == 2 and x.shape[1] > 1:
|
35 |
-
|
36 |
-
|
|
|
37 |
|
38 |
def _resample(x: np.ndarray, sr_in: int, sr_out: int) -> np.ndarray:
|
39 |
if sr_in == sr_out:
|
40 |
-
return x
|
41 |
-
if USE_TORCHAUDIO:
|
42 |
-
with torch.no_grad():
|
43 |
-
t = torch.from_numpy(x).unsqueeze(0) # (1, time)
|
44 |
-
y = AF.resample(t, orig_freq=sr_in, new_freq=sr_out)
|
45 |
-
return y.squeeze(0).cpu().numpy().astype(np.float32)
|
46 |
-
# SciPy fall-back
|
47 |
g = np.gcd(sr_in, sr_out)
|
48 |
up, down = sr_out // g, sr_in // g
|
49 |
y = resample_poly(x, up, down).astype(np.float32)
|
50 |
return y
|
51 |
|
52 |
def _mix(dry: np.ndarray, wet: np.ndarray, strength: str) -> np.ndarray:
|
53 |
-
# Light / Medium / Strong → wet mix amounts
|
54 |
mix = {"Light": 0.4, "Medium": 0.7, "Strong": 1.0}.get(strength, 0.7)
|
55 |
-
# pad/truncate to the same length
|
56 |
n = min(len(dry), len(wet))
|
57 |
-
|
58 |
-
return out
|
59 |
|
60 |
def denoise(audio: tuple, strength: str):
|
61 |
-
"""
|
62 |
-
Gradio passes (sr, np.ndarray[int16/float32, shape=(n,) or (n, ch)]) when type='numpy'
|
63 |
-
Return the processed audio as (sr, np.ndarray[float32]).
|
64 |
-
"""
|
65 |
if audio is None:
|
66 |
raise gr.Error("Please upload an audio file.")
|
|
|
67 |
sr, data = audio
|
68 |
-
|
69 |
-
data = np.array(data, dtype=np.float32)
|
70 |
|
71 |
-
#
|
72 |
-
|
73 |
-
x_mono = np.clip(x_mono, -1.0, 1.0).astype(np.float32)
|
74 |
|
75 |
-
#
|
76 |
-
|
77 |
|
78 |
-
#
|
|
|
79 |
with torch.no_grad():
|
80 |
-
|
81 |
-
|
82 |
-
enhanced = ENHANCER.enhance_batch(inp, TARGET_SR)
|
83 |
if isinstance(enhanced, torch.Tensor):
|
84 |
enhanced = enhanced.squeeze(0).cpu().numpy().astype(np.float32)
|
85 |
|
86 |
-
#
|
87 |
-
|
88 |
-
|
89 |
-
# Mix according to strength (preserve dry transients)
|
90 |
-
y = _mix(dry=x_mono, wet=enhanced_sr, strength=strength)
|
91 |
-
|
92 |
-
# Return as mono track at original sr
|
93 |
-
return (sr, y.astype(np.float32))
|
94 |
|
|
|
|
|
|
|
95 |
|
96 |
-
#
|
97 |
-
with gr.Blocks(theme=gr.themes.Soft(), css="footer
|
98 |
gr.Markdown("### Zack’s Audio Outpost — AI Noise Reducer\nUpload a file and compare **Original vs Processed**.")
|
99 |
with gr.Row():
|
100 |
audio_in = gr.Audio(type="numpy", label="Upload Audio")
|
101 |
-
strength = gr.Radio(["Light",
|
102 |
run_btn = gr.Button("Run Noise Reduction", variant="primary")
|
103 |
with gr.Row():
|
104 |
orig = gr.Audio(label="Original")
|
105 |
-
|
106 |
|
107 |
-
def run(audio,
|
108 |
if audio is None:
|
109 |
raise gr.Error("Please upload an audio file.")
|
110 |
-
sr,
|
111 |
-
|
112 |
-
return (sr,
|
113 |
|
114 |
-
run_btn.click(
|
115 |
|
116 |
demo.launch()
|
|
|
|
|
1 |
import numpy as np
|
2 |
import gradio as gr
|
3 |
import soundfile as sf
|
4 |
+
from scipy.signal import resample_poly
|
5 |
import torch
|
6 |
|
7 |
+
# --- Runtime configuration -------------------------------------------------
# The SpeechBrain model is imported/loaded lazily to avoid failing at build
# time; this slot stays None until the model is first needed.
ENHANCER = None

# MetricGAN+ expects 16 kHz
TARGET_SR = 16000

# Inference runs on the CPU.
DEVICE = "cpu"

# Restrict torch to a single thread.
torch.set_num_threads(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
def get_enhancer():
    """Return the shared MetricGAN+ enhancer, creating it on the first call.

    The SpeechBrain import is performed inside the function so that importing
    this module does not require SpeechBrain to be importable. The loaded
    model is cached in the module-level ``ENHANCER`` and reused afterwards.
    """
    global ENHANCER

    # Fast path: the model has already been loaded.
    if ENHANCER is not None:
        return ENHANCER

    # Deferred import (see docstring).
    from speechbrain.pretrained import SpectralMaskEnhancement

    ENHANCER = SpectralMaskEnhancement.from_hparams(
        source="speechbrain/metricgan-plus-voicebank",
        savedir="pretrained_metricganp",
        run_opts={"device": DEVICE},
    )
    return ENHANCER
|
23 |
|
24 |
def _to_mono(x: np.ndarray) -> np.ndarray:
|
25 |
+
# x shape: (n,) or (n, ch); keep as float32 in [-1,1]
|
26 |
if x.ndim == 2 and x.shape[1] > 1:
|
27 |
+
x = np.mean(x, axis=1)
|
28 |
+
x = np.asarray(x, dtype=np.float32)
|
29 |
+
return np.clip(x, -1.0, 1.0)
|
30 |
|
31 |
def _resample(x: np.ndarray, sr_in: int, sr_out: int) -> np.ndarray:
|
32 |
if sr_in == sr_out:
|
33 |
+
return x.astype(np.float32, copy=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
g = np.gcd(sr_in, sr_out)
|
35 |
up, down = sr_out // g, sr_in // g
|
36 |
y = resample_poly(x, up, down).astype(np.float32)
|
37 |
return y
|
38 |
|
39 |
def _mix(dry: np.ndarray, wet: np.ndarray, strength: str) -> np.ndarray:
|
|
|
40 |
mix = {"Light": 0.4, "Medium": 0.7, "Strong": 1.0}.get(strength, 0.7)
|
|
|
41 |
n = min(len(dry), len(wet))
|
42 |
+
return dry[:n] * (1.0 - mix) + wet[:n] * mix
|
|
|
43 |
|
44 |
def denoise(audio: tuple, strength: str):
    """Run noise reduction on an uploaded clip and return the processed audio.

    Args:
        audio: ``(sample_rate, samples)`` as delivered by a Gradio audio
            input with ``type="numpy"``; ``samples`` is shaped ``(n,)`` or
            ``(n, ch)``.
        strength: "Light", "Medium" or "Strong"; controls how much of the
            enhanced signal is mixed with the original.

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is a mono float32 array
        at the original sample rate.

    Raises:
        gr.Error: If no audio was provided or the audio contains no samples.
    """
    if audio is None:
        raise gr.Error("Please upload an audio file.")

    sr, data = audio
    data = np.asarray(data)  # gradio sometimes gives list
    if data.size == 0:
        # Nothing to process; fail with a clear message instead of an
        # opaque error from the resampler or the model.
        raise gr.Error("The uploaded audio is empty.")

    # to mono + float32
    dry_mono = _to_mono(data)

    # resample to the rate the model expects
    x16 = _resample(dry_mono, sr_in=sr, sr_out=TARGET_SR)

    # run enhancer (lazy load)
    enhancer = get_enhancer()
    with torch.no_grad():
        inp = torch.from_numpy(x16).unsqueeze(0)  # (1, time)
        # The second parameter of enhance_batch is `lengths` (relative
        # length of each batch item), not a sample rate. The batch holds a
        # single full-length clip, hence [1.0].
        enhanced = enhancer.enhance_batch(inp, lengths=torch.tensor([1.0]))

    if isinstance(enhanced, torch.Tensor):
        enhanced = enhanced.squeeze(0).cpu().numpy().astype(np.float32)

    # back to original SR
    enh_sr = _resample(enhanced, sr_in=TARGET_SR, sr_out=sr)

    # wet/dry
    out = _mix(dry_mono, enh_sr, strength)
    return (sr, out.astype(np.float32))
|
71 |
|
72 |
+
# -------- UI --------
# NOTE(review): the nesting below is reconstructed from syntax, since the
# original indentation is not visible here; confirm the layout.
with gr.Blocks(theme=gr.themes.Soft(), css="footer{visibility:hidden}") as demo:
    gr.Markdown("### Zack’s Audio Outpost — AI Noise Reducer\nUpload a file and compare **Original vs Processed**.")

    # Inputs: the clip to clean up and how aggressively to clean it.
    with gr.Row():
        audio_in = gr.Audio(type="numpy", label="Upload Audio")
        strength = gr.Radio(
            ["Light", "Medium", "Strong"],
            value="Medium",
            label="Noise Reduction Strength",
        )
    run_btn = gr.Button("Run Noise Reduction", variant="primary")

    # Outputs: original and processed players for comparison.
    with gr.Row():
        orig = gr.Audio(label="Original")
        proc = gr.Audio(label="Processed")

    def run(audio, s):
        """Return the untouched input and its denoised version."""
        if audio is None:
            raise gr.Error("Please upload an audio file.")
        sample_rate, samples = audio
        processed = denoise(audio, s)
        original = (sample_rate, samples)
        return original, processed

    run_btn.click(run, [audio_in, strength], [orig, proc])

demo.launch()
|