File size: 2,664 Bytes
dde59c5
8a8d79c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import gradio as gr
import numpy as np
import soundfile as sf
import torch

# DeepFilterNet2
from df.enhance import enhance, init_df

APP_TITLE = "Zack’s Audio Outpost — Voice Denoiser (DeepFilterNet2)"
APP_DESC = (
    "Upload a voice clip with traffic/hiss/room noise and compare Original vs Processed. "
    "Choose Light / Medium / Strong (1× / 2× / 3× passes)."
)

# Load DFN2 once (first run can take a few minutes while the Space installs packages)
MODEL_DF, DF_STATE, _ = init_df()

def _ensure_2d(x: np.ndarray) -> np.ndarray:
    """Make shape (samples, channels)."""
    if x.ndim == 1:
        x = x[:, None]
    return x

def _run_single_pass(stereo: np.ndarray) -> np.ndarray:
    """Run DFN2 per channel; keep same length/channels."""
    out = np.zeros_like(stereo, dtype=np.float32)
    for ch in range(stereo.shape[1]):
        y = enhance(stereo[:, ch].astype(np.float32),
                    DF_STATE, model=MODEL_DF, atten_lim_db=12.0)
        out[:len(y), ch] = y[:stereo.shape[0]]
    return out

def process(file_obj, strength):
    if file_obj is None:
        raise gr.Error("Please upload an audio file first.")

    # Load original audio (mono or stereo)
    audio, sr = sf.read(file_obj.name, always_2d=False)
    x = _ensure_2d(audio.astype(np.float32))

    # Map UI strength to number of passes
    passes = {"Light": 1, "Medium": 2, "Strong": 3}[strength]

    y = x.copy()
    for _ in range(passes):
        y = _run_single_pass(y)

    # Avoid clipping if multi-pass pushed levels
    y = np.clip(y, -1.0, 1.0)

    # Gradio wants (sr, np.array). If mono, squeeze back to 1D
    return (sr, audio), (sr, y.squeeze())

THEME = gr.themes.Soft(primary_hue="cyan", neutral_hue="slate").set(
    body_background_fill="#0b1020",
    body_text_color="#e6ecff",
    block_background_fill="#121830",
    block_border_color="#243154",
    button_primary_background_fill="#3dd6ff",
    button_primary_text_color="#001018",
    input_background_fill="#0e1530",
    input_border_color="#243154",
)

with gr.Blocks(title=APP_TITLE, theme=THEME) as demo:
    gr.Markdown(f"## {APP_TITLE}\n{APP_DESC}")
    with gr.Row():
        file = gr.File(label="Upload audio", file_types=["audio"])
        strength = gr.Radio(["Light","Medium","Strong"], value="Medium",
                            label="Noise reduction strength")
    run = gr.Button("Process", variant="primary")

    with gr.Row():
        a_orig = gr.Audio(label="Original (A)", interactive=False)
        a_proc = gr.Audio(label="Processed (B)", interactive=False)

    run.click(process, inputs=[file, strength], outputs=[a_orig, a_proc])

if __name__ == "__main__":
    demo.launch()