ahk-d committed on
Commit
f67b703
·
verified ·
1 Parent(s): c1263a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ import torch
4
+ import os
5
+ from rave import RAVE # Assuming rave.py or pip package is available
6
+ from huggingface_hub import hf_hub_download
7
+
8
+ # ✅ Available RAVE models (can expand dynamically from HF repo)
9
# Display name (shown in the style dropdown) -> TorchScript export filename
# passed to hf_hub_download. Insertion order is the order the dropdown lists.
RAVE_MODELS = dict(
    [
        ("Guitar", "guitar_iil_b2048_r48000_z16.ts"),
        ("Soprano Sax", "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts"),
        ("Organ (Archive)", "organ_archive_b2048_r48000_z16.ts"),
        ("Organ (Bach)", "organ_bach_b2048_r48000_z16.ts"),
        ("Voice Multivoice", "voice-multi-b2048-r48000-z11.ts"),
        ("Birds Dawn Chorus", "birds_dawnchorus_b2048_r48000_z8.ts"),
        ("Magnets", "magnets_b2048_r48000_z8.ts"),
        ("Whale Songs", "humpbacks_pondbrain_b2048_r48000_z20.ts"),
    ]
)

# Already-loaded models keyed by display name, so each file is fetched and
# deserialized at most once per process.
MODEL_CACHE = dict()
21
+
22
def load_rave_model(model_name):
    """Return the RAVE model for *model_name*, downloading it on first use.

    The model file is fetched from the
    ``Intelligent-Instruments-Lab/rave-models`` Hugging Face repo and the
    loaded module is memoized in ``MODEL_CACHE`` so later calls are free.

    Args:
        model_name: A key of ``RAVE_MODELS`` (the dropdown display name).

    Returns:
        The loaded model in eval mode. ``apply_rave`` calls ``encode`` and
        ``decode`` on it.

    Raises:
        KeyError: If *model_name* is not a key of ``RAVE_MODELS``.
    """
    cached = MODEL_CACHE.get(model_name)
    if cached is not None:
        return cached

    try:
        filename = RAVE_MODELS[model_name]
    except KeyError:
        # Same exception type as the bare lookup, but with an actionable message.
        raise KeyError(
            f"Unknown RAVE model {model_name!r}; "
            f"expected one of {sorted(RAVE_MODELS)}"
        ) from None

    model_file = hf_hub_download(
        repo_id="Intelligent-Instruments-Lab/rave-models",
        filename=filename,
    )

    # The ``.ts`` files are TorchScript archives, so load them with
    # torch.jit.load rather than relying on a ``RAVE.load`` wrapper that the
    # original code only assumed to exist. map_location keeps this working on
    # CPU-only hosts even if the export was saved from a GPU.
    model = torch.jit.load(model_file, map_location="cpu")
    model.eval()

    # Cache only after a successful load so a failed download is retried.
    MODEL_CACHE[model_name] = model
    return model
36
+
37
def apply_rave(audio, model_name):
    """Run an audio clip through the selected RAVE model (encode -> decode).

    Args:
        audio: Value produced by a ``gr.Audio(type="numpy")`` input, i.e. a
            ``(sample_rate, data)`` tuple, or ``None`` when the user has not
            provided any audio. ``data`` may be integer PCM or float, and
            either ``(samples,)`` or ``(samples, channels)``.
        model_name: A key of ``RAVE_MODELS`` selecting the style model.

    Returns:
        A ``(sample_rate, data)`` tuple suitable for a ``gr.Audio`` output,
        with ``sample_rate`` fixed at 48000, or ``None`` when there is no
        input audio to process.
    """
    if audio is None:
        return None

    # Gradio hands numpy audio over as (sample_rate, data) -- rate first.
    sr, samples = audio
    # Every filename in RAVE_MODELS carries "r48000", so 48 kHz is used for all.
    target_sr = 48000

    audio_tensor = torch.tensor(samples)
    if audio_tensor.numel() == 0:
        return None

    if audio_tensor.is_floating_point():
        audio_tensor = audio_tensor.to(torch.float32)
    else:
        # Integer PCM (Gradio normally delivers int16): scale into [-1, 1)
        # because both resampling and the model need floating-point input.
        full_scale = float(torch.iinfo(audio_tensor.dtype).max) + 1.0
        audio_tensor = audio_tensor.to(torch.float32) / full_scale

    # Multi-channel input arrives as (samples, channels); mix down to mono.
    if audio_tensor.ndim > 1:
        audio_tensor = audio_tensor.mean(dim=-1)

    # NOTE(review): assumes the exported models take [batch, channel, samples]
    # with a single channel -- confirm against the model exports.
    audio_tensor = audio_tensor.reshape(1, 1, -1)

    if sr != target_sr:
        audio_tensor = torchaudio.functional.resample(
            audio_tensor, int(sr), target_sr
        )

    model = load_rave_model(model_name)

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        z = model.encode(audio_tensor)
        processed_audio = model.decode(z)

    processed_audio = processed_audio.squeeze().cpu().numpy()
    # Gradio expects the output in the same (sample_rate, data) order.
    return (target_sr, processed_audio)
56
+
57
+ # 🎛 Gradio Interface
58
# Build the UI: one row with the audio input and style selector, one row with
# the result player, and a button that runs apply_rave on the current inputs.
with gr.Blocks() as demo:
    # Heading emoji restored; it was mis-decoded UTF-8 in the source text.
    gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
    gr.Markdown("Upload audio, select a RAVE model, and get a transformed version.")

    with gr.Row():
        # type="numpy" delivers the clip to the callback as (sample_rate, data).
        audio_input = gr.Audio(
            type="numpy",
            label="Upload Audio",
            sources=["upload", "microphone"],
        )
        model_selector = gr.Dropdown(
            list(RAVE_MODELS.keys()),
            label="Select Style",
            value="Guitar",
        )

    with gr.Row():
        output_audio = gr.Audio(type="numpy", label="Transformed Audio")

    # API + UI trigger
    process_btn = gr.Button("Apply Style Transfer")
    process_btn.click(
        fn=apply_rave,
        inputs=[audio_input, model_selector],
        outputs=output_audio,
    )

# Launched at import time, as in the original script.
demo.launch()