"""Gradio demo that re-synthesises uploaded audio through pretrained RAVE models."""

import os
import re

import gradio as gr
import numpy as np
import torch
import torchaudio
from huggingface_hub import hf_hub_download

# HF Spaces doesn't need this, but keeps local compatibility
# os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio_cache"

# ✅ Updated list: only confirmed existing models
# Maps the label shown in the dropdown to (Hugging Face repo id, file name).
RAVE_MODELS = {
    # Models from Intelligent-Instruments-Lab/rave-models
    "Electric Guitar (IIL)": ("Intelligent-Instruments-Lab/rave-models", "guitar_iil_b2048_r48000_z16.ts"),
    "Soprano Sax (IIL)": ("Intelligent-Instruments-Lab/rave-models", "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts"),
    "Organ (Archive IIL)": ("Intelligent-Instruments-Lab/rave-models", "organ_archive_b2048_r48000_z16.ts"),
    "Organ (Bach IIL)": ("Intelligent-Instruments-Lab/rave-models", "organ_bach_b2048_r48000_z16.ts"),
    "Magnetic Resonator Piano (IIL)": ("Intelligent-Instruments-Lab/rave-models", "mrp_strengjavera_b2048_r44100_z16.ts"),
    "Multi-Voice (IIL)": ("Intelligent-Instruments-Lab/rave-models", "voice-multi-b2048-r48000-z11.ts"),
    "Birds (Dawn Chorus IIL)": ("Intelligent-Instruments-Lab/rave-models", "birds_dawnchorus_b2048_r48000_z8.ts"),
    "Water (Pond Brain IIL)": ("Intelligent-Instruments-Lab/rave-models", "water_pondbrain_b2048_r48000_z16.ts"),
    "Marine Mammals (IIL)": ("Intelligent-Instruments-Lab/rave-models", "marinemammals_pondbrain_b2048_r48000_z20.ts"),
    # Models from shuoyang-zheng/jaspers-rave-models
    "Guitar Picking (Jasper Causal)": ("shuoyang-zheng/jaspers-rave-models", "guitar_picking_dm_b2048_r44100_z8_causal.ts"),
    "Singing Voice (Jasper Non-Causal)": ("shuoyang-zheng/jaspers-rave-models", "gtsinger_b2048_r44100_z16_noncausal.ts"),
    "Drums (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_drum_b2048_r44100_z16_noncausal.ts"),
    "Bass (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_bass_b2048_r44100_z16_noncausal.ts"),
    "Strings (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_string_b2048_r44100_z16_noncausal.ts"),
    "Speech (Jasper Causal)": ("shuoyang-zheng/jaspers-rave-models", "librispeech100_b2048_r44100_z8_causal.ts"),
    "Brass/Sax (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_brass_sax_b2048_r44100_z8_noncausal.ts"),
    # Model from lancelotblanchard/rave_percussion
    "Percussion (Lancelot)": ("lancelotblanchard/rave_percussion", "percussion.ts"),
}

# Loaded TorchScript modules keyed by RAVE_MODELS label, so each checkpoint is
# downloaded and deserialised at most once per process.
MODEL_CACHE = {}

# Rate used when a checkpoint file name carries no `r<rate>` token.
DEFAULT_SAMPLE_RATE = 48000

# Matches the `r<digits>` token delimited by `_`, `-` or `.` in a file name,
# e.g. `_r44100_` or `-r48000-`.
_SAMPLE_RATE_TOKEN = re.compile(r"(?:^|[_-])r(\d{4,6})(?=[_.-])")

print("🎛 RAVE Style Transfer - Starting up...")


def _model_sample_rate(model_file_name, default=DEFAULT_SAMPLE_RATE):
    """Return the sample rate encoded in a checkpoint file name.

    The checkpoint names in ``RAVE_MODELS`` carry an ``r<rate>`` token
    (``..._r44100_...``), which is taken to be the rate the model expects.
    ``default`` is returned when no such token is present.
    """
    match = _SAMPLE_RATE_TOKEN.search(model_file_name)
    return int(match.group(1)) if match else default


def load_rave_model(model_key):
    """Return the TorchScript model registered under ``model_key``.

    The checkpoint is fetched with ``hf_hub_download``, loaded on CPU, put in
    eval mode and memoised in ``MODEL_CACHE``.

    Raises:
        Exception: whatever the lookup, download or ``torch.jit.load`` raised;
            the error is logged and then re-raised unchanged.
    """
    if model_key in MODEL_CACHE:
        return MODEL_CACHE[model_key]

    print(f"📥 Loading model: {model_key}...")
    try:
        repo_id, model_file_name = RAVE_MODELS[model_key]
        model_file = hf_hub_download(repo_id=repo_id, filename=model_file_name)
        model = torch.jit.load(model_file, map_location="cpu")
        model.eval()
        MODEL_CACHE[model_key] = model
        print(f"✅ Loaded: {model_key}")
        return model
    except Exception as e:
        print(f"❌ Error loading {model_key}: {str(e)}")
        raise


def apply_rave(audio_path, model_name):
    """
    Apply RAVE style transfer to audio.

    Args:
        audio_path: Path of the uploaded/recorded audio file (may be empty).
        model_name: A key of ``RAVE_MODELS``.

    Returns:
        ``(audio, status)`` where ``audio`` is ``(sample_rate, numpy_array)``
        for Gradio, or ``None`` on failure, and ``status`` is a
        human-readable message. Errors are reported through ``status`` rather
        than raised, so the UI always receives a value.
    """
    if not audio_path:
        return None, "❌ Please upload an audio file."
    if model_name not in RAVE_MODELS:
        return None, f"❌ Error: unknown model {model_name!r}"

    try:
        print(f"🎵 Processing audio: {os.path.basename(audio_path)} with {model_name}")

        # Feed the model audio at the rate named in its checkpoint file,
        # instead of assuming 48 kHz for every model.
        target_sr = _model_sample_rate(RAVE_MODELS[model_name][1])

        # Load and preprocess audio
        waveform, sr = torchaudio.load(audio_path)
        print(f"📊 Original: {waveform.shape}, {sr}Hz")

        # Convert to mono if stereo
        if waveform.shape[0] > 1:
            print("🔄 Converting stereo to mono")
            waveform = torch.mean(waveform, dim=0, keepdim=True)

        # Resample to the model's rate if needed
        if sr != target_sr:
            print(f"🔄 Resampling from {sr}Hz to {target_sr}Hz")
            waveform = torchaudio.functional.resample(waveform, sr, target_sr)
            sr = target_sr

        # Add batch dimension
        waveform = waveform.unsqueeze(0)

        # Load model and process
        model = load_rave_model(model_name)
        print("🤖 Applying RAVE transformation...")
        with torch.no_grad():
            z = model.encode(waveform)
            processed = model.decode(z)

        # Prepare output: drop the batch dimension, then hand Gradio either a
        # 1-D mono signal or a (samples, channels) array.
        arr = processed.squeeze(0).cpu().numpy()
        if arr.ndim == 2:
            arr = arr[0] if arr.shape[0] == 1 else arr.T

        print("✅ Transformation complete!")
        return (sr, arr), "✅ Style transfer successful!"
    except Exception as e:
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        return None, error_msg


# --- Gradio UI ---
print("🚀 Creating Gradio interface...")

with gr.Blocks(theme=gr.themes.Soft(), title="RAVE Style Transfer") as demo:
    gr.Markdown("# 🎛 RAVE Style Transfer Stem Remixer")
    gr.Markdown("Transform your audio using AI-powered style transfer. Upload audio and choose an instrument style!")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                type="filepath",
                label="🎵 Upload Your Audio",
                sources=["upload", "microphone"]
            )
            model_selector = gr.Dropdown(
                choices=list(RAVE_MODELS.keys()),
                label="🎸 Select Instrument Style",
                value="Electric Guitar (IIL)",
                interactive=True
            )
            process_btn = gr.Button("🔄 Apply RAVE Transform", variant="primary", size="lg")

        with gr.Column():
            output_audio = gr.Audio(
                type="numpy",
                label="🎧 Transformed Audio"
            )
            status_output = gr.Textbox(
                label="📊 Status",
                interactive=False,
                value="Ready to transform audio..."
            )

    process_btn.click(
        fn=apply_rave,
        inputs=[audio_input, model_selector],
        outputs=[output_audio, status_output]
    )

    gr.Markdown("---")
    gr.Markdown(
        "\n\n"
        "Powered by RAVE (Realtime Audio Variational autoEncoder) | "
        "Models from Intelligent Instruments Lab & Community"
        "\n\n"
    )

print("🌐 Launching demo...")

if __name__ == "__main__":
    demo.launch()