#!/usr/bin/env python3
"""
Madverse Music - Hugging Face Spaces Version
Streamlit app for HF Spaces deployment
"""

import os
import tempfile
import time

import librosa
import numpy as np
import streamlit as st
import torch

# Import the sonics library for model loading
try:
    from sonics import HFAudioClassifier
except ImportError:
    st.error("Sonics library not found. Please install it first.")
    st.stop()

# Global model variable (not read by the functions below, which pass the
# model explicitly; kept so the module's public names are unchanged).
model = None

# Checkpoint passed to HFAudioClassifier.from_pretrained.
MODEL_ID = "awsaf49/sonics-spectttra-alpha-120s"

# Sample rate the audio is resampled to before inference.
TARGET_SAMPLE_RATE = 16000

# Sigmoid outputs below this are labelled "Real", the rest "Fake".
DECISION_THRESHOLD = 0.5

# Extension used for the temporary file when the upload has no usable name.
DEFAULT_AUDIO_SUFFIX = ".wav"

# Page header markup.
# NOTE(review): the surrounding HTML tags are absent in this view of the file.
HEADER_HTML = """

Madverse Music: AI Music Detector

Detect AI-generated music vs human-created music using advanced AI technology

"""

# Page footer markup.
FOOTER_HTML = """

This tool is for research and educational purposes. Results may vary depending on audio quality.

"""

# Sidebar "About" text.
ABOUT_MARKDOWN = """
This AI model can detect whether music is:
- **Real**: Human-created music
- **Fake**: AI-generated music (Suno, Udio, etc.)

**Model**: SpecTTTra-α (120s)
**Accuracy**: 97% F1 score
**Max Duration**: 120 seconds
"""

# Explanation shown under the detailed metrics.
INTERPRETATION_MARKDOWN = """
- **Probability < 0.5**: Classified as Real (human-created)
- **Probability ≥ 0.5**: Classified as Fake (AI-generated)
- **Confidence**: How certain the model is about its prediction
"""

# Page configuration
st.set_page_config(
    page_title="Madverse Music: AI Music Detector",
    page_icon="🎵",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
# NOTE(review): the style payload is empty in this view of the file.
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def load_model():
    """Load the audio classifier, cached across reruns by Streamlit.

    Returns:
        The classifier in eval mode, or ``None`` if loading failed (the
        failure is also reported to the page with ``st.error``).
    """
    try:
        with st.spinner("Loading AI model... This may take a moment..."):
            # Use the same loading method as the working API
            classifier = HFAudioClassifier.from_pretrained(MODEL_ID)
            classifier.eval()
            return classifier
    except Exception as e:
        st.error(f"Failed to load model: {str(e)}")
        return None


def _classify(probability):
    """Turn a fake-probability into a ``(label, confidence)`` pair.

    Confidence is the distance from the decision threshold rescaled to
    0..1, so a probability of 0.5 gives 0.0 and 0.0 or 1.0 gives 1.0.
    """
    label = "Real" if probability < DECISION_THRESHOLD else "Fake"
    confidence = abs(probability - DECISION_THRESHOLD) * 2
    # Cap at 1.0 to guard against floating-point overshoot.
    return label, min(confidence, 1.0)


def _temp_suffix(audio_file):
    """Return the upload's file extension, or ``.wav`` if it has none."""
    name = getattr(audio_file, "name", None)
    if not isinstance(name, str):
        return DEFAULT_AUDIO_SUFFIX
    return os.path.splitext(name)[1] or DEFAULT_AUDIO_SUFFIX


def process_audio(audio_file, model):
    """Classify an uploaded audio file as Real or Fake.

    Args:
        audio_file: Readable binary file-like object (``.read()`` is called
            once). If it has a string ``name``, its extension is reused for
            the temporary file.
        model: Callable taking a ``(1, num_samples)`` float tensor and
            returning a single-element tensor holding a logit.

    Returns:
        On success, a dict with ``classification``, ``confidence``,
        ``probability``, ``raw_score``, ``duration`` and ``success=True``.
        On any failure, ``{'success': False, 'error': <message>}``; this
        function does not raise.
    """
    tmp_path = None
    try:
        # Save uploaded file temporarily so librosa can open it by path.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=_temp_suffix(audio_file)
        ) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(audio_file.read())

        # Load audio (model uses 16kHz sample rate)
        audio, sr = librosa.load(tmp_path, sr=TARGET_SAMPLE_RATE)
        if np.size(audio) == 0:
            raise ValueError("Audio file contains no samples")

        # Convert to tensor and add batch dimension
        audio_tensor = torch.as_tensor(audio, dtype=torch.float32).unsqueeze(0)

        # Get prediction using the same pattern as working API
        with torch.no_grad():
            output = model(audio_tensor)

        # Convert logit to probability using sigmoid
        probability = torch.sigmoid(output).item()
        classification, confidence = _classify(probability)

        return {
            'classification': classification,
            'confidence': confidence,
            'probability': probability,
            'raw_score': output.item(),
            'duration': len(audio) / sr,
            'success': True
        }
    except Exception as e:
        # Boundary for the UI: report every failure as a result dict.
        return {
            'success': False,
            'error': str(e)
        }
    finally:
        # Clean up the temporary file on both the success and error paths.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best effort: a leftover temp file must not mask the result.
                pass


def _render_result(result, processing_time):
    """Draw the verdict box and the detailed-metrics expander."""
    st.markdown("### Analysis Results")

    classification = result['classification']
    confidence = result['confidence']

    # Result box
    if classification == "Real":
        st.markdown(f"""

Result: Human-Created Music

Classification: {classification}

Confidence: {confidence:.1%}

Message: This appears to be human-created music!

""", unsafe_allow_html=True)
    else:
        st.markdown(f"""

Result: AI-Generated Music

Classification: {classification}

Confidence: {confidence:.1%}

Message: This appears to be AI-generated music!

""", unsafe_allow_html=True)

    # Detailed metrics
    with st.expander("Detailed Metrics"):
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Confidence", f"{confidence:.1%}")
        with col2:
            st.metric("Probability", f"{result['probability']:.3f}")
        with col3:
            st.metric("Processing Time", f"{processing_time:.2f}s")

        if result['duration'] > 0:
            st.metric("Duration", f"{result['duration']:.1f}s")

        st.markdown("**Interpretation:**")
        st.markdown(INTERPRETATION_MARKDOWN)


def main():
    """Render the page: sidebar, uploader, analysis results and footer."""
    # Header
    st.markdown(HEADER_HTML, unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.markdown("### About")
        st.markdown(ABOUT_MARKDOWN)

        st.markdown("### Supported Formats")
        st.markdown("- WAV (.wav)")
        st.markdown("- MP3 (.mp3)")
        st.markdown("- FLAC (.flac)")
        st.markdown("- M4A (.m4a)")
        st.markdown("- OGG (.ogg)")

        st.markdown("### Links")
        st.markdown("- [Madverse Website](https://madverse.co)")
        st.markdown("- [GitHub Repository](#)")

    # Load model
    model = load_model()

    if model is None:
        # Drop the cached failure; otherwise the cache keeps returning None
        # and refreshing the page could never retry the load.
        load_model.clear()
        st.error("Model failed to load. Please refresh the page.")
        return

    st.success("AI model loaded successfully!")

    # File upload
    st.markdown("### Upload Audio File")
    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=['wav', 'mp3', 'flac', 'm4a', 'ogg'],
        help="Upload an audio file to analyze (max 120 seconds)"
    )

    if uploaded_file is not None:
        # Display file info
        st.markdown("### File Information")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Filename", uploaded_file.name)
        with col2:
            st.metric("File Size", f"{uploaded_file.size / 1024:.1f} KB")
        with col3:
            st.metric("Format", uploaded_file.type)

        # Audio player
        st.markdown("### Preview")
        st.audio(uploaded_file)

        # Analysis button
        if st.button("Analyze Audio", type="primary", use_container_width=True):
            try:
                with st.spinner("Analyzing audio... This may take a few seconds..."):
                    # Reset file pointer (the preview may have consumed it)
                    uploaded_file.seek(0)

                    # Process audio
                    start_time = time.time()
                    result = process_audio(uploaded_file, model)
                    processing_time = time.time() - start_time

                    if not result['success']:
                        st.error(f"Error processing audio: {result['error']}")
                        return

                    # Display results
                    _render_result(result, processing_time)

            except Exception as e:
                st.error(f"Error processing audio: {str(e)}")

    # Footer
    st.markdown("---")
    st.markdown(FOOTER_HTML, unsafe_allow_html=True)


if __name__ == "__main__":
    main()