Spaces:

juzer09
/

ai-music-detection

Sleeping

File size: 9,411 Bytes

f280f9f

#!/usr/bin/env python3
"""

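Madverse Music - Hugging Face Spaces Version

Streamlit app for HF Spaces deployment: upload an audio file and have it
classified as human-created ("Real") or AI-generated ("Fake") music.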

"""

import streamlit as st
import torch
import librosa
import tempfile
import os
import time
import numpy as np

# Import the sonics library for model loading.
# If it is missing, show an error in the page and halt this script run so
# none of the code below executes without HFAudioClassifier.
try:
    from sonics import HFAudioClassifier
except ImportError:
    st.error("Sonics library not found. Please install it first.")
    st.stop()

# Global model variable
# NOTE(review): no function visible in this file reads this global; main()
# binds its own local `model` from load_model(). Confirm before removing.
model = None

# Page configuration: browser tab title/icon, wide layout, sidebar open
# on first load.
st.set_page_config(
    page_title="Madverse Music: AI Music Detector",
    page_icon="🎵",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS, injected as raw HTML (hence unsafe_allow_html=True).
# Defines the classes used by the HTML snippets rendered in main():
#   .main-header  - gradient banner at the top of the page
#   .result-box   - shared layout for the verdict card
#   .real-music   - green variant of the verdict card
#   .fake-music   - red variant of the verdict card
st.markdown("""

<style>

    .main-header {

        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);

        padding: 1rem;

        border-radius: 10px;

        color: white;

        text-align: center;

        margin-bottom: 2rem;

    }

    .result-box {

        padding: 1rem;

        border-radius: 10px;

        margin: 1rem 0;

        border-left: 5px solid;

    }

    .real-music {

        background-color: #d4edda;

        border-left-color: #28a745;

    }

    .fake-music {

        background-color: #f8d7da;

        border-left-color: #dc3545;

    }

</style>

""", unsafe_allow_html=True)

@st.cache_resource
def _load_model_cached():
    """Load the pretrained classifier once and cache it for later reruns.

    Exceptions are deliberately NOT caught here: letting them propagate out
    of the cached function means a failed load is never stored in the cache,
    so the next rerun tries again.
    """
    with st.spinner("Loading AI model... This may take a moment..."):
        # Use the same loading method as the working API
        classifier = HFAudioClassifier.from_pretrained("awsaf49/sonics-spectttra-alpha-120s")
        classifier.eval()
        return classifier


def load_model():
    """Load the model with caching for HF Spaces.

    Returns:
        The loaded classifier in eval mode, or ``None`` if loading failed
        (in which case an error message has already been shown on the page).

    The error handling lives outside the cached helper on purpose. If the
    ``None`` failure value were returned from inside the cached function it
    would itself be cached, and every later rerun would get ``None`` back
    without ever retrying the load.
    """
    try:
        return _load_model_cached()
    except Exception as e:
        st.error(f"Failed to load model: {str(e)}")
        return None

def _score_to_verdict(probability):
    """Turn a sigmoid probability into ``(classification, confidence)``.

    ``probability < 0.5`` is "Real", otherwise "Fake". Confidence is the
    distance from the 0.5 decision boundary rescaled to the 0-1 range, so
    it is 0 right at the boundary and 1 at probability 0 or 1 for both
    classes.
    """
    classification = "Real" if probability < 0.5 else "Fake"
    # Symmetric around the threshold. The previous Real-branch formula,
    # (1 - p) * 2, is always > 1 for p < 0.5 and was then capped, so every
    # "Real" verdict was reported with 100% confidence.
    confidence = min(abs(probability - 0.5) * 2, 1.0)
    return classification, confidence


def process_audio(audio_file, model):
    """Process audio file and return classification.

    Args:
        audio_file: File-like object providing ``read()`` that returns the
            encoded audio bytes. If it has a ``name`` attribute, that name's
            extension is reused for the temporary file.
        model: Callable taking a ``(1, num_samples)`` float tensor and
            returning a single-element tensor holding the raw logit.

    Returns:
        On success, a dict with keys ``classification`` ("Real" or "Fake"),
        ``confidence`` (0-1), ``probability`` (sigmoid of the logit),
        ``raw_score`` (the logit), ``duration`` (seconds) and
        ``success=True``.
        On any failure, ``{'success': False, 'error': <message>}``.
        The function does not raise; all exceptions are reported through
        the returned dict.
    """
    tmp_path = None
    try:
        # Keep the upload's own extension on the temp file instead of always
        # using ".wav"; fall back to ".wav" when no name is available.
        # NOTE(review): assumed to help decoders that use the extension as a
        # format hint - confirm against the audio backends in use.
        upload_name = getattr(audio_file, 'name', '') or ''
        suffix = os.path.splitext(upload_name)[1] or '.wav'

        # Save uploaded file temporarily. The path is recorded before the
        # write so the file is still removed if the write itself fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_path = tmp_file.name
            tmp_file.write(audio_file.read())

        # Load audio (model uses 16kHz sample rate)
        audio, sr = librosa.load(tmp_path, sr=16000)
        if len(audio) == 0:
            raise ValueError("Audio file contains no samples")

        # Convert to tensor and add batch dimension
        audio_tensor = torch.FloatTensor(audio).unsqueeze(0)

        # Inference only: no gradients needed
        with torch.no_grad():
            output = model(audio_tensor)
            raw_score = output.item()
            # Convert logit to probability using sigmoid
            probability = torch.sigmoid(output).item()

        classification, confidence = _score_to_verdict(probability)

        return {
            'classification': classification,
            'confidence': confidence,
            'probability': probability,
            'raw_score': raw_score,
            'duration': len(audio) / sr,
            'success': True
        }

    except Exception as e:
        return {
            'success': False,
            'error': str(e)
        }

    finally:
        # Best-effort removal of the temp file on both success and failure.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

def _render_sidebar():
    """Draw the static sidebar: model summary, supported formats, links."""
    with st.sidebar:
        st.markdown("### About")
        st.markdown("""

        This AI model can detect whether music is:

        - **Real**: Human-created music

        - **Fake**: AI-generated music (Suno, Udio, etc.)

        

        **Model**: SpecTTTra-α (120s)  

        **Accuracy**: 97% F1 score  

        **Max Duration**: 120 seconds

        """)

        st.markdown("### Supported Formats")
        st.markdown("- WAV (.wav)")
        st.markdown("- MP3 (.mp3)")
        st.markdown("- FLAC (.flac)")
        st.markdown("- M4A (.m4a)")
        st.markdown("- OGG (.ogg)")

        st.markdown("### Links")
        st.markdown("- [Madverse Website](https://madverse.co)")
        st.markdown("- [GitHub Repository](#)")


def _render_result(result, processing_time):
    """Show the verdict card and the expandable metrics for one analysis.

    Args:
        result: Successful result dict from process_audio (reads the
            'classification', 'confidence', 'probability' and 'duration'
            keys).
        processing_time: Elapsed analysis time in seconds.
    """
    st.markdown("### Analysis Results")

    classification = result['classification']
    confidence = result['confidence']

    # Both verdicts share one card template; only the CSS variant, heading
    # and message differ.
    if classification == "Real":
        css_class = "real-music"
        heading = "Human-Created Music"
        message = "This appears to be human-created music!"
    else:
        css_class = "fake-music"
        heading = "AI-Generated Music"
        message = "This appears to be AI-generated music!"

    # No blank lines inside the <div>: in Markdown a blank line ends the raw
    # HTML block, and the indented lines after it would be rendered as a
    # code block instead of as HTML.
    st.markdown(f"""
    <div class="result-box {css_class}">
        <h3>Result: {heading}</h3>
        <p><strong>Classification:</strong> {classification}</p>
        <p><strong>Confidence:</strong> {confidence:.1%}</p>
        <p><strong>Message:</strong> {message}</p>
    </div>
    """, unsafe_allow_html=True)

    # Detailed metrics
    with st.expander("Detailed Metrics"):
        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Confidence", f"{confidence:.1%}")
        with col2:
            st.metric("Probability", f"{result['probability']:.3f}")
        with col3:
            st.metric("Processing Time", f"{processing_time:.2f}s")

        if result['duration'] > 0:
            st.metric("Duration", f"{result['duration']:.1f}s")

        st.markdown("**Interpretation:**")
        st.markdown("""

        - **Probability < 0.5**: Classified as Real (human-created)

        - **Probability ≥ 0.5**: Classified as Fake (AI-generated)

        - **Confidence**: How certain the model is about its prediction

        """)


def _render_footer():
    """Draw the horizontal rule and the attribution/disclaimer footer."""
    st.markdown("---")
    # Kept free of blank lines so the whole <div> stays one raw HTML block.
    st.markdown("""
    <div style="text-align: center; color: #666;">
        <p>Powered by <strong>Madverse Music</strong> | Built with Streamlit & PyTorch</p>
        <p>This tool is for research and educational purposes. Results may vary depending on audio quality.</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Build the page: header, sidebar, model load, upload and analysis.

    Flow:
      1. Render the header banner and the sidebar.
      2. Load the model via load_model(); if that returns None, show an
         error and stop building the page.
      3. Offer a file uploader; for an uploaded file show its metadata and
         an audio preview.
      4. When "Analyze Audio" is pressed, run process_audio() and render
         either the verdict or the error it reported.
      5. Render the footer (also after a failed analysis).
    """
    # Header. Written without blank lines inside the <div>: a blank line
    # would end the Markdown HTML block and turn the indented children into
    # a code block.
    st.markdown("""
    <div class="main-header">
        <h1>Madverse Music: AI Music Detector</h1>
        <p>Detect AI-generated music vs human-created music using advanced AI technology</p>
    </div>
    """, unsafe_allow_html=True)

    # Sidebar
    _render_sidebar()

    # Load model
    model = load_model()

    if model is None:
        st.error("Model failed to load. Please refresh the page.")
        return

    st.success("AI model loaded successfully!")

    # File upload
    st.markdown("### Upload Audio File")
    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=['wav', 'mp3', 'flac', 'm4a', 'ogg'],
        help="Upload an audio file to analyze (max 120 seconds)"
    )

    if uploaded_file is not None:
        # Display file info
        st.markdown("### File Information")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Filename", uploaded_file.name)
        with col2:
            st.metric("File Size", f"{uploaded_file.size / 1024:.1f} KB")
        with col3:
            st.metric("Format", uploaded_file.type)

        # Audio player
        st.markdown("### Preview")
        st.audio(uploaded_file)

        # Analysis button
        if st.button("Analyze Audio", type="primary", use_container_width=True):
            try:
                with st.spinner("Analyzing audio... This may take a few seconds..."):
                    # Rewind the upload so process_audio reads it from the
                    # start.
                    uploaded_file.seek(0)

                    # perf_counter is monotonic, so the elapsed time cannot
                    # be skewed by wall-clock adjustments.
                    start_time = time.perf_counter()
                    result = process_audio(uploaded_file, model)
                    processing_time = time.perf_counter() - start_time

                if result['success']:
                    _render_result(result, processing_time)
                else:
                    st.error(f"Error processing audio: {result['error']}")

            except Exception as e:
                # Page-level boundary: report anything unexpected raised
                # while analysing or rendering instead of crashing the run.
                st.error(f"Error processing audio: {str(e)}")

    # Footer
    _render_footer()

# Script entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    main()