Vector73 committed
Commit 01e938d · 1 Parent(s): 8c38d83

Add audio model.
.gitignore ADDED
@@ -0,0 +1 @@
+**/__pycache__
app.py CHANGED
@@ -2,25 +2,109 @@ import streamlit as st
 import cv2
 import numpy as np
 import os
+import tempfile
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
 from PIL import Image
 import torch
-from predict import load_onnx_model
+
+# Import deforestation modules
+from prediction_engine import load_onnx_model
 from utils.helpers import calculate_deforestation_metrics, create_overlay
 
+# Import audio classification modules
+from utils.audio_processing import preprocess_audio
+from utils.audio_model import load_audio_model, predict_audio, class_names
+
+# Ensure torch classes path is initialized to avoid warnings
 torch.classes.__path__ = []
 
 # Set page config
-st.set_page_config(page_title="Deforestation Detection", page_icon="🌳", layout="wide")
+st.set_page_config(
+    page_title="Nature Nexus - Forest Surveillance",
+    page_icon="🌳",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Constants
+DEFOREST_MODEL_INPUT_SIZE = 256
+AUDIO_MODEL_PATH = "models/best_model.pth"
 
-# Set constants
-MODEL_INPUT_SIZE = 256  # The size our model expects
+# Initialize session state for navigation
+if 'current_service' not in st.session_state:
+    st.session_state.current_service = 'deforestation'
+if 'audio_input_method' not in st.session_state:
+    st.session_state.audio_input_method = 'upload'
+
+# Sidebar for navigation
+with st.sidebar:
+    st.title("Nature Nexus")
+    st.subheader("Forest Surveillance System")
+
+    selected_service = st.radio(
+        "Select Service:",
+        ["Deforestation Detection", "Forest Audio Surveillance"]
+    )
+    st.session_state.current_service = 'deforestation' if selected_service == "Deforestation Detection" else 'audio'
+
+    st.markdown("---")
+
+    # Service-specific sidebar content
+    if st.session_state.current_service == 'deforestation':
+        st.info(
+            """
+            **Deforestation Detection**
+
+            Upload satellite or aerial images to detect areas of deforestation.
+            """
+        )
+    else:
+        st.info(
+            """
+            **Forest Audio Surveillance**
+
+            Detect unusual human-related sounds in forested regions.
+            """
+        )
+
+        # Audio service specific controls
+        st.subheader("Audio Configuration")
+        audio_input_method = st.radio(
+            "Select Input Method:",
+            ("Upload Audio", "Record Audio"),
+            index=0 if st.session_state.audio_input_method == 'upload' else 1
+        )
+        st.session_state.audio_input_method = 'upload' if audio_input_method == "Upload Audio" else 'record'
+
+        # Audio class information
+        st.markdown("**Detection Classes:**")
+
+        # Group classes by category
+        human_sounds = ['footsteps', 'coughing', 'laughing', 'breathing',
+                        'drinking_sipping', 'snoring', 'sneezing']
+        tool_sounds = ['chainsaw', 'hand_saw']
+        vehicle_sounds = ['car_horn', 'engine', 'siren']
+        other_sounds = ['crackling_fire', 'fireworks']
+
+        st.markdown("👤 **Human Sounds:** " + ", ".join([s.capitalize() for s in human_sounds]))
+        st.markdown("🔨 **Tool Sounds:** " + ", ".join([s.capitalize() for s in tool_sounds]))
+        st.markdown("🚗 **Vehicle Sounds:** " + ", ".join([s.capitalize() for s in vehicle_sounds]))
+        st.markdown("💥 **Other Sounds:** " + ", ".join([s.capitalize() for s in other_sounds]))
 
-# Load ONNX model
+# Load deforestation model
 @st.cache_resource
-def load_cached_onnx_model():
+def load_cached_deforestation_model():
     model_path = "models/deforestation_model.onnx"
-    return load_onnx_model(model_path, input_size=MODEL_INPUT_SIZE)
+    return load_onnx_model(model_path, input_size=DEFOREST_MODEL_INPUT_SIZE)
+
+# Load audio model
+@st.cache_resource
+def load_cached_audio_model():
+    return load_audio_model(AUDIO_MODEL_PATH)
 
+# Process image for deforestation detection
 def process_image(model, image):
     """Process a single image and return results"""
     # Save original image dimensions for display
@@ -43,24 +127,121 @@ def process_image(model, image):
 
     return binary_mask, overlay, metrics
 
-def main():
+# Visualize audio for audio classification
+def visualize_audio(audio_path):
+    y, sr = librosa.load(audio_path, sr=16000)
+    duration = len(y) / sr
+
+    fig, ax = plt.subplots(2, 1, figsize=(10, 6))
+
+    # Waveform plot
+    librosa.display.waveshow(y, sr=sr, ax=ax[0])
+    ax[0].set_title('Audio Waveform')
+    ax[0].set_xlabel('Time (s)')
+    ax[0].set_ylabel('Amplitude')
+
+    # Spectrogram plot
+    S = librosa.feature.melspectrogram(y=y, sr=sr)
+    S_db = librosa.power_to_db(S, ref=np.max)
+    img = librosa.display.specshow(S_db, sr=sr, x_axis='time', y_axis='mel', ax=ax[1])
+    fig.colorbar(img, ax=ax[1], format='%+2.0f dB')
+    ax[1].set_title('Mel Spectrogram')
+
+    plt.tight_layout()
+    st.pyplot(fig)
+
+    return y, sr, duration
+
+# Process audio for classification
+def process_audio(audio_file):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
+        tmp_file.write(audio_file.read() if hasattr(audio_file, 'read') else audio_file)
+        audio_path = tmp_file.name
+
+    try:
+        # Load audio model
+        audio_model = load_cached_audio_model()
+
+        # Visualize audio
+        with st.spinner('Analyzing audio...'):
+            y, sr, duration = visualize_audio(audio_path)
+            st.caption(f"Audio duration: {duration:.2f} seconds")
+
+        # Make prediction
+        with st.spinner('Making prediction...'):
+            class_name, confidence = predict_audio(audio_path, audio_model)
+
+        # Display results
+        st.subheader("Detection Results")
+
+        col1, col2 = st.columns(2)
+        with col1:
+            st.metric("Detected Sound", class_name.replace('_', ' ').title())
+        with col2:
+            st.metric("Confidence", f"{confidence*100:.2f}%")
+
+        # Show alerts based on class
+        human_sounds = ['footsteps', 'coughing', 'laughing', 'breathing',
+                        'drinking_sipping', 'snoring', 'sneezing']
+        tool_sounds = ['chainsaw', 'hand_saw']
+
+        if class_name in human_sounds:
+            st.warning("""
+            ⚠️ **Human Activity Detected!**
+            Potential human presence in the monitored area.
+            """)
+        elif class_name in tool_sounds:
+            st.error("""
+            🚨 **ALERT: Human Tool Detected!**
+            Potential illegal logging or activity detected. Consider immediate verification.
+            """)
+        elif class_name in ['car_horn', 'engine', 'siren']:
+            st.warning("""
+            ⚠️ **Vehicle Detected!**
+            Vehicle sounds detected in the monitored area.
+            """)
+        elif class_name == 'fireworks':
+            st.error("""
+            🚨 **ALERT: Fireworks Detected!**
+            Potential fire hazard and disturbance to wildlife. Immediate verification required.
+            """)
+        elif class_name == 'crackling_fire':
+            st.error("""
+            🚨 **ALERT: Fire Detected!**
+            Potential wildfire detected. Immediate verification required.
+            """)
+        else:
+            st.success("✅ Environmental sound detected - no immediate threat")
+
+    except Exception as e:
+        st.error(f"Error processing audio: {str(e)}")
+        st.exception(e)
+    finally:
+        # Clean up temp file
+        try:
+            os.unlink(audio_path)
+        except:
+            pass
+
+# Deforestation detection UI
+def show_deforestation_detection():
     # App title and description
     st.title("🌳 Deforestation Detection")
     st.markdown(
         """
-    This app detects areas of deforestation in satellite or aerial images of forests.
-    Upload an image to get started!
-    """
+    This service detects areas of deforestation in satellite or aerial images of forests.
+    Upload an image to get started!
+    """
     )
 
     # Model info
     st.info(
-        f"⚙️ Model optimized for {MODEL_INPUT_SIZE}x{MODEL_INPUT_SIZE} pixel images using ONNX runtime"
+        f"⚙️ Model optimized for {DEFOREST_MODEL_INPUT_SIZE}x{DEFOREST_MODEL_INPUT_SIZE} pixel images using ONNX runtime"
    )
 
     # Load model
     try:
-        model = load_cached_onnx_model()
+        model = load_cached_deforestation_model()
     except Exception as e:
         st.error(f"Error loading model: {e}")
         st.info(
@@ -139,5 +320,81 @@ def main():
     except Exception as e:
         st.error(f"Error during processing: {e}")
 
+# Audio classification UI
+def show_audio_classification():
+    # App title and description
+    st.title("🎧 Forest Audio Surveillance")
+    st.markdown("""
+    Detect unusual human-related sounds in forested regions to prevent illegal activities.
+    Supported sounds: {}
+    """.format(", ".join(class_names)))
+
+    if st.session_state.audio_input_method == 'upload':
+        st.header("Upload Audio File")
+
+        sample_col, upload_col = st.columns(2)
+        with sample_col:
+            st.info("Upload a WAV, MP3 or OGG file with forest sounds")
+            st.markdown("""
+            **Tips for best results:**
+            - Use audio with minimal background noise
+            - Ensure the sound of interest is clear
+            - 2-3 second clips work best
+            """)
+
+        with upload_col:
+            audio_file = st.file_uploader(
+                "Choose an audio file",
+                type=["wav", "mp3", "ogg"],
+                help="Supported formats: WAV, MP3, OGG"
+            )
+
+        if audio_file:
+            st.success("File uploaded successfully!")
+            with st.expander("Audio Preview", expanded=True):
+                st.audio(audio_file)
+            process_audio(audio_file)
+
+    else:  # Record mode
+        st.header("Record Live Audio")
+
+        st.info("""
+        Click the microphone button below to record a sound for analysis.
+        **Note:** Please ensure your browser has permission to access your microphone.
+        When prompted, click "Allow" to enable recording.
+        """)
+
+        recorded_audio = st.audio_input(
+            label="Record a sound",
+            key="audio_recorder",
+            help="Click to record forest sounds for analysis",
+            label_visibility="visible"
+        )
+
+        if recorded_audio:
+            st.success("Audio recorded successfully!")
+            with st.expander("Recorded Audio", expanded=True):
+                st.audio(recorded_audio)
+            process_audio(recorded_audio)
+        else:
+            st.write("Waiting for recording...")
+
+# Main function
+def main():
+    # Check which service is selected and render appropriate UI
+    if st.session_state.current_service == 'deforestation':
+        show_deforestation_detection()
+    else:
+        show_audio_classification()
+
+    # Footer
+    st.markdown("---")
+    st.markdown("""
+    <div style="text-align: center; padding: 10px;">
+        <p>Nature Nexus - Forest Surveillance System | 🌳 Protect Natural Ecosystems</p>
+        <p><small>Built with Streamlit and PyTorch</small></p>
+    </div>
+    """, unsafe_allow_html=True)
+
 if __name__ == "__main__":
     main()
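For reference, a minimal sketch of the temp-file pattern that process_audio relies on: both Streamlit uploads and st.audio_input recordings are treated either as file-like objects (read()) or raw bytes, written to a temporary WAV, and then handed to librosa by path. The helper name below is hypothetical and not part of the commit.

import tempfile

def to_temp_wav(audio_source):
    """Write file-like or raw-bytes audio to a temporary .wav and return its path."""
    data = audio_source.read() if hasattr(audio_source, "read") else audio_source
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(data)
    return tmp.name

# Usage (hypothetical): path = to_temp_wav(audio_file); analyze path; then os.unlink(path)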
models/best_model.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fff4ca890869016c359ce0991e22c0df72bdaee45b4512f5252967fe44361095
+size 5148310
prediction_engine.py ADDED
@@ -0,0 +1,157 @@
+import torch
+import numpy as np
+import cv2
+import onnxruntime as ort
+from utils.preprocess import preprocess_image
+
+
+class PredictionEngine:
+    def __init__(self, model_path=None, use_onnx=True, input_size=256):
+        """
+        Initialize the prediction engine
+
+        Args:
+            model_path: Path to the model file (PyTorch or ONNX)
+            use_onnx: Whether to use ONNX runtime for inference
+            input_size: Input size for the model (default is 256)
+        """
+        self.use_onnx = use_onnx
+        self.input_size = input_size
+
+        if model_path:
+            if use_onnx:
+                self.model = self._load_onnx_model(model_path)
+            else:
+                self.model = self._load_pytorch_model(model_path)
+        else:
+            self.model = None
+
+    def _load_onnx_model(self, model_path):
+        """
+        Load an ONNX model
+
+        Args:
+            model_path: Path to the ONNX model
+
+        Returns:
+            ONNX Runtime InferenceSession
+        """
+        # Try with CUDA first, fall back to CPU if needed
+        try:
+            session = ort.InferenceSession(
+                model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+            )
+            print("ONNX model loaded with CUDA support")
+            return session
+        except Exception as e:
+            print(f"Could not load ONNX model with CUDA, falling back to CPU: {e}")
+            session = ort.InferenceSession(
+                model_path, providers=["CPUExecutionProvider"]
+            )
+            print("ONNX model loaded with CPU support")
+            return session
+
+    def _load_pytorch_model(self, model_path):
+        """
+        Load a PyTorch model
+
+        Args:
+            model_path: Path to the PyTorch model
+
+        Returns:
+            PyTorch model
+        """
+        from utils.model import load_model
+        return load_model(model_path)
+
+    def preprocess(self, image):
+        """
+        Preprocess an image for prediction
+
+        Args:
+            image: Input image (numpy array)
+
+        Returns:
+            Processed image suitable for the model
+        """
+        # Keep the original image for reference
+        self.original_shape = image.shape[:2]
+
+        # Preprocess image
+        if self.use_onnx:
+            # For ONNX, we need to ensure the input is exactly the expected size
+            tensor = preprocess_image(image, img_size=self.input_size)
+            return tensor.numpy()
+        else:
+            # For PyTorch
+            return preprocess_image(image, img_size=self.input_size)
+
+    def predict(self, image):
+        """
+        Make a prediction on an image
+
+        Args:
+            image: Input image (numpy array)
+
+        Returns:
+            Predicted mask
+        """
+        if self.model is None:
+            raise ValueError("Model not loaded. Initialize with a valid model path.")
+
+        # Preprocess the image
+        processed_input = self.preprocess(image)
+
+        # Run inference
+        if self.use_onnx:
+            # Get input and output names
+            input_name = self.model.get_inputs()[0].name
+            output_name = self.model.get_outputs()[0].name
+
+            # Run ONNX inference
+            outputs = self.model.run([output_name], {input_name: processed_input})
+
+            # Apply sigmoid to output
+            mask = 1 / (1 + np.exp(-outputs[0].squeeze()))
+        else:
+            # PyTorch inference
+            with torch.no_grad():
+                # Move to device
+                device = next(self.model.parameters()).device
+                processed_input = processed_input.to(device)
+
+                # Forward pass
+                output = self.model(processed_input)
+                output = torch.sigmoid(output)
+
+                # Convert to numpy
+                mask = output.cpu().numpy().squeeze()
+
+        return mask
+
+
+def load_pytorch_model(model_path):
+    """
+    Load the PyTorch model for prediction
+
+    Args:
+        model_path: Path to the PyTorch model
+
+    Returns:
+        PredictionEngine instance
+    """
+    return PredictionEngine(model_path, use_onnx=False)
+
+
+def load_onnx_model(model_path, input_size=256):
+    """
+    Load the ONNX model for prediction
+
+    Args:
+        model_path: Path to the ONNX model
+        input_size: Input size for the model
+
+    Returns:
+        PredictionEngine instance
+    """
+    return PredictionEngine(model_path, use_onnx=True, input_size=input_size)
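A hedged usage sketch of the prediction_engine API defined above; the sample image path and the 0.5 threshold are illustrative assumptions, not part of the commit.

import cv2
from prediction_engine import load_onnx_model

engine = load_onnx_model("models/deforestation_model.onnx", input_size=256)
image = cv2.imread("sample_tile.png")        # hypothetical input tile (numpy array)
mask = engine.predict(image)                 # per-pixel probabilities in [0, 1] after sigmoid
binary = (mask > 0.5).astype("uint8")        # 0.5 threshold is an assumption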
requirements.txt CHANGED
@@ -10,4 +10,7 @@ scikit-learn
 matplotlib
 onnxruntime
 onnxruntime-gpu
-onnx
+onnx
+librosa
+soundfile
+pydub
utils/audio_model.py ADDED
@@ -0,0 +1,76 @@
+import torch
+import numpy as np
+from utils.audio_processing import preprocess_audio
+
+class_names = [
+    'fireworks', 'chainsaw', 'footsteps', 'car_horn', 'crackling_fire',
+    'drinking_sipping', 'laughing', 'engine', 'breathing', 'hand_saw',
+    'coughing', 'snoring', 'sneezing', 'siren'
+]
+
+class AudioClassifier(torch.nn.Module):
+    def __init__(self, num_classes=14):
+        super().__init__()
+        self.features = torch.nn.Sequential(
+            torch.nn.Conv2d(1, 64, 3, padding=1),
+            torch.nn.BatchNorm2d(64),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(64, 64, 3, padding=1),
+            torch.nn.BatchNorm2d(64),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
+            torch.nn.Dropout(0.2),
+
+            torch.nn.Conv2d(64, 128, 3, padding=1),
+            torch.nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(128, 128, 3, padding=1),
+            torch.nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
+            torch.nn.Dropout(0.2),
+
+            torch.nn.Conv2d(128, 256, 3, padding=1),
+            torch.nn.BatchNorm2d(256),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(256, 256, 3, padding=1),
+            torch.nn.BatchNorm2d(256),
+            torch.nn.ReLU(),
+            torch.nn.MaxPool2d(2),
+            torch.nn.Dropout(0.2)
+        )
+        self.classifier = torch.nn.Sequential(
+            torch.nn.AdaptiveAvgPool2d(1),
+            torch.nn.Flatten(),
+            torch.nn.Linear(256, 256),
+            torch.nn.ReLU(),
+            torch.nn.Linear(256, 256),
+            torch.nn.ReLU(),
+            torch.nn.Linear(256, num_classes)
+        )
+
+    def forward(self, x):
+        x = self.features(x)
+        return self.classifier(x)
+
+def load_audio_model(model_path='models/audio_model.pth'):
+    model = AudioClassifier(len(class_names))
+    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+    model.eval()
+    return model
+
+def predict_audio(audio_path, model):
+    # Preprocess audio
+    spec = preprocess_audio(audio_path)
+
+    # Convert to tensor
+    input_tensor = torch.FloatTensor(spec).unsqueeze(0)  # Add batch dimension
+
+    # Predict
+    with torch.no_grad():
+        outputs = model(input_tensor)
+        probabilities = torch.nn.functional.softmax(outputs, dim=1)
+
+    # Get results
+    pred_prob, pred_index = torch.max(probabilities, 1)
+    return class_names[pred_index.item()], pred_prob.item()
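A short usage sketch of the audio API above, assuming the checkpoint added in this commit and a hypothetical local clip:

from utils.audio_model import load_audio_model, predict_audio

model = load_audio_model("models/best_model.pth")             # checkpoint shipped in this commit
label, confidence = predict_audio("sample_clip.wav", model)   # hypothetical ~3 s recording
print(f"{label}: {confidence:.2%}")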
utils/audio_processing.py ADDED
@@ -0,0 +1,42 @@
+import librosa
+import numpy as np
+
+class AudioConfig:
+    sr = 16000
+    duration = 3
+    hop_length = 340 * duration
+    fmin = 20
+    fmax = sr // 2
+    n_mels = 128
+    n_fft = 128 * 20
+    samples = sr * duration
+
+def preprocess_audio(audio_path, config=None):
+    if config is None:
+        config = AudioConfig()
+
+    # Load audio
+    y, sr = librosa.load(audio_path, sr=config.sr)
+
+    # Trim or pad
+    if len(y) > config.samples:
+        y = y[:config.samples]
+    else:
+        padding = config.samples - len(y)
+        offset = padding // 2
+        y = np.pad(y, (offset, padding - offset), 'constant')
+
+    # Create mel spectrogram
+    spectrogram = librosa.feature.melspectrogram(
+        y=y,
+        sr=config.sr,
+        n_mels=config.n_mels,
+        hop_length=config.hop_length,
+        n_fft=config.n_fft,
+        fmin=config.fmin,
+        fmax=config.fmax
+    )
+    spectrogram = librosa.power_to_db(spectrogram)
+
+    # Return with correct shape for PyTorch (channels, height, width)
+    return spectrogram[np.newaxis, ...]
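With the AudioConfig defaults, 3 s at 16 kHz is 48,000 samples; hop_length = 340 * 3 = 1020 then yields 1 + 48000 // 1020 = 48 frames, so the classifier receives a (1, 128, 48) spectrogram. A quick shape check, assuming a hypothetical local clip:

from utils.audio_processing import preprocess_audio

spec = preprocess_audio("clip.wav")   # hypothetical path
print(spec.shape)                     # expected: (1, 128, 48)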