smokeyScraper committed
Commit 0e5cf7b · unverified · 2 Parent(s): e3d6299 4655d1b

Merge pull request #2 from kartikbhtt7/add-audio-model

Files changed (5)
  1. .gitignore +3 -1
  2. app.py +308 -4
  3. requirements.txt +2 -1
  4. utils/helpers.py +49 -0
  5. utils/onnx_inference.py +47 -0
.gitignore CHANGED
@@ -1 +1,3 @@
- **/__pycache__
+ **/__pycache__
+ *.onnx
+ *.pth
app.py CHANGED
@@ -6,9 +6,16 @@ import tempfile
  import librosa
  import librosa.display
  import matplotlib.pyplot as plt
+ import tempfile
+ import librosa
+ import librosa.display
+ import matplotlib.pyplot as plt
  from PIL import Image
  import torch

+ # Import deforestation modules
+ from prediction_engine import load_onnx_model
+
  # Import deforestation modules
  from prediction_engine import load_onnx_model
  from utils.helpers import calculate_deforestation_metrics, create_overlay
@@ -17,6 +24,9 @@ from utils.helpers import calculate_deforestation_metrics, create_overlay
  from utils.audio_processing import preprocess_audio
  from utils.audio_model import load_audio_model, predict_audio, class_names

+ # Import YOLO detection modules
+ from utils.onnx_inference import YOLOv11
+
  # Ensure torch classes path is initialized to avoid warnings
  torch.classes.__path__ = []
@@ -28,15 +38,19 @@ st.set_page_config(
      initial_sidebar_state="expanded"
  )

+
  # Constants
  DEFOREST_MODEL_INPUT_SIZE = 256
  AUDIO_MODEL_PATH = "models/best_model.pth"
+ YOLO_MODEL_PATH = "models/best_model.onnx"

  # Initialize session state for navigation
  if 'current_service' not in st.session_state:
      st.session_state.current_service = 'deforestation'
  if 'audio_input_method' not in st.session_state:
      st.session_state.audio_input_method = 'upload'
+ if 'detection_input_method' not in st.session_state:
+     st.session_state.detection_input_method = 'image'

  # Sidebar for navigation
  with st.sidebar:
@@ -45,9 +59,15 @@ with st.sidebar:

      selected_service = st.radio(
          "Select Service:",
-         ["Deforestation Detection", "Forest Audio Surveillance"]
+         ["Deforestation Detection", "Forest Audio Surveillance", "Object Detection"]
      )
-     st.session_state.current_service = 'deforestation' if selected_service == "Deforestation Detection" else 'audio'
+
+     if selected_service == "Deforestation Detection":
+         st.session_state.current_service = 'deforestation'
+     elif selected_service == "Forest Audio Surveillance":
+         st.session_state.current_service = 'audio'
+     else:
+         st.session_state.current_service = 'detection'

      st.markdown("---")
@@ -60,7 +80,7 @@ with st.sidebar:
              Upload satellite or aerial images to detect areas of deforestation.
              """
          )
-     else:
+     elif st.session_state.current_service == 'audio':
          st.info(
              """
              **Forest Audio Surveillance**
@@ -92,6 +112,44 @@ with st.sidebar:
          st.markdown("🔨 **Tool Sounds:** " + ", ".join([s.capitalize() for s in tool_sounds]))
          st.markdown("🚗 **Vehicle Sounds:** " + ", ".join([s.capitalize() for s in vehicle_sounds]))
          st.markdown("💥 **Other Sounds:** " + ", ".join([s.capitalize() for s in other_sounds]))
+     else:  # Object Detection
+         st.info(
+             """
+             **Object Detection**
+
+             Detect trespassers, vehicles, fires, and other objects in forest surveillance footage.
+             """
+         )
+
+         # Detection service specific controls
+         st.subheader("Detection Configuration")
+         detection_input_method = st.radio(
+             "Select Input Method:",
+             ("Image", "Video", "Camera"),
+             index=0 if st.session_state.detection_input_method == 'image' else
+             (1 if st.session_state.detection_input_method == 'video' else 2)
+         )
+
+         if detection_input_method == "Image":
+             st.session_state.detection_input_method = 'image'
+         elif detection_input_method == "Video":
+             st.session_state.detection_input_method = 'video'
+         else:
+             st.session_state.detection_input_method = 'camera'
+
+         # Detection threshold controls
+         st.subheader("Detection Settings")
+         confidence = st.slider("Confidence Threshold", 0.0, 1.0, 0.5)
+         iou_thres = st.slider("IoU Threshold", 0.0, 1.0, 0.5)
+
+         # Detection class information
+         st.markdown("**Detection Classes:**")
+         st.markdown("🚴 **Bike/Bicycle**")
+         st.markdown("🚚 **Bus/Truck**")
+         st.markdown("🚗 **Car**")
+         st.markdown("🔥 **Fire**")
+         st.markdown("👤 **Human**")
+         st.markdown("💨 **Smoke**")

  # Load deforestation model
  @st.cache_resource
@@ -104,6 +162,10 @@ def load_cached_deforestation_model():
  def load_cached_audio_model():
      return load_audio_model(AUDIO_MODEL_PATH)

+ @st.cache_resource
+ def load_cached_yolo_model():
+     return YOLOv11(YOLO_MODEL_PATH)
+
  # Process image for deforestation detection
  def process_image(model, image):
      """Process a single image and return results"""
@@ -379,13 +441,255 @@ def show_audio_classification():
      else:
          st.write("Waiting for recording...")

+ # Object Detection UI
+ def show_object_detection():
+     # App title and description
+     st.title("🔍 Forest Object Detection")
+     st.markdown(
+         """
+         Detect trespassers, vehicles, fires, and other objects in forest surveillance footage.
+         Choose an input method to begin detection.
+         """
+     )
+
+     # Model info
+     st.info("⚙️ Object detection model optimized with ONNX runtime for faster inference")
+
+     # Load model
+     try:
+         model = load_cached_yolo_model()
+         # Update model confidence and IoU thresholds from sidebar
+         confidence = st.session_state.get('confidence', 0.5)
+         iou_thres = st.session_state.get('iou_thres', 0.5)
+         model.conf_thres = confidence
+         model.iou_thres = iou_thres
+     except Exception as e:
+         st.error(f"Error loading model: {e}")
+         st.info(
+             "Make sure you have the YOLO ONNX model file available at models/best_model.onnx"
+         )
+         return
+
+     # Input method based selection
+     if st.session_state.detection_input_method == 'image':
+         # Image upload
+         img_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
+         if img_file is not None:
+             # Load image
+             file_bytes = np.asarray(bytearray(img_file.read()), dtype=np.uint8)
+             image = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
+             if image is not None:
+                 # Display original image
+                 st.subheader("Original Image")
+                 st.image(
+                     cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
+                     caption="Uploaded Image",
+                     use_container_width=True,
+                 )
+
+                 # Process with detection model
+                 with st.spinner("Processing image..."):
+                     try:
+                         detections = model.detect(image)
+                         result_image = model.draw_detections(image.copy(), detections)
+
+                         # Display results
+                         st.subheader("Detection Results")
+                         st.image(
+                             cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB),
+                             caption="Detected Objects",
+                             use_container_width=True,
+                         )
+
+                         # Display detection statistics
+                         st.subheader("Detection Statistics")
+
+                         # Count detections by class
+                         class_counts = {}
+                         for det in detections:
+                             class_name = det['class']
+                             if class_name in class_counts:
+                                 class_counts[class_name] += 1
+                             else:
+                                 class_counts[class_name] = 1
+
+                         # Display counts with emojis
+                         cols = st.columns(3)
+                         col_idx = 0
+
+                         for class_name, count in class_counts.items():
+                             emoji = "👤" if class_name == "human" else (
+                                 "🔥" if class_name == "fire" else (
+                                     "💨" if class_name == "smoke" else (
+                                         "🚗" if class_name == "car" else (
+                                             "🚴" if class_name == "bike-bicycle" else "🚚"))))
+
+                             with cols[col_idx % 3]:
+                                 st.metric(f"{emoji} {class_name.capitalize()}", count)
+                             col_idx += 1
+
+                         # Check for priority threats
+                         if "fire" in class_counts or "smoke" in class_counts:
+                             st.error("🚨 **ALERT: Fire Detected!** Potential forest fire detected. Immediate action required.")
+
+                         if "human" in class_counts or "car" in class_counts or "bike-bicycle" in class_counts or "bus-truck" in class_counts:
+                             st.warning("⚠️ **Trespassers Detected!** Unauthorized entry detected in monitored area.")
+
+                     except Exception as e:
+                         st.error(f"Error during detection: {e}")
+                         st.exception(e)
+
+     elif st.session_state.detection_input_method == 'video':
+         # Video upload
+         video_file = st.file_uploader("Upload Video", type=["mp4", "avi", "mov"])
+         if video_file is not None:
+             # Save uploaded video to temp file
+             with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tfile:
+                 tfile.write(video_file.read())
+                 temp_video_path = tfile.name
+
+             # Display video upload success
+             st.success("Video uploaded successfully!")
+
+             # Process video button
+             if st.button("Process Video"):
+                 with st.spinner("Processing video... This may take a while."):
+                     try:
+                         # Open video file
+                         cap = cv2.VideoCapture(temp_video_path)
+
+                         # Create video writer for output
+                         output_path = "output_video.mp4"
+                         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                         fps = cap.get(cv2.CAP_PROP_FPS)
+                         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+                         # Create placeholder for video frames
+                         video_placeholder = st.empty()
+                         status_text = st.empty()
+
+                         # Process frames
+                         frame_count = 0
+                         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+                         while cap.isOpened():
+                             ret, frame = cap.read()
+                             if not ret:
+                                 break
+
+                             # Process every 5th frame for speed
+                             if frame_count % 5 == 0:
+                                 detections = model.detect(frame)
+                                 result_frame = model.draw_detections(frame.copy(), detections)
+
+                                 # Update preview
+                                 if frame_count % 15 == 0:  # Update display less frequently
+                                     video_placeholder.image(
+                                         cv2.cvtColor(result_frame, cv2.COLOR_BGR2RGB),
+                                         caption="Processing Video",
+                                         use_container_width=True
+                                     )
+                                     progress = min(100, int((frame_count / total_frames) * 100))
+                                     status_text.text(f"Processing: {progress}% complete")
+                             else:
+                                 result_frame = frame  # Skip detection on some frames
+
+                             # Write frame to output video
+                             out.write(result_frame)
+                             frame_count += 1
+
+                         # Release resources
+                         cap.release()
+                         out.release()
+
+                         # Display completion message
+                         st.success("Video processing complete!")
+
+                         # Provide download button for processed video
+                         with open(output_path, "rb") as file:
+                             st.download_button(
+                                 label="Download Processed Video",
+                                 data=file,
+                                 file_name="forest_surveillance_results.mp4",
+                                 mime="video/mp4"
+                             )
+
+                     except Exception as e:
+                         st.error(f"Error processing video: {e}")
+                         st.exception(e)
+                     finally:
+                         # Clean up temp file
+                         try:
+                             os.unlink(temp_video_path)
+                         except:
+                             pass
+
+     else:  # Camera mode
+         # Live camera feed
+         st.subheader("Live Camera Detection")
+         st.info("Use your webcam to detect objects in real-time")
+
+         cam = st.camera_input("Camera Feed")
+
+         if cam:
+             # Process camera input
+             with st.spinner("Processing image..."):
+                 try:
+                     # Convert image
+                     image = cv2.imdecode(np.frombuffer(cam.getvalue(), np.uint8), cv2.IMREAD_COLOR)
+
+                     # Run detection
+                     detections = model.detect(image)
+                     result_image = model.draw_detections(image.copy(), detections)
+
+                     # Display results
+                     st.image(
+                         cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB),
+                         caption="Detection Results",
+                         use_container_width=True
+                     )
+
+                     # Show detection summary
+                     if detections:
+                         # Count detections by class
+                         class_counts = {}
+                         for det in detections:
+                             class_name = det['class']
+                             if class_name in class_counts:
+                                 class_counts[class_name] += 1
+                             else:
+                                 class_counts[class_name] = 1
+
+                         # Display as metrics
+                         st.subheader("Detection Summary")
+                         cols = st.columns(3)
+                         for i, (class_name, count) in enumerate(class_counts.items()):
+                             with cols[i % 3]:
+                                 st.metric(class_name.capitalize(), count)
+
+                         # Check for priority threats
+                         if "fire" in class_counts or "smoke" in class_counts:
+                             st.error("🚨 **ALERT: Fire Detected!** Potential forest fire detected.")
+
+                         if "human" in class_counts:
+                             st.warning("⚠️ **Trespasser Detected!** Human presence detected.")
+                     else:
+                         st.info("No objects detected in frame")
+
+                 except Exception as e:
+                     st.error(f"Error processing camera feed: {e}")
+
  # Main function
  def main():
      # Check which service is selected and render appropriate UI
      if st.session_state.current_service == 'deforestation':
          show_deforestation_detection()
-     else:
+     elif st.session_state.current_service == 'audio':
          show_audio_classification()
+     else:  # 'detection'
+         show_object_detection()

  # Footer
  st.markdown("---")
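A note on the threshold wiring: show_object_detection() reads the sliders back through st.session_state.get('confidence', 0.5) and st.session_state.get('iou_thres', 0.5), so those session-state keys need to be populated by the sidebar widgets. A minimal sketch of one way to do that is below; the key names are assumed from the get() calls and are not part of this diff.

    # Hypothetical sketch: give the sliders explicit keys so their values land
    # in st.session_state under the names read in show_object_detection().
    confidence = st.slider("Confidence Threshold", 0.0, 1.0, 0.5, key='confidence')
    iou_thres = st.slider("IoU Threshold", 0.0, 1.0, 0.5, key='iou_thres')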
requirements.txt CHANGED
@@ -13,4 +13,5 @@ onnxruntime-gpu
  onnx
  librosa
  soundfile
- pydub
+ pydub
+ supervision
utils/helpers.py CHANGED
@@ -71,3 +71,52 @@ def create_overlay(original_image, mask, threshold=0.5, alpha=0.5):
      overlay = cv2.addWeighted(original_image, 1 - alpha, colored_mask, alpha, 0)

      return overlay
+
+
+ CLASS_NAMES = ['bike-bicycle', 'bus-truck', 'car', 'fire', 'human', 'smoke']
+ COLORS = np.random.uniform(0, 255, size=(len(CLASS_NAMES), 3))
+
+ def preprocess(image, img_size=640):
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+     image = cv2.resize(image, (img_size, img_size))
+     image = image.transpose((2, 0, 1))  # HWC to CHW
+     image = np.ascontiguousarray(image, dtype=np.float32) / 255.0
+     return image[np.newaxis, ...]
+
+ def postprocess(outputs, conf_thresh=0.5, iou_thresh=0.5):
+     outputs = outputs[0].transpose()
+     boxes, scores, class_ids = [], [], []
+
+     for row in outputs:
+         cls_scores = row[4:4+len(CLASS_NAMES)]
+         class_id = np.argmax(cls_scores)
+         max_score = cls_scores[class_id]
+
+         if max_score >= conf_thresh:
+             cx, cy, w, h = row[:4]
+             x = (cx - w/2).item()  # Convert to Python float
+             y = (cy - h/2).item()
+             width = w.item()
+             height = h.item()
+             boxes.append([x, y, width, height])
+             scores.append(float(max_score))
+             class_ids.append(int(class_id))
+
+     if len(boxes) > 0:
+         # Convert to list of lists with native Python floats
+         boxes = [[float(x) for x in box] for box in boxes]
+         scores = [float(score) for score in scores]
+
+         indices = cv2.dnn.NMSBoxes(
+             bboxes=boxes,
+             scores=scores,
+             score_threshold=conf_thresh,
+             nms_threshold=iou_thresh
+         )
+
+         if len(indices) > 0:
+             boxes = [boxes[i] for i in indices.flatten()]
+             scores = [scores[i] for i in indices.flatten()]
+             class_ids = [class_ids[i] for i in indices.flatten()]
+
+     return boxes, scores, class_ids
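As a quick sanity check of the new helpers, the sketch below runs preprocess() on a dummy frame and postprocess() on a synthetic output column. The array layout (cx, cy, w, h followed by per-class scores) mirrors what postprocess() transposes, but the values and shapes here are illustrative assumptions, not part of the commit, and it presumes the repository root is on PYTHONPATH.

    import numpy as np
    from utils.helpers import CLASS_NAMES, preprocess, postprocess

    # Dummy BGR frame -> (1, 3, 640, 640) float32 blob
    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    blob = preprocess(frame)
    print(blob.shape, blob.dtype)  # (1, 3, 640, 640) float32

    # One synthetic candidate column: cx, cy, w, h then per-class scores
    # (the real ONNX output carries an extra leading batch axis).
    raw = np.zeros((4 + len(CLASS_NAMES), 1), dtype=np.float32)
    raw[:4, 0] = [320, 320, 100, 80]
    raw[4 + CLASS_NAMES.index('fire'), 0] = 0.9
    boxes, scores, class_ids = postprocess([raw], conf_thresh=0.5, iou_thresh=0.5)
    print(boxes, scores, [CLASS_NAMES[i] for i in class_ids])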
utils/onnx_inference.py ADDED
@@ -0,0 +1,47 @@
+ import cv2
+ import numpy as np
+ import onnxruntime as ort
+ from .helpers import CLASS_NAMES, COLORS, preprocess, postprocess
+
+ class YOLOv11:
+     def __init__(self, onnx_path, conf_thres=0.5, iou_thres=0.5):
+         self.session = ort.InferenceSession(onnx_path)
+         self.conf_thres = conf_thres
+         self.iou_thres = iou_thres
+         self.input_name = self.session.get_inputs()[0].name
+         self.output_name = self.session.get_outputs()[0].name
+
+         # Verify input type
+         input_type = self.session.get_inputs()[0].type
+         assert "float" in input_type, f"Model expects {input_type}"
+
+     def detect(self, image):
+         orig_h, orig_w = image.shape[:2]
+         blob = preprocess(image)
+         outputs = self.session.run([self.output_name], {self.input_name: blob})
+         boxes, scores, class_ids = postprocess(outputs, self.conf_thres, self.iou_thres)
+
+         results = []
+         for box, score, class_id in zip(boxes, scores, class_ids):
+             x, y, w, h = box
+             x1 = int(x * orig_w / 640)
+             y1 = int(y * orig_h / 640)
+             x2 = int((x + w) * orig_w / 640)
+             y2 = int((y + h) * orig_h / 640)
+
+             results.append({
+                 'class': CLASS_NAMES[class_id],
+                 'confidence': score,
+                 'box': [x1, y1, x2, y2]
+             })
+         return results
+
+     def draw_detections(self, image, detections):
+         for det in detections:
+             x1, y1, x2, y2 = det['box']
+             color = COLORS[CLASS_NAMES.index(det['class'])]
+             cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
+             label = f"{det['class']}: {det['confidence']:.2f}"
+             cv2.putText(image, label, (x1, y1 - 10),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+         return image
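For completeness, a usage sketch of the new wrapper outside Streamlit. The image path is a placeholder, and models/best_model.onnx is assumed to exist at the same path app.py stores in YOLO_MODEL_PATH.

    import cv2
    from utils.onnx_inference import YOLOv11

    # Assumes the exported ONNX weights exist at the path app.py points to.
    model = YOLOv11("models/best_model.onnx", conf_thres=0.5, iou_thres=0.5)
    frame = cv2.imread("sample.jpg")  # placeholder test image (BGR)
    detections = model.detect(frame)
    for det in detections:
        print(det['class'], f"{det['confidence']:.2f}", det['box'])
    annotated = model.draw_detections(frame.copy(), detections)
    cv2.imwrite("sample_annotated.jpg", annotated)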