Adieee5 committed on
Commit 3d8b532 · verified · Parent(s): 6a49af6

Upload 8 files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+res10_300x300_ssd_iter_140000.caffemodel filter=lfs diff=lfs merge=lfs -text
+sample_videos/Sample.mp4 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,672 @@
+import altair as alt
+import numpy as np
+import pandas as pd
+import streamlit as st
+
+import cv2
+import torch
+import os
+import tempfile
+import time
+from transformers import AutoImageProcessor, AutoModelForImageClassification
+from collections import deque
+import tensorflow as tf
+from tensorflow.keras.preprocessing import image
+from tensorflow.keras.models import load_model
+import urllib.request
+import shutil
+
+class CNNDeepfakeDetector:
+    def __init__(self):
+        st.info("Initializing CNN Deepfake Detector... This may take a moment.")
+
+        # Initialize CNN model for deepfake detection
+        with st.spinner("Loading CNN deepfake detection model..."):
+            try:
+                self.model = load_model('cnn_model.h5')
+                st.success("CNN model loaded successfully!")
+            except Exception as e:
+                st.error(f"Error loading CNN model: {e}")
+                st.warning("Please make sure 'cnn_model.h5' is in the current directory.")
+                self.model = None
+
+    def classify_image(self, face_img):
+        """Classify a face image as real or fake using the CNN model"""
+        try:
+            if self.model is None:
+                return "Model Not Loaded", 0.0
+
+            # Resize to the model's input size
+            img_resized = cv2.resize(face_img, (128, 128))
+
+            # Preprocess the image
+            img_array = img_resized / 255.0
+            img_array = np.expand_dims(img_array, axis=0)
+
+            # Make prediction
+            prediction = self.model.predict(img_array)
+            confidence = float(prediction[0][0])
+
+            # In this model, <0.5 means Real, >=0.5 means Fake
+            label = 'Real' if confidence < 0.5 else 'Fake'
+
+            # Report confidence relative to the predicted class; a 'Fake'
+            # score is already in 0.5-1.0
+            if label == 'Real':
+                confidence = 1.0 - confidence  # Convert 0.0-0.5 to 0.5-1.0
+
+            return label, confidence
+
+        except Exception as e:
+            st.error(f"Error in CNN classification: {e}")
+            return "Error", 0.0
+
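+# Worked example of the confidence remapping above: a raw sigmoid output of
+# 0.2 falls below the 0.5 threshold, so the label is 'Real' and the reported
+# confidence becomes 1.0 - 0.2 = 0.8; a raw output of 0.9 yields 'Fake' with
+# confidence 0.9. Either way the reported value lies in [0.5, 1.0] and
+# measures support for the predicted class.
+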
+class DeepfakeDetector:
+    def __init__(self):
+        st.info("Initializing Deepfake Detector... This may take a moment.")
+
+        # Initialize ViT model for deepfake detection
+        with st.spinner("Loading deepfake detection model..."):
+            self.image_processor = AutoImageProcessor.from_pretrained(
+                'Adieee5/deepfake-detection-f3net-cross')
+            self.model = AutoModelForImageClassification.from_pretrained(
+                'Adieee5/deepfake-detection-f3net-cross')
+
+        # Face detection model setup
+        with st.spinner("Loading face detection model..."):
+            model_file = "deploy.prototxt"
+            weights_file = "res10_300x300_ssd_iter_140000.caffemodel"
+
+            self.use_dnn = False
+            if os.path.exists(model_file) and os.path.exists(weights_file):
+                try:
+                    self.face_net = cv2.dnn.readNetFromCaffe(model_file, weights_file)
+                    self.use_dnn = True
+                    st.success("Using DNN face detector (better for close-up faces)")
+                except Exception as e:
+                    st.warning(f"Could not load DNN model: {e}")
+                    self.use_dnn = False
+
+            if not self.use_dnn:
+                # Fall back to the Haar cascade
+                cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
+                if os.path.exists(cascade_path):
+                    self.face_cascade = cv2.CascadeClassifier(cascade_path)
+                    st.warning("Using Haar cascade face detector as fallback")
+                else:
+                    st.error(f"Cascade file not found: {cascade_path}")
+
+        # Initialize CNN model
+        self.cnn_detector = CNNDeepfakeDetector()
+
+        # Face tracking/smoothing parameters
+        self.face_history = {}  # Store face tracking data
+        self.face_history_max_size = 10  # Store history for the last 10 frames
+        self.face_ttl = 5  # Number of frames a face can be missing before removal
+        self.next_face_id = 0  # For assigning unique IDs to tracked faces
+
+        # Result smoothing
+        self.result_buffer_size = 5  # Number of classifications to average
+
+        # Performance metrics
+        self.processing_times = deque(maxlen=30)
+
+        st.success("Models loaded successfully!")
+
+    def detect_faces_haar(self, frame):
+        """Detect faces using the Haar cascade"""
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = self.face_cascade.detectMultiScale(
+            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+
+        # Convert to (x, y, w, h, confidence) tuples for consistency
+        return [(x, y, w, h, 0.8) for (x, y, w, h) in faces]
+
+    def classify_frame(self, face_img, model_type="vit"):
+        """Classify a face image as real or fake"""
+        try:
+            if model_type == "cnn":
+                return self.cnn_detector.classify_image(face_img)
+
+            # Default to the ViT model
+            # Resize the image if it is too small
+            h, w = face_img.shape[:2]
+            if h < 224 or w < 224:
+                scale = max(224/h, 224/w)
+                face_img = cv2.resize(face_img, (int(w*scale), int(h*scale)))
+
+            # Make sure we have valid image data
+            if face_img.size == 0:
+                return "Unknown", 0.0
+
+            # Process with the ViT model
+            inputs = self.image_processor(images=face_img, return_tensors="pt")
+            outputs = self.model(**inputs)
+            logits = outputs.logits
+
+            # Get prediction and confidence
+            probs = torch.nn.functional.softmax(logits, dim=1)
+            pred = torch.argmax(logits, dim=1).item()
+
+            # The model has two classes: 0=Fake, 1=Real
+            label = 'Real' if pred == 1 else 'Fake'
+            confidence = probs[0][pred].item()
+
+            return label, confidence
+
+        except Exception as e:
+            st.error(f"Error in classification: {e}")
+            return "Error", 0.0
+
+    def detect_faces_dnn(self, frame):
+        """Detect faces using the DNN method"""
+        height, width = frame.shape[:2]
+        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0,
+                                     (300, 300), (104.0, 177.0, 123.0))
+        self.face_net.setInput(blob)
+        detections = self.face_net.forward()
+
+        faces = []
+        for i in range(detections.shape[2]):
+            confidence = detections[0, 0, i, 2]
+            if confidence > 0.5:  # Filter out weak detections
+                box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
+                (x1, y1, x2, y2) = box.astype("int")
+                # Ensure the box is within frame boundaries
+                x1, y1 = max(0, x1), max(0, y1)
+                x2, y2 = min(width, x2), min(height, y2)
+                w, h = x2 - x1, y2 - y1
+                if w > 0 and h > 0:  # Valid face area
+                    faces.append((x1, y1, w, h, confidence))
+
+        return faces
+
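+# Worked example of the detection decoding above: for a 640x480 frame, a
+# detection row of [0.25, 0.20, 0.75, 0.80] scales to the pixel box
+# (x1, y1, x2, y2) = (160, 96, 480, 384), giving w = 320 and h = 288.
+# Rows with confidence <= 0.5 are discarded before this conversion.
+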
+    def calculate_iou(self, box1, box2):
+        """Calculate Intersection over Union for two boxes"""
+        # Convert boxes from (x, y, w, h) to (x1, y1, x2, y2)
+        box1_x1, box1_y1, box1_w, box1_h = box1
+        box2_x1, box2_y1, box2_w, box2_h = box2
+
+        box1_x2, box1_y2 = box1_x1 + box1_w, box1_y1 + box1_h
+        box2_x2, box2_y2 = box2_x1 + box2_w, box2_y1 + box2_h
+
+        # Calculate the area of the intersection rectangle
+        x_left = max(box1_x1, box2_x1)
+        y_top = max(box1_y1, box2_y1)
+        x_right = min(box1_x2, box2_x2)
+        y_bottom = min(box1_y2, box2_y2)
+
+        if x_right < x_left or y_bottom < y_top:
+            return 0.0
+
+        intersection_area = (x_right - x_left) * (y_bottom - y_top)
+
+        # Calculate the area of both boxes
+        box1_area = box1_w * box1_h
+        box2_area = box2_w * box2_h
+
+        # Calculate IoU
+        iou = intersection_area / float(box1_area + box2_area - intersection_area)
+        return iou
+
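+# Worked IoU example: boxes (0, 0, 100, 100) and (50, 0, 100, 100) overlap
+# in a 50x100 region, so IoU = 5000 / (10000 + 10000 - 5000) = 1/3. The
+# tracker below treats IoU above 0.3 as the same face across frames.
+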
+    def track_faces(self, faces):
+        """Match current detections to tracked faces by IoU, keep missed
+        faces alive for a few frames, and assign IDs to new detections."""
+        matched_faces = []
+        unmatched_detections = list(range(len(faces)))
+
+        if not self.face_history:
+            for face in faces:
+                face_id = self.next_face_id
+                self.next_face_id += 1
+                self.face_history[face_id] = {
+                    'positions': deque([face[:4]], maxlen=self.face_history_max_size),
+                    'ttl': self.face_ttl,
+                    'label': None,
+                    'confidence': 0.0,
+                    'result_history': deque(maxlen=self.result_buffer_size)
+                }
+                matched_faces.append((face_id, face))
+            return matched_faces
+
+        for face_id in list(self.face_history.keys()):
+            last_pos = self.face_history[face_id]['positions'][-1]
+            best_match = -1
+            best_iou = 0.3  # Minimum overlap required for a match
+            for i in unmatched_detections:
+                iou = self.calculate_iou(last_pos, faces[i][:4])
+                if iou > best_iou:
+                    best_iou = iou
+                    best_match = i
+            if best_match != -1:
+                matched_face = faces[best_match]
+                self.face_history[face_id]['positions'].append(matched_face[:4])
+                self.face_history[face_id]['ttl'] = self.face_ttl
+                matched_faces.append((face_id, matched_face))
+                unmatched_detections.remove(best_match)
+            else:
+                self.face_history[face_id]['ttl'] -= 1
+                if self.face_history[face_id]['ttl'] <= 0:
+                    del self.face_history[face_id]
+                else:
+                    predicted_face = (*last_pos, 0.5)
+                    matched_faces.append((face_id, predicted_face))
+
+        for i in unmatched_detections:
+            face_id = self.next_face_id
+            self.next_face_id += 1
+            self.face_history[face_id] = {
+                'positions': deque([faces[i][:4]], maxlen=self.face_history_max_size),
+                'ttl': self.face_ttl,
+                'label': None,
+                'confidence': 0.0,
+                'result_history': deque(maxlen=self.result_buffer_size)
+            }
+            matched_faces.append((face_id, faces[i]))
+
+        return matched_faces
+
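+# Note on the matching strategy above: tracking is greedy - each stored face
+# claims the highest-IoU unmatched detection; a missed face survives for
+# face_ttl frames at its last recorded position (with a placeholder
+# confidence of 0.5), and leftover detections become new track IDs.
+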
+    def smooth_face_position(self, face_id):
+        """Calculate a smoothed position for a tracked face"""
+        positions = self.face_history[face_id]['positions']
+
+        if len(positions) == 1:
+            return positions[0]
+
+        # Weight recent positions more heavily
+        total_weight = 0
+        x, y, w, h = 0, 0, 0, 0
+
+        for i, pos in enumerate(positions):
+            # Exponential weighting - positions are stored oldest to newest,
+            # so newer positions get exponentially more influence
+            weight = 2 ** i
+            total_weight += weight
+
+            x += pos[0] * weight
+            y += pos[1] * weight
+            w += pos[2] * weight
+            h += pos[3] * weight
+
+        # Calculate the weighted average
+        x = int(x / total_weight)
+        y = int(y / total_weight)
+        w = int(w / total_weight)
+        h = int(h / total_weight)
+
+        return (x, y, w, h)
+
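+# Worked example of the exponential weighting above: with three stored
+# x-positions [100, 110, 130] (oldest first), the weights are 2**0, 2**1,
+# 2**2, so x = (100*1 + 110*2 + 130*4) / 7 = 840 / 7 = 120 - much closer
+# to the newest measurement than a plain average (113) would be.
+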
+    def update_face_classification(self, face_id, label, confidence):
+        """Update the classification history for a face"""
+        self.face_history[face_id]['result_history'].append((label, confidence))
+
+        # Calculate the smoothed result
+        if not self.face_history[face_id]['result_history']:
+            return label, confidence
+
+        real_votes = 0
+        fake_votes = 0
+        total_confidence = 0.0
+
+        for result_label, result_conf in self.face_history[face_id]['result_history']:
+            if result_label == "Real":
+                real_votes += 1
+                total_confidence += result_conf
+            elif result_label == "Fake":
+                fake_votes += 1
+                total_confidence += result_conf
+
+        # Determine the majority vote
+        if real_votes >= fake_votes:
+            smoothed_label = "Real"
+            label_confidence = real_votes / len(self.face_history[face_id]['result_history'])
+        else:
+            smoothed_label = "Fake"
+            label_confidence = fake_votes / len(self.face_history[face_id]['result_history'])
+
+        # Average confidence weighted by vote consistency
+        avg_confidence = (total_confidence / len(self.face_history[face_id]['result_history'])) * label_confidence
+
+        # Store the smoothed result
+        self.face_history[face_id]['label'] = smoothed_label
+        self.face_history[face_id]['confidence'] = avg_confidence
+
+        return smoothed_label, avg_confidence
+
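+# Worked example of the vote smoothing above: a buffer of
+# [("Fake", 0.9), ("Fake", 0.8), ("Real", 0.7)] gives 2 Fake votes out of 3,
+# so the label is "Fake" with label_confidence = 2/3, and the reported
+# confidence is ((0.9 + 0.8 + 0.7) / 3) * (2/3) = 0.8 * 2/3 ~= 0.53.
+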
+    def process_video(self, video_path, stframe, status_text, progress_bar, detector_type="dnn", model_type="vit"):
+        """Process a video with Streamlit output"""
+        use_dnn_current = detector_type == "dnn" and self.use_dnn
+
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            st.error("Error: Cannot open video source")
+            return
+
+        # Get video properties
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = 250 if video_path != 0 else 0  # Processing is capped at 250 frames
+
+        # Display video info
+        if video_path != 0:  # If not webcam
+            status_text.text(f"Video Info: {frame_width}x{frame_height}, {fps:.1f} FPS, {total_frames} frames")
+        else:
+            status_text.text(f"Webcam: {frame_width}x{frame_height}")
+
+        # Reset tracking data for the new video
+        self.face_history = {}
+        self.next_face_id = 0
+        self.processing_times = deque(maxlen=30)
+
+        frame_count = 0
+        process_every_n_frames = 2  # Process every 2nd frame for better performance
+
+        # For face detection stats
+        face_stats = {"Real": 0, "Fake": 0, "Unknown": 0}
+
+        # Main processing loop
+        while True:
+            start_time = time.time()
+
+            ret, frame = cap.read()
+            if not ret:
+                status_text.text("End of video reached")
+                break
+
+            frame_count += 1
+
+            if frame_count == 250:
+                st.success("Video Processed Successfully!")
+                break
+
+            if video_path != 0:  # If not webcam, update progress
+                progress = min(float(frame_count) / float(max(total_frames, 1)), 1.0)
+                progress_bar.progress(progress)
+
+            process_frame = (frame_count % process_every_n_frames == 0)
+
+            # Keep an RGB copy of the frame for face extraction
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            if process_frame:
+                # Detect faces using the appropriate method
+                if use_dnn_current:
+                    faces = self.detect_faces_dnn(frame)
+                else:
+                    faces = self.detect_faces_haar(frame)
+
+                # Track faces across frames
+                tracked_faces = self.track_faces(faces)
+
+                # Process each tracked face
+                for face_id, (x, y, w, h, face_confidence) in tracked_faces:
+                    if face_id not in self.face_history:
+                        continue
+
+                    sx, sy, sw, sh = self.smooth_face_position(face_id)
+                    # Draw a rectangle around the face using smoothed coordinates
+                    cv2.rectangle(frame, (sx, sy), (sx+sw, sy+sh), (0, 255, 255), 2)
+
+                    # Only classify real detections (not predicted positions)
+                    if w > 20 and h > 20 and face_id in self.face_history:
+                        try:
+                            # Extract the face using smoothed coordinates for consistency
+                            face = frame_rgb[sy:sy+sh, sx:sx+sw]
+
+                            # Skip processing if the face is too small after smoothing
+                            if face.size == 0 or face.shape[0] < 20 or face.shape[1] < 20:
+                                continue
+
+                            # Classify only every N frames, or if this face is new
+                            if frame_count % process_every_n_frames == 0 or \
+                                    len(self.face_history[face_id]['result_history']) == 0:
+                                # Classify the face using the selected model
+                                label, confidence = self.classify_frame(face, model_type)
+
+                                # Update and smooth the results
+                                label, confidence = self.update_face_classification(face_id, label, confidence)
+                            else:
+                                # Use the last stored result
+                                label = self.face_history[face_id]['label'] or "Unknown"
+                                confidence = self.face_history[face_id]['confidence']
+
+                            # Update stats
+                            if label in face_stats:
+                                face_stats[label] += 1
+
+                            # Display results
+                            result_text = f"{label}: {confidence:.2f}"
+                            text_color = (0, 255, 0) if label == "Real" else (0, 0, 255)
+
+                            # Add a text background for better visibility
+                            cv2.rectangle(frame, (sx, sy+sh), (sx+len(result_text)*11, sy+sh+25), (0, 0, 0), -1)
+                            cv2.putText(frame, result_text, (sx, sy+sh+20),
+                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, text_color, 2)
+
+                            # Draw the face ID
+                            cv2.putText(frame, f"ID:{face_id}", (sx, sy-5),
+                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 1)
+                        except Exception as e:
+                            st.error(f"Error processing face: {e}")
+
+            # Measure processing time
+            process_time = time.time() - start_time
+            self.processing_times.append(process_time)
+            avg_time = sum(self.processing_times) / len(self.processing_times)
+            effective_fps = 1.0 / avg_time if avg_time > 0 else 0
+
+            # Add a frame counter and progress overlay
+            if video_path != 0:  # If not webcam
+                progress_percent = (frame_count / total_frames) * 100 if total_frames > 0 else 0
+                cv2.putText(frame, f"Frame: {frame_count}/{total_frames} ({progress_percent:.1f}%)",
+                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+            else:
+                cv2.putText(frame, f"Frame: {frame_count}",
+                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+            # Show detector info and performance
+            detector_name = "DNN" if use_dnn_current else "Haar Cascade"
+            model_name = "ViT" if model_type == "vit" else "CNN"
+            cv2.putText(frame, f"Detector: {detector_name} | Model: {model_name} | FPS: {effective_fps:.1f}",
+                        (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+            # Show tracking info
+            cv2.putText(frame, f"Tracked faces: {len(self.face_history)}",
+                        (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+            # Display the frame in Streamlit
+            stframe.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), channels="RGB")
+
+            # Update stats
+            status_text.text(f"Real: {face_stats['Real']} | Fake: {face_stats['Fake']} | FPS: {effective_fps:.1f}")
+
+            # Check if the stop button was pressed
+            if st.session_state.get('stop_button', False):
+                break
+
+        # Clean up
+        cap.release()
+        return face_stats
+
+# Function to ensure the sample video exists
+def ensure_sample_video():
+    sample_dir = "sample_videos"
+    sample_path = os.path.join(sample_dir, "Sample.mp4")
+
+    # Create the directory if it doesn't exist
+    if not os.path.exists(sample_dir):
+        os.makedirs(sample_dir)
+
+    # If the sample video doesn't exist, download it
+    if not os.path.exists(sample_path):
+        try:
+            with st.spinner("Downloading sample video..."):
+                # URL to a public domain sample video that contains faces
+                sample_url = "https://storage.googleapis.com/deepfake-demo/sample_deepfake.mp4"
+
+                # Download the file
+                with urllib.request.urlopen(sample_url) as response, open(sample_path, 'wb') as out_file:
+                    shutil.copyfileobj(response, out_file)
+
+            st.success("Sample video downloaded successfully!")
+        except Exception as e:
+            st.error(f"Failed to download sample video: {e}")
+            return None
+
+    return sample_path
+
+def main():
+    st.set_page_config(page_title="Deepfake Detector", layout="wide")
+
+    # App title and description
+    st.title("Deepfake Detection App")
+    st.markdown("""
+    This app uses computer vision and deep learning to detect deepfake videos.
+    Upload a video or use your webcam to detect if faces are real or manipulated.
+    """)
+
+    # Initialize session state for the detector and variables
+    if 'detector' not in st.session_state:
+        st.session_state.detector = None
+
+    if 'stop_button' not in st.session_state:
+        st.session_state.stop_button = False
+
+    if 'use_sample' not in st.session_state:
+        st.session_state.use_sample = False
+
+    if 'sample_path' not in st.session_state:
+        st.session_state.sample_path = None
+
+    # Initialize the detector
+    if st.session_state.detector is None:
+        st.session_state.detector = DeepfakeDetector()
+
+    # Create the sidebar for options
+    st.sidebar.title("Options")
+
+    input_option = st.sidebar.radio(
+        "Select Input Source",
+        ["Upload Video", "Use Webcam", "Try Sample Video"]
+    )
+
+    detector_type = st.sidebar.selectbox(
+        "Face Detector",
+        ["DNN (better for close-ups)", "Haar Cascade (faster)"],
+        index=0 if st.session_state.detector.use_dnn else 1
+    )
+    detector_option = "dnn" if "DNN" in detector_type else "haar"
+
+    # Model selection option
+    model_type = st.sidebar.selectbox(
+        "Deepfake Detection Model",
+        ["Vision Transformer (ViT)", "F3 Net Model"],
+        index=0
+    )
+    model_option = "vit" if "Vision" in model_type else "cnn"
+
+    # Main content area
+    col1, col2 = st.columns([3, 1])
+
+    with col1:
+        # Video display area
+        video_placeholder = st.empty()
+
+    with col2:
+        # Status and controls
+        status_text = st.empty()
+        progress_bar = st.empty()
+
+        # Results section
+        st.subheader("Results")
+        results_area = st.empty()
+
+        # Stop button
+        if st.button("Stop Processing"):
+            st.session_state.stop_button = True
+
+    # Process based on the selected option
+    if input_option == "Upload Video":
+        uploaded_file = st.sidebar.file_uploader("Choose a video file", type=["mp4", "avi", "mov", "mkv"])
+
+        if uploaded_file is not None:
+            st.session_state.stop_button = False
+
+            # Save the uploaded file to a temp file
+            tfile = tempfile.NamedTemporaryFile(delete=False)
+            tfile.write(uploaded_file.read())
+            video_path = tfile.name
+
+            # Process the video
+            face_stats = st.session_state.detector.process_video(
+                video_path,
+                video_placeholder,
+                status_text,
+                progress_bar,
+                detector_option,
+                model_option
+            )
+
+            # Display results
+            results_df = {
+                "Category": ["Real Faces", "Fake Faces"],
+                "Count": [face_stats["Real"], face_stats["Fake"]]
+            }
+            results_area.dataframe(results_df)
+
+            # Clean up the temp file
+            os.unlink(video_path)
+
+    elif input_option == "Use Webcam":
+        # Reset the stop button
+        st.session_state.stop_button = False
+
+        if st.sidebar.button("Start Webcam"):
+            # Process the webcam feed
+            face_stats = st.session_state.detector.process_video(
+                0,  # 0 is the default camera
+                video_placeholder,
+                status_text,
+                progress_bar,
+                detector_option,
+                model_option
+            )
+
+            # Display results after stopping
+            results_df = {
+                "Category": ["Real Faces", "Fake Faces"],
+                "Count": [face_stats["Real"], face_stats["Fake"]]
+            }
+            results_area.dataframe(results_df)
+
+    elif input_option == "Try Sample Video":
+        # Reset the stop button
+        st.session_state.stop_button = False
+
+        # Get or download the sample video
+        sample_path = ensure_sample_video()
+
+        if sample_path:
+            if st.sidebar.button("Process Sample Video"):
+                # Process the sample video
+                face_stats = st.session_state.detector.process_video(
+                    sample_path,
+                    video_placeholder,
+                    status_text,
+                    progress_bar,
+                    detector_option,
+                    model_option
+                )
+
+                # Display results
+                results_df = {
+                    "Category": ["Real Faces", "Fake Faces"],
+                    "Count": [face_stats["Real"], face_stats["Fake"]]
+                }
+                results_area.dataframe(results_df)
+        else:
+            st.sidebar.error("Failed to load sample video. Please try uploading your own video instead.")
+
+if __name__ == "__main__":
+    main()
cnn_model.h5 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f54d9db020da33f99f861d41dc1334ec33adc14991ada4033a4ece790d0904e
+size 312843624
deploy.prototxt ADDED
@@ -0,0 +1,1790 @@
+input: "data"
+input_shape {
+  dim: 1
+  dim: 3
+  dim: 300
+  dim: 300
+}
+
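+# The 1x3x300x300 input above matches the cv2.dnn.blobFromImage call in
+# app.py, which resizes each frame to 300x300 and subtracts the per-channel
+# BGR means (104, 177, 123) before the forward pass.
+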
+layer {
+  name: "data_bn"
+  type: "BatchNorm"
+  bottom: "data"
+  top: "data_bn"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "data_scale"
+  type: "Scale"
+  bottom: "data_bn"
+  top: "data_bn"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "conv1_h"
+  type: "Convolution"
+  bottom: "data_bn"
+  top: "conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 32
+    pad: 3
+    kernel_size: 7
+    stride: 2
+    weight_filler {
+      type: "msra"
+      variance_norm: FAN_OUT
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "conv1_bn_h"
+  type: "BatchNorm"
+  bottom: "conv1_h"
+  top: "conv1_h"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "conv1_scale_h"
+  type: "Scale"
+  bottom: "conv1_h"
+  top: "conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "conv1_relu"
+  type: "ReLU"
+  bottom: "conv1_h"
+  top: "conv1_h"
+}
+layer {
+  name: "conv1_pool"
+  type: "Pooling"
+  bottom: "conv1_h"
+  top: "conv1_pool"
+  pooling_param {
+    kernel_size: 3
+    stride: 2
+  }
+}
+layer {
+  name: "layer_64_1_conv1_h"
+  type: "Convolution"
+  bottom: "conv1_pool"
+  top: "layer_64_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 32
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_64_1_bn2_h"
+  type: "BatchNorm"
+  bottom: "layer_64_1_conv1_h"
+  top: "layer_64_1_conv1_h"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_64_1_scale2_h"
+  type: "Scale"
+  bottom: "layer_64_1_conv1_h"
+  top: "layer_64_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_64_1_relu2"
+  type: "ReLU"
+  bottom: "layer_64_1_conv1_h"
+  top: "layer_64_1_conv1_h"
+}
+layer {
+  name: "layer_64_1_conv2_h"
+  type: "Convolution"
+  bottom: "layer_64_1_conv1_h"
+  top: "layer_64_1_conv2_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 32
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_64_1_sum"
+  type: "Eltwise"
+  bottom: "layer_64_1_conv2_h"
+  bottom: "conv1_pool"
+  top: "layer_64_1_sum"
+}
+layer {
+  name: "layer_128_1_bn1_h"
+  type: "BatchNorm"
+  bottom: "layer_64_1_sum"
+  top: "layer_128_1_bn1_h"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_128_1_scale1_h"
+  type: "Scale"
+  bottom: "layer_128_1_bn1_h"
+  top: "layer_128_1_bn1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_128_1_relu1"
+  type: "ReLU"
+  bottom: "layer_128_1_bn1_h"
+  top: "layer_128_1_bn1_h"
+}
+layer {
+  name: "layer_128_1_conv1_h"
+  type: "Convolution"
+  bottom: "layer_128_1_bn1_h"
+  top: "layer_128_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 128
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_128_1_bn2"
+  type: "BatchNorm"
+  bottom: "layer_128_1_conv1_h"
+  top: "layer_128_1_conv1_h"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_128_1_scale2"
+  type: "Scale"
+  bottom: "layer_128_1_conv1_h"
+  top: "layer_128_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_128_1_relu2"
+  type: "ReLU"
+  bottom: "layer_128_1_conv1_h"
+  top: "layer_128_1_conv1_h"
+}
+layer {
+  name: "layer_128_1_conv2"
+  type: "Convolution"
+  bottom: "layer_128_1_conv1_h"
+  top: "layer_128_1_conv2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 128
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_128_1_conv_expand_h"
+  type: "Convolution"
+  bottom: "layer_128_1_bn1_h"
+  top: "layer_128_1_conv_expand_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 128
+    bias_term: false
+    pad: 0
+    kernel_size: 1
+    stride: 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_128_1_sum"
+  type: "Eltwise"
+  bottom: "layer_128_1_conv2"
+  bottom: "layer_128_1_conv_expand_h"
+  top: "layer_128_1_sum"
+}
+layer {
+  name: "layer_256_1_bn1"
+  type: "BatchNorm"
+  bottom: "layer_128_1_sum"
+  top: "layer_256_1_bn1"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_256_1_scale1"
+  type: "Scale"
+  bottom: "layer_256_1_bn1"
+  top: "layer_256_1_bn1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_256_1_relu1"
+  type: "ReLU"
+  bottom: "layer_256_1_bn1"
+  top: "layer_256_1_bn1"
+}
+layer {
+  name: "layer_256_1_conv1"
+  type: "Convolution"
+  bottom: "layer_256_1_bn1"
+  top: "layer_256_1_conv1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 256
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_256_1_bn2"
+  type: "BatchNorm"
+  bottom: "layer_256_1_conv1"
+  top: "layer_256_1_conv1"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_256_1_scale2"
+  type: "Scale"
+  bottom: "layer_256_1_conv1"
+  top: "layer_256_1_conv1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_256_1_relu2"
+  type: "ReLU"
+  bottom: "layer_256_1_conv1"
+  top: "layer_256_1_conv1"
+}
+layer {
+  name: "layer_256_1_conv2"
+  type: "Convolution"
+  bottom: "layer_256_1_conv1"
+  top: "layer_256_1_conv2"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 256
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_256_1_conv_expand"
+  type: "Convolution"
+  bottom: "layer_256_1_bn1"
+  top: "layer_256_1_conv_expand"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 256
+    bias_term: false
+    pad: 0
+    kernel_size: 1
+    stride: 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_256_1_sum"
+  type: "Eltwise"
+  bottom: "layer_256_1_conv2"
+  bottom: "layer_256_1_conv_expand"
+  top: "layer_256_1_sum"
+}
+layer {
+  name: "layer_512_1_bn1"
+  type: "BatchNorm"
+  bottom: "layer_256_1_sum"
+  top: "layer_512_1_bn1"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_512_1_scale1"
+  type: "Scale"
+  bottom: "layer_512_1_bn1"
+  top: "layer_512_1_bn1"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_512_1_relu1"
+  type: "ReLU"
+  bottom: "layer_512_1_bn1"
+  top: "layer_512_1_bn1"
+}
+layer {
+  name: "layer_512_1_conv1_h"
+  type: "Convolution"
+  bottom: "layer_512_1_bn1"
+  top: "layer_512_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 128
+    bias_term: false
+    pad: 1
+    kernel_size: 3
+    stride: 1  # 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_512_1_bn2_h"
+  type: "BatchNorm"
+  bottom: "layer_512_1_conv1_h"
+  top: "layer_512_1_conv1_h"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "layer_512_1_scale2_h"
+  type: "Scale"
+  bottom: "layer_512_1_conv1_h"
+  top: "layer_512_1_conv1_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "layer_512_1_relu2"
+  type: "ReLU"
+  bottom: "layer_512_1_conv1_h"
+  top: "layer_512_1_conv1_h"
+}
+layer {
+  name: "layer_512_1_conv2_h"
+  type: "Convolution"
+  bottom: "layer_512_1_conv1_h"
+  top: "layer_512_1_conv2_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 256
+    bias_term: false
+    pad: 2  # 1
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_512_1_conv_expand_h"
+  type: "Convolution"
+  bottom: "layer_512_1_bn1"
+  top: "layer_512_1_conv_expand_h"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  convolution_param {
+    num_output: 256
+    bias_term: false
+    pad: 0
+    kernel_size: 1
+    stride: 1  # 2
+    weight_filler {
+      type: "msra"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0.0
+    }
+  }
+}
+layer {
+  name: "layer_512_1_sum"
+  type: "Eltwise"
+  bottom: "layer_512_1_conv2_h"
+  bottom: "layer_512_1_conv_expand_h"
+  top: "layer_512_1_sum"
+}
+layer {
+  name: "last_bn_h"
+  type: "BatchNorm"
+  bottom: "layer_512_1_sum"
+  top: "layer_512_1_sum"
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+  param {
+    lr_mult: 0.0
+  }
+}
+layer {
+  name: "last_scale_h"
+  type: "Scale"
+  bottom: "layer_512_1_sum"
+  top: "layer_512_1_sum"
+  param {
+    lr_mult: 1.0
+    decay_mult: 1.0
+  }
+  param {
+    lr_mult: 2.0
+    decay_mult: 1.0
+  }
+  scale_param {
+    bias_term: true
+  }
+}
+layer {
+  name: "last_relu"
+  type: "ReLU"
+  bottom: "layer_512_1_sum"
+  top: "fc7"
+}
+
+layer {
+  name: "conv6_1_h"
+  type: "Convolution"
+  bottom: "fc7"
+  top: "conv6_1_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv6_1_relu"
+  type: "ReLU"
+  bottom: "conv6_1_h"
+  top: "conv6_1_h"
+}
+layer {
+  name: "conv6_2_h"
+  type: "Convolution"
+  bottom: "conv6_1_h"
+  top: "conv6_2_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 2
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv6_2_relu"
+  type: "ReLU"
+  bottom: "conv6_2_h"
+  top: "conv6_2_h"
+}
+layer {
+  name: "conv7_1_h"
+  type: "Convolution"
+  bottom: "conv6_2_h"
+  top: "conv7_1_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 0
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv7_1_relu"
+  type: "ReLU"
+  bottom: "conv7_1_h"
+  top: "conv7_1_h"
+}
+layer {
+  name: "conv7_2_h"
+  type: "Convolution"
+  bottom: "conv7_1_h"
+  top: "conv7_2_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    stride: 2
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv7_2_relu"
+  type: "ReLU"
+  bottom: "conv7_2_h"
+  top: "conv7_2_h"
+}
+layer {
+  name: "conv8_1_h"
+  type: "Convolution"
+  bottom: "conv7_2_h"
+  top: "conv8_1_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 0
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv8_1_relu"
+  type: "ReLU"
+  bottom: "conv8_1_h"
+  top: "conv8_1_h"
+}
+layer {
+  name: "conv8_2_h"
+  type: "Convolution"
+  bottom: "conv8_1_h"
+  top: "conv8_2_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv8_2_relu"
+  type: "ReLU"
+  bottom: "conv8_2_h"
+  top: "conv8_2_h"
+}
+layer {
+  name: "conv9_1_h"
+  type: "Convolution"
+  bottom: "conv8_2_h"
+  top: "conv9_1_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 64
+    pad: 0
+    kernel_size: 1
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv9_1_relu"
+  type: "ReLU"
+  bottom: "conv9_1_h"
+  top: "conv9_1_h"
+}
+layer {
+  name: "conv9_2_h"
+  type: "Convolution"
+  bottom: "conv9_1_h"
+  top: "conv9_2_h"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 128
+    pad: 0
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv9_2_relu"
+  type: "ReLU"
+  bottom: "conv9_2_h"
+  top: "conv9_2_h"
+}
+layer {
+  name: "conv4_3_norm"
+  type: "Normalize"
+  bottom: "layer_256_1_bn1"
+  top: "conv4_3_norm"
+  norm_param {
+    across_spatial: false
+    scale_filler {
+      type: "constant"
+      value: 20
+    }
+    channel_shared: false
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_loc"
+  type: "Convolution"
+  bottom: "conv4_3_norm"
+  top: "conv4_3_norm_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 16
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_loc_perm"
+  type: "Permute"
+  bottom: "conv4_3_norm_mbox_loc"
+  top: "conv4_3_norm_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "conv4_3_norm_mbox_loc_perm"
+  top: "conv4_3_norm_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_conf"
+  type: "Convolution"
+  bottom: "conv4_3_norm"
+  top: "conv4_3_norm_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 8  # 84
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_conf_perm"
+  type: "Permute"
+  bottom: "conv4_3_norm_mbox_conf"
+  top: "conv4_3_norm_mbox_conf_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_conf_flat"
+  type: "Flatten"
+  bottom: "conv4_3_norm_mbox_conf_perm"
+  top: "conv4_3_norm_mbox_conf_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv4_3_norm_mbox_priorbox"
+  type: "PriorBox"
+  bottom: "conv4_3_norm"
+  bottom: "data"
+  top: "conv4_3_norm_mbox_priorbox"
+  prior_box_param {
+    min_size: 30.0
+    max_size: 60.0
+    aspect_ratio: 2
+    flip: true
+    clip: false
+    variance: 0.1
+    variance: 0.1
+    variance: 0.2
+    variance: 0.2
+    step: 8
+    offset: 0.5
+  }
+}
+layer {
+  name: "fc7_mbox_loc"
+  type: "Convolution"
+  bottom: "fc7"
+  top: "fc7_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 24
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "fc7_mbox_loc_perm"
+  type: "Permute"
+  bottom: "fc7_mbox_loc"
+  top: "fc7_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "fc7_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "fc7_mbox_loc_perm"
+  top: "fc7_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "fc7_mbox_conf"
+  type: "Convolution"
+  bottom: "fc7"
+  top: "fc7_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 12  # 126
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "fc7_mbox_conf_perm"
+  type: "Permute"
+  bottom: "fc7_mbox_conf"
+  top: "fc7_mbox_conf_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "fc7_mbox_conf_flat"
+  type: "Flatten"
+  bottom: "fc7_mbox_conf_perm"
+  top: "fc7_mbox_conf_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "fc7_mbox_priorbox"
+  type: "PriorBox"
+  bottom: "fc7"
+  bottom: "data"
+  top: "fc7_mbox_priorbox"
+  prior_box_param {
+    min_size: 60.0
+    max_size: 111.0
+    aspect_ratio: 2
+    aspect_ratio: 3
+    flip: true
+    clip: false
+    variance: 0.1
+    variance: 0.1
+    variance: 0.2
+    variance: 0.2
+    step: 16
+    offset: 0.5
+  }
+}
+layer {
+  name: "conv6_2_mbox_loc"
+  type: "Convolution"
+  bottom: "conv6_2_h"
+  top: "conv6_2_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 24
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv6_2_mbox_loc_perm"
+  type: "Permute"
+  bottom: "conv6_2_mbox_loc"
+  top: "conv6_2_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv6_2_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "conv6_2_mbox_loc_perm"
+  top: "conv6_2_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv6_2_mbox_conf"
+  type: "Convolution"
+  bottom: "conv6_2_h"
+  top: "conv6_2_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 12  # 126
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv6_2_mbox_conf_perm"
+  type: "Permute"
+  bottom: "conv6_2_mbox_conf"
+  top: "conv6_2_mbox_conf_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv6_2_mbox_conf_flat"
+  type: "Flatten"
+  bottom: "conv6_2_mbox_conf_perm"
+  top: "conv6_2_mbox_conf_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv6_2_mbox_priorbox"
+  type: "PriorBox"
+  bottom: "conv6_2_h"
+  bottom: "data"
+  top: "conv6_2_mbox_priorbox"
+  prior_box_param {
+    min_size: 111.0
+    max_size: 162.0
+    aspect_ratio: 2
+    aspect_ratio: 3
+    flip: true
+    clip: false
+    variance: 0.1
+    variance: 0.1
+    variance: 0.2
+    variance: 0.2
+    step: 32
+    offset: 0.5
+  }
+}
+layer {
+  name: "conv7_2_mbox_loc"
+  type: "Convolution"
+  bottom: "conv7_2_h"
+  top: "conv7_2_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 24
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv7_2_mbox_loc_perm"
+  type: "Permute"
+  bottom: "conv7_2_mbox_loc"
+  top: "conv7_2_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv7_2_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "conv7_2_mbox_loc_perm"
+  top: "conv7_2_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv7_2_mbox_conf"
+  type: "Convolution"
+  bottom: "conv7_2_h"
+  top: "conv7_2_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 12  # 126
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv7_2_mbox_conf_perm"
+  type: "Permute"
+  bottom: "conv7_2_mbox_conf"
+  top: "conv7_2_mbox_conf_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv7_2_mbox_conf_flat"
+  type: "Flatten"
+  bottom: "conv7_2_mbox_conf_perm"
+  top: "conv7_2_mbox_conf_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv7_2_mbox_priorbox"
+  type: "PriorBox"
+  bottom: "conv7_2_h"
+  bottom: "data"
+  top: "conv7_2_mbox_priorbox"
+  prior_box_param {
+    min_size: 162.0
+    max_size: 213.0
+    aspect_ratio: 2
+    aspect_ratio: 3
+    flip: true
+    clip: false
+    variance: 0.1
+    variance: 0.1
+    variance: 0.2
+    variance: 0.2
+    step: 64
+    offset: 0.5
+  }
+}
+layer {
+  name: "conv8_2_mbox_loc"
+  type: "Convolution"
+  bottom: "conv8_2_h"
+  top: "conv8_2_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 16
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv8_2_mbox_loc_perm"
+  type: "Permute"
+  bottom: "conv8_2_mbox_loc"
+  top: "conv8_2_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv8_2_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "conv8_2_mbox_loc_perm"
+  top: "conv8_2_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv8_2_mbox_conf"
+  type: "Convolution"
+  bottom: "conv8_2_h"
+  top: "conv8_2_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 8  # 84
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv8_2_mbox_conf_perm"
+  type: "Permute"
+  bottom: "conv8_2_mbox_conf"
+  top: "conv8_2_mbox_conf_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv8_2_mbox_conf_flat"
+  type: "Flatten"
+  bottom: "conv8_2_mbox_conf_perm"
+  top: "conv8_2_mbox_conf_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv8_2_mbox_priorbox"
+  type: "PriorBox"
+  bottom: "conv8_2_h"
+  bottom: "data"
+  top: "conv8_2_mbox_priorbox"
+  prior_box_param {
+    min_size: 213.0
+    max_size: 264.0
+    aspect_ratio: 2
+    flip: true
+    clip: false
+    variance: 0.1
+    variance: 0.1
+    variance: 0.2
+    variance: 0.2
+    step: 100
+    offset: 0.5
+  }
+}
+layer {
+  name: "conv9_2_mbox_loc"
+  type: "Convolution"
+  bottom: "conv9_2_h"
+  top: "conv9_2_mbox_loc"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 16
+    pad: 1
+    kernel_size: 3
+    stride: 1
+    weight_filler {
+      type: "xavier"
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "conv9_2_mbox_loc_perm"
+  type: "Permute"
+  bottom: "conv9_2_mbox_loc"
+  top: "conv9_2_mbox_loc_perm"
+  permute_param {
+    order: 0
+    order: 2
+    order: 3
+    order: 1
+  }
+}
+layer {
+  name: "conv9_2_mbox_loc_flat"
+  type: "Flatten"
+  bottom: "conv9_2_mbox_loc_perm"
+  top: "conv9_2_mbox_loc_flat"
+  flatten_param {
+    axis: 1
+  }
+}
+layer {
+  name: "conv9_2_mbox_conf"
+  type: "Convolution"
+  bottom: "conv9_2_h"
+  top: "conv9_2_mbox_conf"
+  param {
+    lr_mult: 1
+    decay_mult: 1
+  }
+  param {
+    lr_mult: 2
+    decay_mult: 0
+  }
+  convolution_param {
+    num_output: 8  # 84
+ pad: 1
1640
+ kernel_size: 3
1641
+ stride: 1
1642
+ weight_filler {
1643
+ type: "xavier"
1644
+ }
1645
+ bias_filler {
1646
+ type: "constant"
1647
+ value: 0
1648
+ }
1649
+ }
1650
+ }
1651
+ layer {
1652
+ name: "conv9_2_mbox_conf_perm"
1653
+ type: "Permute"
1654
+ bottom: "conv9_2_mbox_conf"
1655
+ top: "conv9_2_mbox_conf_perm"
1656
+ permute_param {
1657
+ order: 0
1658
+ order: 2
1659
+ order: 3
1660
+ order: 1
1661
+ }
1662
+ }
1663
+ layer {
1664
+ name: "conv9_2_mbox_conf_flat"
1665
+ type: "Flatten"
1666
+ bottom: "conv9_2_mbox_conf_perm"
1667
+ top: "conv9_2_mbox_conf_flat"
1668
+ flatten_param {
1669
+ axis: 1
1670
+ }
1671
+ }
1672
+ layer {
1673
+ name: "conv9_2_mbox_priorbox"
1674
+ type: "PriorBox"
1675
+ bottom: "conv9_2_h"
1676
+ bottom: "data"
1677
+ top: "conv9_2_mbox_priorbox"
1678
+ prior_box_param {
1679
+ min_size: 264.0
1680
+ max_size: 315.0
1681
+ aspect_ratio: 2
1682
+ flip: true
1683
+ clip: false
1684
+ variance: 0.1
1685
+ variance: 0.1
1686
+ variance: 0.2
1687
+ variance: 0.2
1688
+ step: 300
1689
+ offset: 0.5
1690
+ }
1691
+ }
1692
+ layer {
1693
+ name: "mbox_loc"
1694
+ type: "Concat"
1695
+ bottom: "conv4_3_norm_mbox_loc_flat"
1696
+ bottom: "fc7_mbox_loc_flat"
1697
+ bottom: "conv6_2_mbox_loc_flat"
1698
+ bottom: "conv7_2_mbox_loc_flat"
1699
+ bottom: "conv8_2_mbox_loc_flat"
1700
+ bottom: "conv9_2_mbox_loc_flat"
1701
+ top: "mbox_loc"
1702
+ concat_param {
1703
+ axis: 1
1704
+ }
1705
+ }
1706
+ layer {
1707
+ name: "mbox_conf"
1708
+ type: "Concat"
1709
+ bottom: "conv4_3_norm_mbox_conf_flat"
1710
+ bottom: "fc7_mbox_conf_flat"
1711
+ bottom: "conv6_2_mbox_conf_flat"
1712
+ bottom: "conv7_2_mbox_conf_flat"
1713
+ bottom: "conv8_2_mbox_conf_flat"
1714
+ bottom: "conv9_2_mbox_conf_flat"
1715
+ top: "mbox_conf"
1716
+ concat_param {
1717
+ axis: 1
1718
+ }
1719
+ }
1720
+ layer {
1721
+ name: "mbox_priorbox"
1722
+ type: "Concat"
1723
+ bottom: "conv4_3_norm_mbox_priorbox"
1724
+ bottom: "fc7_mbox_priorbox"
1725
+ bottom: "conv6_2_mbox_priorbox"
1726
+ bottom: "conv7_2_mbox_priorbox"
1727
+ bottom: "conv8_2_mbox_priorbox"
1728
+ bottom: "conv9_2_mbox_priorbox"
1729
+ top: "mbox_priorbox"
1730
+ concat_param {
1731
+ axis: 2
1732
+ }
1733
+ }
1734
+
1735
+ layer {
1736
+ name: "mbox_conf_reshape"
1737
+ type: "Reshape"
1738
+ bottom: "mbox_conf"
1739
+ top: "mbox_conf_reshape"
1740
+ reshape_param {
1741
+ shape {
1742
+ dim: 0
1743
+ dim: -1
1744
+ dim: 2
1745
+ }
1746
+ }
1747
+ }
1748
+ layer {
1749
+ name: "mbox_conf_softmax"
1750
+ type: "Softmax"
1751
+ bottom: "mbox_conf_reshape"
1752
+ top: "mbox_conf_softmax"
1753
+ softmax_param {
1754
+ axis: 2
1755
+ }
1756
+ }
1757
+ layer {
1758
+ name: "mbox_conf_flatten"
1759
+ type: "Flatten"
1760
+ bottom: "mbox_conf_softmax"
1761
+ top: "mbox_conf_flatten"
1762
+ flatten_param {
1763
+ axis: 1
1764
+ }
1765
+ }
1766
+
1767
+ layer {
1768
+ name: "detection_out"
1769
+ type: "DetectionOutput"
1770
+ bottom: "mbox_loc"
1771
+ bottom: "mbox_conf_flatten"
1772
+ bottom: "mbox_priorbox"
1773
+ top: "detection_out"
1774
+ include {
1775
+ phase: TEST
1776
+ }
1777
+ detection_output_param {
1778
+ num_classes: 2
1779
+ share_location: true
1780
+ background_label_id: 0
1781
+ nms_param {
1782
+ nms_threshold: 0.45
1783
+ top_k: 400
1784
+ }
1785
+ code_type: CENTER_SIZE
1786
+ keep_top_k: 200
1787
+ confidence_threshold: 0.01
1788
+ clip: 1
1789
+ }
1790
+ }
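For context, a minimal sketch of how this SSD deploy definition and the res10_300x300_ssd_iter_140000.caffemodel added below are typically consumed with OpenCV's DNN module. The prototxt filename, the (104, 177, 123) mean values, and the 0.5 confidence cut-off are common conventions for this model, assumed here rather than taken from this commit:

import cv2
import numpy as np

# Load the face detector from the prototxt above (commonly named
# deploy.prototxt; the exact filename in this repo may differ) plus
# the Caffe weights added later in this commit.
net = cv2.dnn.readNetFromCaffe("deploy.prototxt",
                               "res10_300x300_ssd_iter_140000.caffemodel")

img = cv2.imread("frame.jpg")  # hypothetical input frame
h, w = img.shape[:2]

# The network expects a 300x300 BGR blob (see the 300-pixel prior-box
# geometry above); (104, 177, 123) is the usual per-channel mean.
blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0,
                             (300, 300), (104.0, 177.0, 123.0))
net.setInput(blob)
detections = net.forward()  # DetectionOutput rows: id, label, conf, x1, y1, x2, y2

for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:  # stricter than the prototxt's 0.01 floor
        # Box corners are normalised to [0, 1] (clip: 1 above); scale to pixels.
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)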
face_detection_yunet_2023mar.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f2383e4dd3cfbb4553ea8718107fc0423210dc964f9f4280604804ed2552fa4
+ size 232589
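A minimal sketch of consuming the YuNet weights above through OpenCV's FaceDetectorYN API (available in OpenCV 4.5.4+); the input size and score threshold are illustrative assumptions:

import cv2

# Create the detector from the ONNX file added in this commit.
detector = cv2.FaceDetectorYN.create(
    "face_detection_yunet_2023mar.onnx",
    "",           # no separate config file is needed for ONNX models
    (320, 320),   # placeholder input size; reset to the real frame size below
    0.6,          # score threshold (assumed value)
)

img = cv2.imread("frame.jpg")  # hypothetical input frame
detector.setInputSize((img.shape[1], img.shape[0]))
_, faces = detector.detect(img)  # faces: Nx15 array, or None if nothing found
if faces is not None:
    for f in faces:
        # Each row: x, y, w, h, five landmark (x, y) pairs, then the score.
        x, y, w, h = f[:4].astype(int)
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)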
haarcascade_frontalface_default.xml ADDED
The diff for this file is too large to render. See raw diff
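And the classical route with the Haar cascade above; scaleFactor and minNeighbors are typical values, assumed here for illustration:

import cv2

# Load the pretrained frontal-face cascade added in this commit.
cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")

img = cv2.imread("frame.jpg")  # hypothetical input frame
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Haar cascades run on grayscale
faces = cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)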
requirements.txt ADDED
@@ -0,0 +1,70 @@
+ absl-py==2.2.2
+ altair==5.5.0
+ astunparse==1.6.3
+ attrs==25.3.0
+ blinker==1.9.0
+ cachetools==5.5.2
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ click==8.1.8
+ filelock==3.18.0
+ flatbuffers==25.2.10
+ fsspec==2025.3.2
+ gast==0.6.0
+ gitdb==4.0.12
+ GitPython==3.1.44
+ google-pasta==0.2.0
+ grpcio==1.71.0
+ h5py==3.13.0
+ idna==3.10
+ importlib_metadata==8.7.0
+ Jinja2==3.1.6
+ jsonschema==4.23.0
+ jsonschema-specifications==2025.4.1
+ keras==3.9.2
+ libclang==18.1.1
+ Markdown==3.8
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ mdurl==0.1.2
+ ml_dtypes==0.5.1
+ mpmath==1.3.0
+ namex==0.0.9
+ narwhals==1.39.0
+ networkx==3.2.1
+ numpy==2.0.2
+ opencv-python==4.11.0.86
+ opt_einsum==3.4.0
+ optree==0.15.0
+ packaging==24.2
+ pandas==2.2.3
+ pillow==11.2.1
+ protobuf==5.29.4
+ pyarrow==20.0.0
+ pydeck==0.9.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ pytz==2025.2
+ referencing==0.36.2
+ requests==2.32.3
+ rich==14.0.0
+ rpds-py==0.24.0
+ six==1.17.0
+ smmap==5.0.2
+ streamlit==1.45.1
+ sympy==1.14.0
+ tenacity==9.1.2
+ tensorboard==2.19.0
+ tensorboard-data-server==0.7.2
+ tensorflow==2.19.0
+ tensorflow-io-gcs-filesystem==0.37.1
+ termcolor==3.1.0
+ toml==0.10.2
+ torch==2.7.0
+ tornado==6.4.2
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ urllib3==2.4.0
+ Werkzeug==3.1.3
+ wrapt==1.17.2
+ zipp==3.21.0
res10_300x300_ssd_iter_140000.caffemodel ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a56a11a57a4a295956b0660b4a3d76bbdca2206c4961cea8efe7d95c7cb2f2d
+ size 10666211
sample_videos/Sample.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb48fbbfe295461889585a2c3ffe592ba208d2501018b9517f158108f11acd10
+ size 11293922