Spaces:
Runtime error
Runtime error
Update video_processing.py
Browse files - video_processing.py +36 -43
video_processing.py
CHANGED
|
@@ -15,8 +15,6 @@ import pandas as pd
|
|
| 15 |
from facenet_pytorch import MTCNN
|
| 16 |
import torch
|
| 17 |
import mediapipe as mp
|
| 18 |
-
from voice_analysis import process_audio
|
| 19 |
-
from pydub import AudioSegment
|
| 20 |
|
| 21 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 22 |
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
|
|
@@ -24,6 +22,7 @@ mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_fac
|
|
| 24 |
mp_face_mesh = mp.solutions.face_mesh
|
| 25 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
|
| 26 |
|
|
|
|
| 27 |
def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
|
| 28 |
os.makedirs(output_folder, exist_ok=True)
|
| 29 |
clip = VideoFileClip(video_path)
|
|
@@ -46,6 +45,7 @@ def extract_frames(video_path, output_folder, desired_fps, progress_callback=Non
|
|
| 46 |
clip.close()
|
| 47 |
return frame_count, original_fps
|
| 48 |
|
|
|
|
| 49 |
def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
| 50 |
embeddings_by_frame = {}
|
| 51 |
posture_scores_by_frame = {}
|
|
@@ -88,8 +88,8 @@ def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
|
| 88 |
progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
|
| 89 |
|
| 90 |
return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
|
| 91 |
-
|
| 92 |
-
|
| 93 |
def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
| 94 |
start_time = time.time()
|
| 95 |
output_folder = "output"
|
|
@@ -124,7 +124,6 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 124 |
frames_folder, aligned_faces_folder,
|
| 125 |
frame_count,
|
| 126 |
progress)
|
| 127 |
-
|
| 128 |
|
| 129 |
if not aligned_face_paths:
|
| 130 |
raise ValueError("No faces were extracted from the video.")
|
|
@@ -155,45 +154,40 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 155 |
try:
|
| 156 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
| 157 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
| 158 |
-
|
| 159 |
if len(X_posture) == 0:
|
| 160 |
raise ValueError("No valid posture data found")
|
| 161 |
-
|
| 162 |
mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
anomaly_threshold=anomaly_threshold)
|
| 188 |
-
mse_histogram_voice = plot_mse_histogram(mse_voice, "MSE Distribution: Voice",
|
| 189 |
-
anomaly_threshold, color='green')
|
| 190 |
-
mse_heatmap_voice = plot_mse_heatmap(mse_voice, "Voice MSE Heatmap", df)
|
| 191 |
-
|
| 192 |
except Exception as e:
|
| 193 |
print(f"Error details: {str(e)}")
|
| 194 |
import traceback
|
| 195 |
traceback.print_exc()
|
| 196 |
-
return (f"Error in video processing: {str(e)}",) + (None,) *
|
| 197 |
|
| 198 |
progress(1.0, "Preparing results")
|
| 199 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
@@ -251,18 +245,15 @@ def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
|
| 251 |
mse_histogram_posture,
|
| 252 |
mse_heatmap_embeddings,
|
| 253 |
mse_heatmap_posture,
|
| 254 |
-
mse_voice,
|
| 255 |
-
mse_plot_voice,
|
| 256 |
-
mse_histogram_voice,
|
| 257 |
-
mse_heatmap_voice,
|
| 258 |
-
anomaly_segments_voice,
|
| 259 |
face_samples["most_frequent"],
|
| 260 |
anomaly_faces_embeddings,
|
| 261 |
anomaly_frames_posture_images,
|
| 262 |
aligned_faces_folder,
|
| 263 |
-
frames_folder
|
|
|
|
| 264 |
)
|
| 265 |
|
|
|
|
| 266 |
def is_frontal_face(landmarks, threshold=60):
|
| 267 |
nose_tip = landmarks[4]
|
| 268 |
left_chin = landmarks[234]
|
|
@@ -277,6 +268,7 @@ def is_frontal_face(landmarks, threshold=60):
|
|
| 277 |
angle_degrees = math.degrees(angle)
|
| 278 |
return abs(180 - angle_degrees) < threshold
|
| 279 |
|
|
|
|
| 280 |
def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
|
| 281 |
person_data = {}
|
| 282 |
|
|
@@ -310,6 +302,7 @@ def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original
|
|
| 310 |
|
| 311 |
return df, largest_cluster
|
| 312 |
|
|
|
|
| 313 |
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=100):
|
| 314 |
face_samples = {"most_frequent": [], "others": []}
|
| 315 |
for cluster_folder in sorted(os.listdir(organized_faces_folder)):
|
|
|
|
| 15 |
from facenet_pytorch import MTCNN
|
| 16 |
import torch
|
| 17 |
import mediapipe as mp
|
|
|
|
|
|
|
| 18 |
|
| 19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 20 |
mtcnn = MTCNN(keep_all=False, device=device, thresholds=[0.9, 0.9, 0.9], min_face_size=50)
|
|
|
|
| 22 |
mp_face_mesh = mp.solutions.face_mesh
|
| 23 |
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.8)
|
| 24 |
|
| 25 |
+
|
| 26 |
def extract_frames(video_path, output_folder, desired_fps, progress_callback=None):
|
| 27 |
os.makedirs(output_folder, exist_ok=True)
|
| 28 |
clip = VideoFileClip(video_path)
|
|
|
|
| 45 |
clip.close()
|
| 46 |
return frame_count, original_fps
|
| 47 |
|
| 48 |
+
|
| 49 |
def process_frames(frames_folder, aligned_faces_folder, frame_count, progress):
|
| 50 |
embeddings_by_frame = {}
|
| 51 |
posture_scores_by_frame = {}
|
|
|
|
| 88 |
progress((i + 1) / len(frame_files), f"Processing frame {i + 1} of {len(frame_files)}")
|
| 89 |
|
| 90 |
return embeddings_by_frame, posture_scores_by_frame, posture_landmarks_by_frame, aligned_face_paths, facial_landmarks_by_frame
|
| 91 |
+
|
| 92 |
+
|
| 93 |
def process_video(video_path, anomaly_threshold, desired_fps, progress=None):
|
| 94 |
start_time = time.time()
|
| 95 |
output_folder = "output"
|
|
|
|
| 124 |
frames_folder, aligned_faces_folder,
|
| 125 |
frame_count,
|
| 126 |
progress)
|
|
|
|
| 127 |
|
| 128 |
if not aligned_face_paths:
|
| 129 |
raise ValueError("No faces were extracted from the video.")
|
|
|
|
| 154 |
try:
|
| 155 |
X_posture = np.array([posture_scores_by_frame.get(frame, None) for frame in df['Frame']])
|
| 156 |
X_posture = X_posture[X_posture != None].reshape(-1, 1)
|
| 157 |
+
|
| 158 |
if len(X_posture) == 0:
|
| 159 |
raise ValueError("No valid posture data found")
|
| 160 |
+
|
| 161 |
mse_embeddings, mse_posture = anomaly_detection(X_embeddings, X_posture)
|
| 162 |
+
|
| 163 |
+
progress(0.95, "Generating plots")
|
| 164 |
+
mse_plot_embeddings, anomaly_frames_embeddings = plot_mse(df, mse_embeddings, "Facial Features",
|
| 165 |
+
color=GRAPH_COLORS['facial_embeddings'],
|
| 166 |
+
anomaly_threshold=anomaly_threshold)
|
| 167 |
+
|
| 168 |
+
mse_histogram_embeddings = plot_mse_histogram(mse_embeddings, "MSE Distribution: Facial Features",
|
| 169 |
+
anomaly_threshold, color=GRAPH_COLORS['facial_embeddings'])
|
| 170 |
+
|
| 171 |
+
mse_plot_posture, anomaly_frames_posture = plot_mse(df, mse_posture, "Body Posture",
|
| 172 |
+
color=GRAPH_COLORS['body_posture'],
|
| 173 |
+
anomaly_threshold=anomaly_threshold)
|
| 174 |
+
|
| 175 |
+
mse_histogram_posture = plot_mse_histogram(mse_posture, "MSE Distribution: Body Posture",
|
| 176 |
+
anomaly_threshold, color=GRAPH_COLORS['body_posture'])
|
| 177 |
+
|
| 178 |
+
mse_heatmap_posture = plot_mse_heatmap(mse_posture, "Body Posture MSE Heatmap", df)
|
| 179 |
+
|
| 180 |
+
mse_heatmap_embeddings = plot_mse_heatmap(mse_embeddings, "Facial Features MSE Heatmap", df)
|
| 181 |
+
|
| 182 |
+
# Create video with heatmap
|
| 183 |
+
heatmap_video_path = os.path.join(output_folder, "video_with_heatmap.mp4")
|
| 184 |
+
create_video_with_heatmap(video_path, df, mse_embeddings, mse_posture, mse_voice, heatmap_video_path)
|
| 185 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
except Exception as e:
|
| 187 |
print(f"Error details: {str(e)}")
|
| 188 |
import traceback
|
| 189 |
traceback.print_exc()
|
| 190 |
+
return (f"Error in video processing: {str(e)}",) + (None,) * 15
|
| 191 |
|
| 192 |
progress(1.0, "Preparing results")
|
| 193 |
results = f"Number of persons detected: {num_clusters}\n\n"
|
|
|
|
| 245 |
mse_histogram_posture,
|
| 246 |
mse_heatmap_embeddings,
|
| 247 |
mse_heatmap_posture,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
face_samples["most_frequent"],
|
| 249 |
anomaly_faces_embeddings,
|
| 250 |
anomaly_frames_posture_images,
|
| 251 |
aligned_faces_folder,
|
| 252 |
+
frames_folder,
|
| 253 |
+
heatmap_video_path
|
| 254 |
)
|
| 255 |
|
| 256 |
+
|
| 257 |
def is_frontal_face(landmarks, threshold=60):
|
| 258 |
nose_tip = landmarks[4]
|
| 259 |
left_chin = landmarks[234]
|
|
|
|
| 268 |
angle_degrees = math.degrees(angle)
|
| 269 |
return abs(180 - angle_degrees) < threshold
|
| 270 |
|
| 271 |
+
|
| 272 |
def save_person_data_to_csv(embeddings_by_frame, clusters, desired_fps, original_fps, output_folder, video_duration):
|
| 273 |
person_data = {}
|
| 274 |
|
|
|
|
| 302 |
|
| 303 |
return df, largest_cluster
|
| 304 |
|
| 305 |
+
|
| 306 |
def get_all_face_samples(organized_faces_folder, output_folder, largest_cluster, max_samples=100):
|
| 307 |
face_samples = {"most_frequent": [], "others": []}
|
| 308 |
for cluster_folder in sorted(os.listdir(organized_faces_folder)):
|