Sean Carnahan committed on
Commit fbef789 · 1 Parent(s): fded136

Enable GPU support for MoveNet and CNN models

Files changed (1):
  1. app.py +104 -149
app.py CHANGED
@@ -11,6 +11,21 @@ import tensorflow as tf
 from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing import image
 import time
+import tensorflow_hub as hub
+
+# Check GPU availability
+print("[GPU] Checking GPU availability...")
+gpus = tf.config.list_physical_devices('GPU')
+if gpus:
+    print(f"[GPU] Found {len(gpus)} GPU(s):")
+    for gpu in gpus:
+        print(f"[GPU]   {gpu}")
+    # Enable memory growth to avoid allocating all GPU memory at once
+    for gpu in gpus:
+        tf.config.experimental.set_memory_growth(gpu, True)
+    print("[GPU] Memory growth enabled for all GPUs")
+else:
+    print("[GPU] No GPU found, will use CPU")
 
 # Add bodybuilding_pose_analyzer to path
 sys.path.append('.') # Assuming app.py is at the root of cv.github.io
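
Review note: tf.config.experimental.set_memory_growth must run before any GPU has been initialized, and TensorFlow raises a RuntimeError otherwise. A minimal defensive variant of the startup block above (a sketch, not part of the commit):

import tensorflow as tf

# Guard against the RuntimeError TensorFlow raises if memory growth
# is toggled after the GPUs have already been initialized.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Too late to change memory growth; log and continue with defaults.
        print(f"[GPU] Could not enable memory growth for {gpu}: {e}")
print(f"[GPU] {len(gpus)} GPU(s) visible" if gpus else "[GPU] No GPU found, will use CPU")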
@@ -94,20 +109,31 @@ cnn_class_labels = ['side_chest', 'front_double_biceps', 'back_double_biceps', '
 
 def predict_pose_cnn(img_path):
     try:
-        print("[CNN_DEBUG] Forcing CPU for CNN prediction")
-        with tf.device('/CPU:0'): # This line requires 'import tensorflow as tf'
-            img = image.load_img(img_path, target_size=(150, 150))
-            img_array = image.img_to_array(img)
-            img_array = np.expand_dims(img_array, axis=0) / 255.0
-            predictions = cnn_model.predict(img_array)
-            predicted_class = np.argmax(predictions, axis=1)
-            confidence = float(np.max(predictions))
-        print(f"[CNN_DEBUG] Prediction successful on CPU: {cnn_class_labels[predicted_class[0]]}")
+        if gpus:
+            print("[CNN_DEBUG] Using GPU for CNN prediction")
+            with tf.device('/GPU:0'):
+                img = image.load_img(img_path, target_size=(150, 150))
+                img_array = image.img_to_array(img)
+                img_array = np.expand_dims(img_array, axis=0) / 255.0
+                predictions = cnn_model.predict(img_array)
+                predicted_class = np.argmax(predictions, axis=1)
+                confidence = float(np.max(predictions))
+        else:
+            print("[CNN_DEBUG] No GPU found, using CPU for CNN prediction")
+            with tf.device('/CPU:0'):
+                img = image.load_img(img_path, target_size=(150, 150))
+                img_array = image.img_to_array(img)
+                img_array = np.expand_dims(img_array, axis=0) / 255.0
+                predictions = cnn_model.predict(img_array)
+                predicted_class = np.argmax(predictions, axis=1)
+                confidence = float(np.max(predictions))
+
+        print(f"[CNN_DEBUG] Prediction successful: {cnn_class_labels[predicted_class[0]]}")
         return cnn_class_labels[predicted_class[0]], confidence
     except Exception as e:
         print(f"[CNN_ERROR] Exception during CNN prediction: {e}")
         traceback.print_exc()
-        raise # Re-raise the exception to be caught by the calling function
+        raise
 
 @app.route('/static/uploads/<path:filename>')
 def serve_video(filename):
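
Review note: the GPU and CPU branches of predict_pose_cnn duplicate the entire prediction body. Since tf.device() accepts a plain device string, the two branches can collapse into one path. A sketch of the equivalent, assuming the module-level gpus, cnn_model, and cnn_class_labels from this file:

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

def predict_pose_cnn(img_path):
    # Pick the device string once instead of duplicating the body per branch.
    device = '/GPU:0' if gpus else '/CPU:0'
    print(f"[CNN_DEBUG] Using {device} for CNN prediction")
    with tf.device(device):
        img = image.load_img(img_path, target_size=(150, 150))
        img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0
        predictions = cnn_model.predict(img_array)
    predicted_class = int(np.argmax(predictions, axis=1)[0])
    confidence = float(np.max(predictions))
    return cnn_class_labels[predicted_class], confidence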
@@ -206,168 +232,97 @@ def after_request(response):
 #         traceback.print_exc()
 #         raise
 
-def process_video_movenet(video_path, model_variant='lightning', pose_type='front_double_biceps'):
+def process_video_movenet(video_path):
     try:
-        print(f"[PROCESS_VIDEO_MOVENET] Called with video_path: {video_path}, model_variant: {model_variant}, pose_type: {pose_type}")
-        if not os.path.exists(video_path):
-            raise FileNotFoundError(f"Video file not found: {video_path}")
-
-        analyzer = MoveNetAnalyzer(model_name=model_variant)
+        print("[DEBUG] Starting MoveNet video processing")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
-            raise ValueError(f"Failed to open video file: {video_path}")
+            raise ValueError("Could not open video file")
+
+        # Get video properties
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
-        # Add panel width to total width
-        panel_width = 300
-        total_width = width + panel_width
+        print(f"[DEBUG] Video properties - FPS: {fps}, Width: {width}, Height: {height}, Total Frames: {total_frames}")
+
+        # Initialize MoveNet model on GPU if available
+        print("[DEBUG] Initializing MoveNet model")
+        if gpus:
+            print("[DEBUG] Using GPU for MoveNet")
+            with tf.device('/GPU:0'):
+                movenet_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
+                movenet = movenet_model.signatures['serving_default']
+        else:
+            print("[DEBUG] No GPU found, using CPU for MoveNet")
+            with tf.device('/CPU:0'):
+                movenet_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
+                movenet = movenet_model.signatures['serving_default']
 
-        print(f"Processing video with MoveNet ({model_variant}): {width}x{height} @ {fps}fps")
-        print(f"Output dimensions will be: {total_width}x{height}")
-        output_filename = f'output_movenet_{model_variant}.mp4'
+        # Create output video writer
+        output_filename = f'output_movenet_lightning.mp4'
         output_path = os.path.join(app.config['UPLOAD_FOLDER'], output_filename)
         print(f"Output path: {output_path}")
 
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (total_width, height))
+        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
         if not out.isOpened():
             raise ValueError(f"Failed to create output video writer at {output_path}")
-
+
         frame_count = 0
-        current_pose = pose_type
-        segment_length = 4 * fps if fps > 0 else 120
-        cnn_pose = None
-        last_valid_landmarks = None
-        landmarks_analysis = {'error': 'Processing not started'} # Initialize landmarks_analysis
+        processed_frames = 0
 
         while cap.isOpened():
             ret, frame = cap.read()
             if not ret:
                 break
+
             frame_count += 1
-            if frame_count % 30 == 0:
-                print(f"Processing frame {frame_count}")
-
-            # Process frame
-            processed_frame, current_landmarks_analysis, landmarks = analyzer.process_frame(frame, current_pose, last_valid_landmarks=last_valid_landmarks)
-            landmarks_analysis = current_landmarks_analysis # Update with the latest analysis
-            if frame_count % 30 == 0: # Log every 30 frames
-                print(f"[MOVENET_DEBUG] Frame {frame_count} - landmarks_analysis: {landmarks_analysis}")
-            if landmarks:
-                last_valid_landmarks = landmarks
-
-            # CNN prediction (every 4 seconds)
-            if (frame_count - 1) % segment_length == 0:
-                temp_img_path = f'temp_frame_for_cnn_{frame_count}.jpg' # Unique temp name
-                cv2.imwrite(temp_img_path, frame)
-                try:
-                    cnn_pose_pred, cnn_conf = predict_pose_cnn(temp_img_path)
-                    print(f"[CNN] Frame {frame_count}: Pose: {cnn_pose_pred}, Conf: {cnn_conf:.2f}")
-                    if cnn_conf >= 0.3:
-                        current_pose = cnn_pose_pred # Update current_pose for the analyzer
-                except Exception as e:
-                    print(f"[CNN] Error predicting pose on frame {frame_count}: {e}")
-                finally:
-                    if os.path.exists(temp_img_path):
-                        os.remove(temp_img_path)
-
-            # Create side panel
-            panel = np.zeros((height, panel_width, 3), dtype=np.uint8)
-
-            # --- Dynamic Text Parameter Calculations ---
-            current_font = cv2.FONT_HERSHEY_DUPLEX
-
-            # Base font scale and reference video height for scaling
-            # Adjust base_font_scale_at_ref_height if text is generally too large or too small
-            base_font_scale_at_ref_height = 0.6
-            reference_height_for_font_scale = 640.0 # e.g., a common video height like 480p, 720p
-
-            # Calculate dynamic font_scale
-            font_scale = (height / reference_height_for_font_scale) * base_font_scale_at_ref_height
-            # Clamp font_scale to a min/max range to avoid extremes
-            font_scale = max(0.4, min(font_scale, 1.2))
-
-            # Calculate dynamic thickness
-            thickness = 1 if font_scale < 0.7 else 2
-
-            # Calculate dynamic line_height based on actual text height
-            # Using a sample string like "Ag" which has ascenders and descenders
-            (_, text_actual_height), _ = cv2.getTextSize("Ag", current_font, font_scale, thickness)
-            line_spacing_factor = 1.8 # Adjust for more or less space between lines
-            line_height = int(text_actual_height * line_spacing_factor)
-            line_height = max(line_height, 15) # Ensure a minimum line height
-
-            # Initial y_offset for the first line of text
-            y_offset_panel = max(line_height, 20) # Start considering top margin and text height
-            # --- End of Dynamic Text Parameter Calculations ---
-
-            display_model_name = f"Gladiator {model_variant.capitalize()}"
-            cv2.putText(panel, f"Model: {display_model_name}", (10, y_offset_panel), current_font, font_scale, (0, 255, 255), thickness, lineType=cv2.LINE_AA)
-            y_offset_panel += line_height
-
-            if 'error' not in landmarks_analysis:
-                cv2.putText(panel, "Angles:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                y_offset_panel += line_height
-                for joint, angle in landmarks_analysis.get('angles', {}).items():
-                    text_to_display = f"{joint.capitalize()}: {angle:.1f} deg"
-                    cv2.putText(panel, text_to_display, (20, y_offset_panel), current_font, font_scale, (0, 255, 0), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
+            if frame_count % 10 != 0: # Process every 10th frame
+                continue
 
-                # Define available width for text within the panel, considering padding
-                text_area_x_start = 20
-                panel_padding = 10 # Padding from the right edge of the panel
-                text_area_width = panel_width - text_area_x_start - panel_padding
-
-                if landmarks_analysis.get('corrections'):
-                    y_offset_panel += int(line_height * 0.5) # Smaller gap before section title
-                    cv2.putText(panel, "Corrections:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-                    for correction_text in landmarks_analysis.get('corrections', []):
-                        wrapped_lines = wrap_text(correction_text, current_font, font_scale, thickness, text_area_width)
-                        for line in wrapped_lines:
-                            cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (0, 0, 255), thickness, lineType=cv2.LINE_AA)
-                            y_offset_panel += line_height
+            try:
+                # Resize and pad the image to keep aspect ratio
+                img = frame.copy()
+                img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 192, 192)
+                img = tf.cast(img, dtype=tf.int32)
 
-                # Display notes if any
-                if landmarks_analysis.get('notes'):
-                    y_offset_panel += int(line_height * 0.5) # Smaller gap before section title
-                    cv2.putText(panel, "Notes:", (10, y_offset_panel), current_font, font_scale, (200, 200, 200), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-                    for note_text in landmarks_analysis.get('notes', []):
-                        wrapped_lines = wrap_text(note_text, current_font, font_scale, thickness, text_area_width)
-                        for line in wrapped_lines:
-                            cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (200, 200, 200), thickness, lineType=cv2.LINE_AA)
-                            y_offset_panel += line_height
-            else:
-                cv2.putText(panel, "Error:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                y_offset_panel += line_height
-                # Also wrap error message if it can be long
-                error_text = landmarks_analysis.get('error', 'Unknown error')
-                text_area_x_start = 20 # Assuming error message also starts at x=20
-                panel_padding = 10
-                text_area_width = panel_width - text_area_x_start - panel_padding
-                wrapped_error_lines = wrap_text(error_text, current_font, font_scale, thickness, text_area_width)
-                for line in wrapped_error_lines:
-                    cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (0, 0, 255), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-
-            combined_frame = np.hstack((processed_frame, panel))
-            out.write(combined_frame)
-
+                # Run inference on GPU if available
+                if gpus:
+                    with tf.device('/GPU:0'):
+                        results = movenet(img)
+                        keypoints = results['output_0'].numpy()
+                else:
+                    with tf.device('/CPU:0'):
+                        results = movenet(img)
+                        keypoints = results['output_0'].numpy()
+
+                # Process keypoints and draw on frame
+                y, x, c = frame.shape
+                shaped = np.squeeze(keypoints)
+
+                for kp in range(17):
+                    ky, kx, kp_conf = shaped[kp]
+                    if kp_conf > 0.3:
+                        cx, cy = int(kx * x), int(ky * y)
+                        cv2.circle(frame, (cx, cy), 6, (0, 255, 0), -1)
+
+                out.write(frame)
+                processed_frames += 1
+
+            except Exception as e:
+                print(f"[ERROR] Error processing frame {frame_count}: {str(e)}")
+                continue
+
         cap.release()
         out.release()
 
-        if frame_count == 0:
-            raise ValueError("No frames were processed from the video by MoveNet")
-
-        print(f"MoveNet video processing completed. Processed {frame_count} frames. Output: {output_path}")
-        print(f"Output file size: {os.path.getsize(output_path)} bytes")
+        print(f"[DEBUG] Processed {processed_frames} frames out of {total_frames} total frames")
+        return output_filename
 
-        return url_for('serve_video', filename=output_filename, _external=False)
     except Exception as e:
-        print(f'Error in process_video_movenet: {e}')
+        print(f"[ERROR] Error in process_video_movenet: {str(e)}")
         traceback.print_exc()
         raise
 
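Review notes on the new process_video_movenet: the serving_default signature of MoveNet singlepose-lightning returns keypoints shaped [1, 1, 17, 3] as normalized (y, x, score) rows, which is why the loop squeezes to (17, 3) and rescales by the frame's height and width. Also, because only every 10th frame is written while the writer keeps the source fps, the output plays back roughly 10x faster than the input. A sketch of one way to compensate, assuming the same skip factor of 10 (the make_skip_writer helper is hypothetical, not in the commit):

import cv2

def make_skip_writer(output_path, src_fps, width, height, frame_skip=10):
    # Writing every Nth frame at the source fps speeds playback up N-fold;
    # dividing the writer fps by the skip factor keeps playback near real time.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer_fps = max(src_fps / frame_skip, 1.0)  # guard against a zero-fps writer
    return cv2.VideoWriter(output_path, fourcc, writer_fps, (width, height))
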
@@ -430,8 +385,8 @@ def process_video_mediapipe(video_path):
         except Exception as e:
             print(f"[CNN] Error predicting pose on frame {frame_count}: {e}")
         finally:
-            if os.path.exists(temp_img_path):
-                os.remove(temp_img_path)
+            if os.path.exists(temp_img_path):
+                os.remove(temp_img_path)
 
         # Create side panel
         panel = np.zeros((height, panel_width, 3), dtype=np.uint8)
@@ -561,7 +516,7 @@ def upload_file():
         if model_choice == 'movenet':
             movenet_variant = request.form.get('movenet_variant', 'lightning')
             print(f"[UPLOAD] Using MoveNet variant: {movenet_variant}")
-            output_path_url = process_video_movenet(filepath, model_variant=movenet_variant)
+            output_path_url = process_video_movenet(filepath)
         else:
             output_path_url = process_video_mediapipe(filepath)
 
@@ -583,8 +538,8 @@ def upload_file():
 
     finally:
         try:
-            if os.path.exists(filepath):
-                os.remove(filepath)
+            if os.path.exists(filepath):
+                os.remove(filepath)
             print(f"[UPLOAD] Cleaned up input file: {filepath}")
         except Exception as e:
             print(f"[UPLOAD] Error cleaning up file: {str(e)}")
 
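A quick standalone way to confirm that ops actually land on the GPU after this change (a debugging sketch, not part of the commit):

import tensorflow as tf

# Ask TensorFlow to log the device each op executes on.
tf.debugging.set_log_device_placement(True)

print("Visible GPUs:", tf.config.list_physical_devices('GPU'))

# A trivial matmul; the placement log shows whether it ran on /GPU:0 or /CPU:0.
x = tf.random.uniform((2, 2))
print(tf.matmul(x, x))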