Spaces: scfive/bodybuilding-pose-app

Sleeping

App Files Community

Sean Carnahan committed 15 days ago

Commit

fbef789

1 Parent(s): fded136

Enable GPU support for MoveNet and CNN models

Browse files

Files changed (1) hide show

app.py +104 -149

app.py CHANGED Viewed

@@ -11,6 +11,21 @@ import tensorflow as tf
 from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing import image
 import time
 # Add bodybuilding_pose_analyzer to path
 sys.path.append('.') # Assuming app.py is at the root of cv.github.io
@@ -94,20 +109,31 @@ cnn_class_labels = ['side_chest', 'front_double_biceps', 'back_double_biceps', '
 def predict_pose_cnn(img_path):
     try:
-        print("[CNN_DEBUG] Forcing CPU for CNN prediction")
-        with tf.device('/CPU:0'): # This line requires 'import tensorflow as tf'
-            img = image.load_img(img_path, target_size=(150, 150))
-            img_array = image.img_to_array(img)
-            img_array = np.expand_dims(img_array, axis=0) / 255.0
-            predictions = cnn_model.predict(img_array)
-            predicted_class = np.argmax(predictions, axis=1)
-            confidence = float(np.max(predictions))
-        print(f"[CNN_DEBUG] Prediction successful on CPU: {cnn_class_labels[predicted_class[0]]}")
         return cnn_class_labels[predicted_class[0]], confidence
     except Exception as e:
         print(f"[CNN_ERROR] Exception during CNN prediction: {e}")
         traceback.print_exc()
-        raise # Re-raise the exception to be caught by the calling function
 @app.route('/static/uploads/<path:filename>')
 def serve_video(filename):
@@ -206,168 +232,97 @@ def after_request(response):
 #         traceback.print_exc()
 #         raise
-def process_video_movenet(video_path, model_variant='lightning', pose_type='front_double_biceps'):
     try:
-        print(f"[PROCESS_VIDEO_MOVENET] Called with video_path: {video_path}, model_variant: {model_variant}, pose_type: {pose_type}")
-        if not os.path.exists(video_path):
-            raise FileNotFoundError(f"Video file not found: {video_path}")
-        analyzer = MoveNetAnalyzer(model_name=model_variant)
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
-            raise ValueError(f"Failed to open video file: {video_path}")
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        # Add panel width to total width
-        panel_width = 300
-        total_width = width + panel_width
-        print(f"Processing video with MoveNet ({model_variant}): {width}x{height} @ {fps}fps")
-        print(f"Output dimensions will be: {total_width}x{height}")
-        output_filename = f'output_movenet_{model_variant}.mp4'
         output_path = os.path.join(app.config['UPLOAD_FOLDER'], output_filename)
         print(f"Output path: {output_path}")
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (total_width, height))
         if not out.isOpened():
             raise ValueError(f"Failed to create output video writer at {output_path}")
         frame_count = 0
-        current_pose = pose_type
-        segment_length = 4 * fps if fps > 0 else 120
-        cnn_pose = None
-        last_valid_landmarks = None
-        landmarks_analysis = {'error': 'Processing not started'} # Initialize landmarks_analysis
         while cap.isOpened():
             ret, frame = cap.read()
             if not ret:
                 break
             frame_count += 1
-            if frame_count % 30 == 0:
-                print(f"Processing frame {frame_count}")
-            # Process frame
-            processed_frame, current_landmarks_analysis, landmarks = analyzer.process_frame(frame, current_pose, last_valid_landmarks=last_valid_landmarks)
-            landmarks_analysis = current_landmarks_analysis # Update with the latest analysis
-            if frame_count % 30 == 0: # Log every 30 frames
-                print(f"[MOVENET_DEBUG] Frame {frame_count} - landmarks_analysis: {landmarks_analysis}")
-            if landmarks:
-                last_valid_landmarks = landmarks
-            # CNN prediction (every 4 seconds)
-            if (frame_count - 1) % segment_length == 0:
-                temp_img_path = f'temp_frame_for_cnn_{frame_count}.jpg' # Unique temp name
-                cv2.imwrite(temp_img_path, frame)
-                try:
-                    cnn_pose_pred, cnn_conf = predict_pose_cnn(temp_img_path)
-                    print(f"[CNN] Frame {frame_count}: Pose: {cnn_pose_pred}, Conf: {cnn_conf:.2f}")
-                    if cnn_conf >= 0.3:
-                        current_pose = cnn_pose_pred # Update current_pose for the analyzer
-                except Exception as e:
-                    print(f"[CNN] Error predicting pose on frame {frame_count}: {e}")
-                finally:
-                    if os.path.exists(temp_img_path):
-                        os.remove(temp_img_path)
-            # Create side panel
-            panel = np.zeros((height, panel_width, 3), dtype=np.uint8)
-            # --- Dynamic Text Parameter Calculations ---
-            current_font = cv2.FONT_HERSHEY_DUPLEX
-            # Base font scale and reference video height for scaling
-            # Adjust base_font_scale_at_ref_height if text is generally too large or too small
-            base_font_scale_at_ref_height = 0.6
-            reference_height_for_font_scale = 640.0 # e.g., a common video height like 480p, 720p
-            # Calculate dynamic font_scale
-            font_scale = (height / reference_height_for_font_scale) * base_font_scale_at_ref_height
-            # Clamp font_scale to a min/max range to avoid extremes
-            font_scale = max(0.4, min(font_scale, 1.2))
-            # Calculate dynamic thickness
-            thickness = 1 if font_scale < 0.7 else 2
-            # Calculate dynamic line_height based on actual text height
-            # Using a sample string like "Ag" which has ascenders and descenders
-            (_, text_actual_height), _ = cv2.getTextSize("Ag", current_font, font_scale, thickness)
-            line_spacing_factor = 1.8 # Adjust for more or less space between lines
-            line_height = int(text_actual_height * line_spacing_factor)
-            line_height = max(line_height, 15) # Ensure a minimum line height
-            # Initial y_offset for the first line of text
-            y_offset_panel = max(line_height, 20) # Start considering top margin and text height
-            # --- End of Dynamic Text Parameter Calculations ---
-            display_model_name = f"Gladiator {model_variant.capitalize()}"
-            cv2.putText(panel, f"Model: {display_model_name}", (10, y_offset_panel), current_font, font_scale, (0, 255, 255), thickness, lineType=cv2.LINE_AA)
-            y_offset_panel += line_height
-            if 'error' not in landmarks_analysis:
-                cv2.putText(panel, "Angles:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                y_offset_panel += line_height
-                for joint, angle in landmarks_analysis.get('angles', {}).items():
-                    text_to_display = f"{joint.capitalize()}: {angle:.1f} deg"
-                    cv2.putText(panel, text_to_display, (20, y_offset_panel), current_font, font_scale, (0, 255, 0), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-                # Define available width for text within the panel, considering padding
-                text_area_x_start = 20
-                panel_padding = 10 # Padding from the right edge of the panel
-                text_area_width = panel_width - text_area_x_start - panel_padding
-                if landmarks_analysis.get('corrections'):
-                    y_offset_panel += int(line_height * 0.5) # Smaller gap before section title
-                    cv2.putText(panel, "Corrections:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-                    for correction_text in landmarks_analysis.get('corrections', []):
-                        wrapped_lines = wrap_text(correction_text, current_font, font_scale, thickness, text_area_width)
-                        for line in wrapped_lines:
-                            cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (0, 0, 255), thickness, lineType=cv2.LINE_AA)
-                            y_offset_panel += line_height
-                # Display notes if any
-                if landmarks_analysis.get('notes'):
-                    y_offset_panel += int(line_height * 0.5) # Smaller gap before section title
-                    cv2.putText(panel, "Notes:", (10, y_offset_panel), current_font, font_scale, (200, 200, 200), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-                    for note_text in landmarks_analysis.get('notes', []):
-                        wrapped_lines = wrap_text(note_text, current_font, font_scale, thickness, text_area_width)
-                        for line in wrapped_lines:
-                            cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (200, 200, 200), thickness, lineType=cv2.LINE_AA)
-                            y_offset_panel += line_height
-            else:
-                cv2.putText(panel, "Error:", (10, y_offset_panel), current_font, font_scale, (255, 255, 255), thickness, lineType=cv2.LINE_AA)
-                y_offset_panel += line_height
-                # Also wrap error message if it can be long
-                error_text = landmarks_analysis.get('error', 'Unknown error')
-                text_area_x_start = 20 # Assuming error message also starts at x=20
-                panel_padding = 10
-                text_area_width = panel_width - text_area_x_start - panel_padding
-                wrapped_error_lines = wrap_text(error_text, current_font, font_scale, thickness, text_area_width)
-                for line in wrapped_error_lines:
-                    cv2.putText(panel, line, (text_area_x_start, y_offset_panel), current_font, font_scale, (0, 0, 255), thickness, lineType=cv2.LINE_AA)
-                    y_offset_panel += line_height
-            combined_frame = np.hstack((processed_frame, panel))
-            out.write(combined_frame)
         cap.release()
         out.release()
-        if frame_count == 0:
-            raise ValueError("No frames were processed from the video by MoveNet")
-        print(f"MoveNet video processing completed. Processed {frame_count} frames. Output: {output_path}")
-        print(f"Output file size: {os.path.getsize(output_path)} bytes")
-        return url_for('serve_video', filename=output_filename, _external=False)
     except Exception as e:
-        print(f'Error in process_video_movenet: {e}')
         traceback.print_exc()
         raise
@@ -430,8 +385,8 @@ def process_video_mediapipe(video_path):
                 except Exception as e:
                     print(f"[CNN] Error predicting pose on frame {frame_count}: {e}")
                 finally:
-                    if os.path.exists(temp_img_path):
-                        os.remove(temp_img_path)
             # Create side panel
             panel = np.zeros((height, panel_width, 3), dtype=np.uint8)
@@ -561,7 +516,7 @@ def upload_file():
                 if model_choice == 'movenet':
                     movenet_variant = request.form.get('movenet_variant', 'lightning')
                     print(f"[UPLOAD] Using MoveNet variant: {movenet_variant}")
-                    output_path_url = process_video_movenet(filepath, model_variant=movenet_variant)
                 else:
                     output_path_url = process_video_mediapipe(filepath)
@@ -583,8 +538,8 @@ def upload_file():
             finally:
                 try:
-                    if os.path.exists(filepath):
-                        os.remove(filepath)
                         print(f"[UPLOAD] Cleaned up input file: {filepath}")
                 except Exception as e:
                     print(f"[UPLOAD] Error cleaning up file: {str(e)}")

 from tensorflow.keras.models import load_model
 from tensorflow.keras.preprocessing import image
 import time
+import tensorflow_hub as hub
+# Check GPU availability
+print("[GPU] Checking GPU availability...")
+gpus = tf.config.list_physical_devices('GPU')
+if gpus:
+    print(f"[GPU] Found {len(gpus)} GPU(s):")
+    for gpu in gpus:
+        print(f"[GPU] {gpu}")
+    # Enable memory growth to avoid allocating all GPU memory at once
+    for gpu in gpus:
+        tf.config.experimental.set_memory_growth(gpu, True)
+    print("[GPU] Memory growth enabled for all GPUs")
+else:
+    print("[GPU] No GPU found, will use CPU")
 # Add bodybuilding_pose_analyzer to path
 sys.path.append('.') # Assuming app.py is at the root of cv.github.io
 def predict_pose_cnn(img_path):
     try:
+        if gpus:
+            print("[CNN_DEBUG] Using GPU for CNN prediction")
+            with tf.device('/GPU:0'):
+                img = image.load_img(img_path, target_size=(150, 150))
+                img_array = image.img_to_array(img)
+                img_array = np.expand_dims(img_array, axis=0) / 255.0
+                predictions = cnn_model.predict(img_array)
+                predicted_class = np.argmax(predictions, axis=1)
+                confidence = float(np.max(predictions))
+        else:
+            print("[CNN_DEBUG] No GPU found, using CPU for CNN prediction")
+            with tf.device('/CPU:0'):
+                img = image.load_img(img_path, target_size=(150, 150))
+                img_array = image.img_to_array(img)
+                img_array = np.expand_dims(img_array, axis=0) / 255.0
+                predictions = cnn_model.predict(img_array)
+                predicted_class = np.argmax(predictions, axis=1)
+                confidence = float(np.max(predictions))
+        print(f"[CNN_DEBUG] Prediction successful: {cnn_class_labels[predicted_class[0]]}")
         return cnn_class_labels[predicted_class[0]], confidence
     except Exception as e:
         print(f"[CNN_ERROR] Exception during CNN prediction: {e}")
         traceback.print_exc()
+        raise
 @app.route('/static/uploads/<path:filename>')
 def serve_video(filename):
 #         traceback.print_exc()
 #         raise
+def process_video_movenet(video_path):
     try:
+        print("[DEBUG] Starting MoveNet video processing")
         cap = cv2.VideoCapture(video_path)
         if not cap.isOpened():
+            raise ValueError("Could not open video file")
+        # Get video properties
         fps = int(cap.get(cv2.CAP_PROP_FPS))
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        print(f"[DEBUG] Video properties - FPS: {fps}, Width: {width}, Height: {height}, Total Frames: {total_frames}")
+        # Initialize MoveNet model on GPU if available
+        print("[DEBUG] Initializing MoveNet model")
+        if gpus:
+            print("[DEBUG] Using GPU for MoveNet")
+            with tf.device('/GPU:0'):
+                movenet_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
+                movenet = movenet_model.signatures['serving_default']
+        else:
+            print("[DEBUG] No GPU found, using CPU for MoveNet")
+            with tf.device('/CPU:0'):
+                movenet_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
+                movenet = movenet_model.signatures['serving_default']
+        # Create output video writer
+        output_filename = f'output_movenet_lightning.mp4'
         output_path = os.path.join(app.config['UPLOAD_FOLDER'], output_filename)
         print(f"Output path: {output_path}")
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
         if not out.isOpened():
             raise ValueError(f"Failed to create output video writer at {output_path}")
         frame_count = 0
+        processed_frames = 0
         while cap.isOpened():
             ret, frame = cap.read()
             if not ret:
                 break
             frame_count += 1
+            if frame_count % 10 != 0:  # Process every 10th frame
+                continue
+            try:
+                # Resize and pad the image to keep aspect ratio
+                img = frame.copy()
+                img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 192, 192)
+                img = tf.cast(img, dtype=tf.int32)
+                # Run inference on GPU if available
+                if gpus:
+                    with tf.device('/GPU:0'):
+                        results = movenet(img)
+                        keypoints = results['output_0'].numpy()
+                else:
+                    with tf.device('/CPU:0'):
+                        results = movenet(img)
+                        keypoints = results['output_0'].numpy()
+                # Process keypoints and draw on frame
+                y, x, c = frame.shape
+                shaped = np.squeeze(keypoints)
+                for kp in range(17):
+                    ky, kx, kp_conf = shaped[kp]
+                    if kp_conf > 0.3:
+                        cx, cy = int(kx * x), int(ky * y)
+                        cv2.circle(frame, (cx, cy), 6, (0, 255, 0), -1)
+                out.write(frame)
+                processed_frames += 1
+            except Exception as e:
+                print(f"[ERROR] Error processing frame {frame_count}: {str(e)}")
+                continue
         cap.release()
         out.release()
+        print(f"[DEBUG] Processed {processed_frames} frames out of {total_frames} total frames")
+        return output_filename
     except Exception as e:
+        print(f"[ERROR] Error in process_video_movenet: {str(e)}")
         traceback.print_exc()
         raise
                 except Exception as e:
                     print(f"[CNN] Error predicting pose on frame {frame_count}: {e}")
                 finally:
+                if os.path.exists(temp_img_path):
+                    os.remove(temp_img_path)
             # Create side panel
             panel = np.zeros((height, panel_width, 3), dtype=np.uint8)
                 if model_choice == 'movenet':
                     movenet_variant = request.form.get('movenet_variant', 'lightning')
                     print(f"[UPLOAD] Using MoveNet variant: {movenet_variant}")
+                    output_path_url = process_video_movenet(filepath)
                 else:
                     output_path_url = process_video_mediapipe(filepath)
             finally:
                 try:
+                if os.path.exists(filepath):
+                    os.remove(filepath)
                         print(f"[UPLOAD] Cleaned up input file: {filepath}")
                 except Exception as e:
                     print(f"[UPLOAD] Error cleaning up file: {str(e)}")