Spaces:

cdleong
/

explore-pose-components

Running

App Files Files Community

Colin Leong committed on Feb 26

Commit

84dfc7c

1 Parent(s): 869eec5

Add YouTube-ASL filtering, and ability to download points_dict and components list

Browse files

Files changed (1) hide show

app.py +180 -56

app.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import streamlit as st
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import numpy as np
 from pose_format import Pose
 from pose_format.pose_visualizer import PoseVisualizer
-from pathlib import Path
 from pyzstd import decompress
 from PIL import Image
 import mediapipe as mp
@@ -15,39 +19,47 @@ FACEMESH_CONTOURS_POINTS = [
         set([p for p_tup in list(mp_holistic.FACEMESH_CONTOURS) for p in p_tup])
     )
 ]
-def pose_normalization_info(pose_header):
-    if pose_header.components[0].name == "POSE_LANDMARKS":
-        return pose_header.normalization_info(
-            p1=("POSE_LANDMARKS", "RIGHT_SHOULDER"),
-            p2=("POSE_LANDMARKS", "LEFT_SHOULDER"),
-        )
-    if pose_header.components[0].name == "BODY_135":
-        return pose_header.normalization_info(
-            p1=("BODY_135", "RShoulder"), p2=("BODY_135", "LShoulder")
         )
-    if pose_header.components[0].name == "pose_keypoints_2d":
-        return pose_header.normalization_info(
-            p1=("pose_keypoints_2d", "RShoulder"), p2=("pose_keypoints_2d", "LShoulder")
-        )
-def pose_hide_legs(pose):
-    if pose.header.components[0].name == "POSE_LANDMARKS":
-        point_names = ["KNEE", "ANKLE", "HEEL", "FOOT_INDEX"]
-        # pylint: disable=protected-access
-        points = [
-            pose.header._get_point_index("POSE_LANDMARKS", side + "_" + n)
-            for n in point_names
-            for side in ["LEFT", "RIGHT"]
-        ]
-        pose.body.confidence[:, :, points] = 0
-        pose.body.data[:, :, points, :] = 0
-        return pose
-    else:
-        raise ValueError("Unknown pose header schema for hiding legs")
 # @st.cache_data(hash_funcs={UploadedFile: lambda p: str(p.name)})
@@ -60,7 +72,7 @@ def load_pose(uploaded_file: UploadedFile) -> Pose:
         return Pose.read(uploaded_file.read())
-@st.cache_data(hash_funcs={Pose: lambda p: np.array(p.body.data)})
 def get_pose_frames(pose: Pose, transparency: bool = False):
     v = PoseVisualizer(pose)
     frames = [frame_data for frame_data in v.draw()]
@@ -73,7 +85,13 @@ def get_pose_frames(pose: Pose, transparency: bool = False):
     return frames, images
-def get_pose_gif(pose: Pose, step: int = 1, start_frame:int=None, end_frame:int=None, fps: int = None):
     if fps is not None:
         pose.body.fps = fps
     v = PoseVisualizer(pose)
@@ -89,37 +107,42 @@ st.write(
 st.write(
     "I made this app to help me visualize and understand the format, including different 'components' and 'points', and what they are named."
 )
-st.write("If you need a .pose file, here's one of [me doing a self-introduction](https://drive.google.com/file/d/1_L5sYVhONDBABuTmQUvjsl94LbFqzEyP/view?usp=sharing), and one of [me signing ASL 'HOUSE'](https://drive.google.com/file/d/1uggYqLyTA4XdDWaWsS9w5hKaPwW86IF_/view?usp=sharing)")
 uploaded_file = st.file_uploader("Upload a .pose file", type=[".pose", ".pose.zst"])
 if uploaded_file is not None:
     with st.spinner(f"Loading {uploaded_file.name}"):
         pose = load_pose(uploaded_file)
         frames, images = get_pose_frames(pose=pose)
     st.success("Done loading!")
     st.write("### File Info")
     with st.expander(f"Show full Pose-format header from {uploaded_file.name}"):
         st.write(pose.header)
     st.write(f"### Selection")
     component_selection = st.radio(
-        "How to select components?", options=["manual", "signclip"]
     )
     component_names = [c.name for c in pose.header.components]
     chosen_component_names = []
     points_dict = {}
-    hide_legs = False
     if component_selection == "manual":
         chosen_component_names = st.pills(
-            "Select components to visualize", options=component_names, default=component_names,selection_mode="multi"
         )
         for component in pose.header.components:
             if component.name in chosen_component_names:
                 with st.expander(f"Points for {component.name}"):
@@ -128,32 +151,118 @@ if uploaded_file is not None:
                         options=component.points,
                         default=component.points,
                     )
-                    if selected_points != component.points:  # Only add entry if not all points are selected
                         points_dict[component.name] = selected_points
     elif component_selection == "signclip":
         st.write("Selected landmarks used for SignCLIP.")
-        chosen_component_names = ["POSE_LANDMARKS", "FACE_LANDMARKS", "LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"]
         points_dict = {"FACE_LANDMARKS": FACEMESH_CONTOURS_POINTS}
     # Filter button logic
-        # Filter section
     st.write("### Filter .pose File")
     filtered = st.button("Apply Filter!")
     if filtered:
-        pose = pose.get_components(chosen_component_names, points=points_dict if points_dict else None)
-        if hide_legs:
-            pose = pose_hide_legs(pose)
         st.session_state.filtered_pose = pose
-    filtered_pose = st.session_state.get('filtered_pose', pose)
     if filtered_pose:
-        filtered_pose = st.session_state.get('filtered_pose', pose)
-        st.write(f"#### Filtered .pose file")
         st.write(f"Pose data shape: {filtered_pose.body.data.shape}")
         with st.expander("Show header"):
             st.write(filtered_pose.header)
@@ -170,12 +279,20 @@ if uploaded_file is not None:
             pose.write(f)
         with pose_file_out.open("rb") as f:
-            st.download_button("Download Filtered Pose", f, file_name=pose_file_out.name)
     st.write("### Visualization")
-    step = st.select_slider("Step value to select every nth image", list(range(1, len(frames))), value=1)
-    fps = st.slider("FPS for visualization", min_value=1.0, max_value=filtered_pose.body.fps, value=filtered_pose.body.fps)
     start_frame, end_frame = st.slider(
         "Select Frame Range",
         0,
@@ -185,6 +302,13 @@ if uploaded_file is not None:
     # Visualization button logic
     if st.button("Visualize"):
         # Load filtered pose if it exists; otherwise, use the unfiltered pose
-        st.image(get_pose_gif(pose=filtered_pose, step=step, start_frame=start_frame, end_frame=end_frame, fps=fps))

+from pathlib import Path
+import json
+from typing import Dict, Optional, List, Tuple
+from collections import defaultdict
 import streamlit as st
 from streamlit.runtime.uploaded_file_manager import UploadedFile
 import numpy as np
 from pose_format import Pose
+from pose_format.utils.generic import pose_hide_legs, reduce_holistic
 from pose_format.pose_visualizer import PoseVisualizer
 from pyzstd import decompress
 from PIL import Image
 import mediapipe as mp
         set([p for p_tup in list(mp_holistic.FACEMESH_CONTOURS) for p in p_tup])
     )
 ]
+# Options offered by the "How to select components?" radio button.
+# "manual", "signclip" and "youtube-asl" are each handled by an arm of the
+# selection if/elif chain; "reduce_holistic" is handled later, when the
+# "Apply Filter!" button is pressed.
+COMPONENT_SELECTION_METHODS = ["manual", "signclip", "youtube-asl", "reduce_holistic"]
+def download_json(data):
+    """Serialize ``data`` to a JSON string and return it as UTF-8 bytes."""
+    return json.dumps(data).encode("utf-8")
+def get_points_dict_and_components_with_index_list(
+    pose: "Pose",
+    landmark_indices: List[int],
+    components_to_include: Optional[List[str]] = None,
+) -> Tuple[List[str], Dict[str, List[str]]]:
+    """Turn a list of point indices into arguments for ``pose.get_components``.
+
+    Useful when you only have a list of indices, e.g. as listed in a research
+    paper like YouTube-ASL. Every point of every component in ``pose.header``
+    is looked up with ``pose.header.get_point_index(component, point)``; points
+    whose index is in ``landmark_indices`` are selected.
+
+    Args:
+        pose: The pose whose header is inspected. It is not modified.
+        landmark_indices: Indices to keep, in the numbering returned by
+            ``pose.header.get_point_index``.
+        components_to_include: Optional component names to request as well,
+            even if none of their points are in ``landmark_indices``. They get
+            no entry in the returned points dict.
+
+    Returns:
+        ``(component_names, points_dict)``. ``component_names`` lists each
+        matched component once, in header order, followed by any extra names
+        from ``components_to_include``. ``points_dict`` is a plain dict mapping
+        a component name to the names of its selected points.
+
+    Example:
+        To get the two hands and the nose::
+
+            c_names, points_dict = get_points_dict_and_components_with_index_list(
+                pose,
+                landmark_indices=[0],  # "NOSE" within POSE_LANDMARKS
+                components_to_include=["LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"],
+            )
+            filtered_pose = pose.get_components(c_names, points_dict)
+    """
+    # Build the lookup set once; membership is tested for every point.
+    wanted_indices = set(landmark_indices)
+
+    components_to_get: List[str] = []
+    points_dict: Dict[str, List[str]] = {}
+    for c in pose.header.components:
+        matching_points = [
+            point_name
+            for point_name in c.points
+            if pose.header.get_point_index(c.name, point_name) in wanted_indices
+        ]
+        if not matching_points:
+            continue
+        if c.name not in points_dict:
+            components_to_get.append(c.name)
+        points_dict.setdefault(c.name, []).extend(matching_points)
+
+    # Append explicitly requested components, keeping the list duplicate-free
+    # and in a deterministic order.
+    for extra_name in components_to_include or []:
+        if extra_name not in components_to_get:
+            components_to_get.append(extra_name)
+
+    return components_to_get, points_dict
 # @st.cache_data(hash_funcs={UploadedFile: lambda p: str(p.name)})
         return Pose.read(uploaded_file.read())
+@st.cache_data(hash_funcs={Pose: lambda p: np.asarray(p.body.data.data)})
 def get_pose_frames(pose: Pose, transparency: bool = False):
     v = PoseVisualizer(pose)
     frames = [frame_data for frame_data in v.draw()]
     return frames, images
+def get_pose_gif(
+    pose: Pose,
+    step: int = 1,
+    start_frame: Optional[int] = None,
+    end_frame: Optional[int] = None,
+    fps: Optional[float] = None,
+):
     if fps is not None:
         pose.body.fps = fps
     v = PoseVisualizer(pose)
 st.write(
     "I made this app to help me visualize and understand the format, including different 'components' and 'points', and what they are named."
 )
+st.write(
+    "If you need a .pose file, here's one of [me doing a self-introduction](https://drive.google.com/file/d/1_L5sYVhONDBABuTmQUvjsl94LbFqzEyP/view?usp=sharing), and one of [me signing ASL 'HOUSE'](https://drive.google.com/file/d/1uggYqLyTA4XdDWaWsS9w5hKaPwW86IF_/view?usp=sharing)"
+)
 uploaded_file = st.file_uploader("Upload a .pose file", type=[".pose", ".pose.zst"])
 if uploaded_file is not None:
     with st.spinner(f"Loading {uploaded_file.name}"):
         pose = load_pose(uploaded_file)
+        # st.write(pose.body.data.shape)
         frames, images = get_pose_frames(pose=pose)
     st.success("Done loading!")
     st.write("### File Info")
     with st.expander(f"Show full Pose-format header from {uploaded_file.name}"):
         st.write(pose.header)
     st.write(f"### Selection")
     component_selection = st.radio(
+        "How to select components?", options=COMPONENT_SELECTION_METHODS
     )
     component_names = [c.name for c in pose.header.components]
     chosen_component_names = []
     points_dict = {}
+    HIDE_LEGS = False
     if component_selection == "manual":
         chosen_component_names = st.pills(
+            "Select components to visualize",
+            options=component_names,
+            default=component_names,
+            selection_mode="multi",
         )
         for component in pose.header.components:
             if component.name in chosen_component_names:
                 with st.expander(f"Points for {component.name}"):
                         options=component.points,
                         default=component.points,
                     )
+                    if (
+                        selected_points != component.points
+                    ):  # Only add entry if not all points are selected
                         points_dict[component.name] = selected_points
     elif component_selection == "signclip":
         st.write("Selected landmarks used for SignCLIP.")
+        chosen_component_names = [
+            "POSE_LANDMARKS",
+            "FACE_LANDMARKS",
+            "LEFT_HAND_LANDMARKS",
+            "RIGHT_HAND_LANDMARKS",
+        ]
         points_dict = {"FACE_LANDMARKS": FACEMESH_CONTOURS_POINTS}
+    elif component_selection == "youtube-asl":
+        st.write("Selected landmarks used for YouTube-ASL.")
+        # https://arxiv.org/pdf/2306.15162
+        # For each hand, we use all 21 landmark points.
+        # Colin: so the hand components get no entry in points_dict.
+        # For the pose, we use 6 landmark points, for the shoulders, elbows and hips
+        # These are indices 11, 12, 13, 14, 23, 24
+        # For the face, we use 37 landmark points, from the eyes, eyebrows, lips, and face outline.
+        # These are indices 0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93, 133, 151, 152, 159, 172, 178,
+        # 181, 263, 269, 276, 282, 285, 291, 294, 311, 323, 362, 386, 397, 468, 473.
+        # Colin: note that these are with refine_face_landmarks on, and are relative to the component itself. Working it all out the result is:
+        # NOTE: this must be assigned to chosen_component_names (not a new
+        # variable), because that is the name the filter step passes to
+        # pose.get_components; otherwise the component list stays empty.
+        chosen_component_names = [
+            "POSE_LANDMARKS",
+            "FACE_LANDMARKS",
+            "LEFT_HAND_LANDMARKS",
+            "RIGHT_HAND_LANDMARKS",
+        ]
+        points_dict = {
+            "POSE_LANDMARKS": [
+                "LEFT_SHOULDER",
+                "RIGHT_SHOULDER",
+                "LEFT_HIP",
+                "RIGHT_HIP",
+                "LEFT_ELBOW",
+                "RIGHT_ELBOW",
+            ],
+            "FACE_LANDMARKS": [
+                "0", "4", "13", "14", "17", "33", "37", "39", "46", "52",
+                "55", "61", "64", "81", "82", "93", "133", "151", "152", "159",
+                "172", "178", "181", "263", "269", "276", "282", "285", "291", "294",
+                "311", "323", "362", "386", "397",
+                "468",  # 468 only exists with the refine_face_landmarks option on MediaPipe
+                "473",  # 473 only exists with the refine_face_landmarks option on MediaPipe
+            ],
+        }
     # Filter button logic
+    # Filter section
     st.write("### Filter .pose File")
     filtered = st.button("Apply Filter!")
     if filtered:
+        st.write(f"Filtering strategy: {component_selection}")
+        if component_selection == "reduce_holistic":
+            # st.write(f"reduce_holistic:")
+            pose = reduce_holistic(pose)
+            st.write("Used pose_format.reduce_holistic")
+        else:
+            pose = pose.get_components(components=chosen_component_names, points=points_dict if points_dict else None
+            )
+            with st.expander("Show component list and points dict used for get_components"):
+                st.write("##### Component names")
+                st.write(chosen_component_names)
+                st.write("##### Points dict")
+                st.write(points_dict)
+            with st.expander("How to replicate in pose-format"):
+                st.write("##### Usage:")
+                st.write("How to achieve the same result with pose-format library")
+                # points_dict_str = json.dumps(points_dict, indent=4)
+                usage_string = f"components={chosen_component_names}\npoints_dict={points_dict}\npose = pose.get_components(components=components, points=points_dict)"
+                st.code(usage_string)
+        if HIDE_LEGS:
+            pose = pose_hide_legs(pose, remove=True)
         st.session_state.filtered_pose = pose
+    filtered_pose = st.session_state.get("filtered_pose", pose)
     if filtered_pose:
+        filtered_pose = st.session_state.get("filtered_pose", pose)
+        st.write("#### Filtered .pose file")
         st.write(f"Pose data shape: {filtered_pose.body.data.shape}")
         with st.expander("Show header"):
             st.write(filtered_pose.header)
             pose.write(f)
         with pose_file_out.open("rb") as f:
+            st.download_button(
+                "Download Filtered Pose", f, file_name=pose_file_out.name
+            )
     st.write("### Visualization")
+    step = st.select_slider(
+        "Step value to select every nth image", list(range(1, len(frames))), value=1
+    )
+    fps = st.slider(
+        "FPS for visualization",
+        min_value=1.0,
+        max_value=filtered_pose.body.fps,
+        value=filtered_pose.body.fps,
+    )
     start_frame, end_frame = st.slider(
         "Select Frame Range",
         0,
     # Visualization button logic
     if st.button("Visualize"):
         # Load filtered pose if it exists; otherwise, use the unfiltered pose
+        pose_bytes = get_pose_gif(
+            pose=filtered_pose,
+            step=step,
+            start_frame=start_frame,
+            end_frame=end_frame,
+            fps=fps,
+        )
+        if pose_bytes is not None:
+            st.image(pose_bytes)