# Colin Leong
# Add an output for reduce_holistic
# commit 77ef1b5
from pathlib import Path
import json
from typing import Dict, Optional, List, Tuple
from collections import defaultdict
import streamlit as st
from streamlit.runtime.uploaded_file_manager import UploadedFile
import numpy as np
from pose_format import Pose
from pose_format.utils.generic import pose_hide_legs, reduce_holistic
from pose_format.pose_visualizer import PoseVisualizer
from pyzstd import decompress
from PIL import Image
import mediapipe as mp
mp_holistic = mp.solutions.holistic

# Point indices (as strings) belonging to MediaPipe's FACEMESH_CONTOURS
# connection set. FACEMESH_CONTOURS is a set of (start, end) index pairs, so we
# flatten the pairs, dedupe with a set comprehension, and sort numerically.
FACEMESH_CONTOURS_POINTS = [
    str(p) for p in sorted({p for p_tup in mp_holistic.FACEMESH_CONTOURS for p in p_tup})
]

# Strategies offered by the UI for choosing which pose components to keep.
COMPONENT_SELECTION_METHODS = ["manual", "signclip", "youtube-asl", "reduce_holistic"]
def download_json(data):
    """Serialize *data* to UTF-8 JSON bytes (suitable for st.download_button)."""
    return json.dumps(data).encode('utf-8')
def get_points_dict_and_components_with_index_list(
    pose: "Pose", landmark_indices: List[int], components_to_include: Optional[List[str]] = None
) -> Tuple[List[str], Dict[str, List[str]]]:
    """Resolve a flat list of landmark indices into component names and a points dict.

    Used when you only have a list of indices, e.g. listed in a research paper
    like YouTube-ASL. You can also explicitly include whole components by name.
    For example, to get the two hands and the nose:

        c_names, points_dict = get_points_dict_and_components_with_index_list(
            pose,
            landmark_indices=[0],  # "NOSE" within POSE_LANDMARKS
            components_to_include=["LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"],
        )
        filtered_pose = pose.get_components(c_names, points_dict)

    Args:
        pose: the Pose whose header is searched.
        landmark_indices: indices as returned by ``pose.header.get_point_index``.
        components_to_include: optional extra component names to keep wholesale.

    Returns:
        (component names, {component name: [point names]}) for ``get_components``.
    """
    components_to_get: List[str] = []
    points_dict: Dict[str, List[str]] = defaultdict(list)
    wanted_indices = set(landmark_indices)  # O(1) membership instead of O(n) list scans
    for c in pose.header.components:
        for point_name in c.points:
            point_index = pose.header.get_point_index(c.name, point_name)
            if point_index in wanted_indices:
                components_to_get.append(c.name)
                points_dict[c.name].append(point_name)
    if components_to_include:
        components_to_get.extend(components_to_include)
    # dict.fromkeys dedupes while keeping first-seen order; list(set(...)) would
    # shuffle the component order nondeterministically between runs.
    components_to_get = list(dict.fromkeys(components_to_get))
    return components_to_get, points_dict
# @st.cache_data(hash_funcs={UploadedFile: lambda p: str(p.name)})
def load_pose(uploaded_file: UploadedFile) -> Pose:
    """Read an uploaded .pose file (optionally zstd-compressed .pose.zst) into a Pose."""
    raw_bytes = uploaded_file.read()
    if uploaded_file.name.endswith(".zst"):
        raw_bytes = decompress(raw_bytes)
    return Pose.read(raw_bytes)
@st.cache_data(hash_funcs={Pose: lambda p: np.asarray(p.body.data.data)})
def get_pose_frames(pose: Pose, transparency: bool = False):
    """Draw every frame of *pose*; return (raw OpenCV frames, PIL images)."""
    visualizer = PoseVisualizer(pose)
    raw_frames = list(visualizer.draw())
    # Frames come out of the visualizer in BGR; convert for PIL, keeping an
    # alpha channel when transparency was requested.
    conversion = visualizer.cv2.COLOR_BGR2RGBA if transparency else visualizer.cv2.COLOR_BGR2RGB
    pil_images = [
        Image.fromarray(visualizer.cv2.cvtColor(raw, conversion)) for raw in raw_frames
    ]
    return raw_frames, pil_images
def get_pose_gif(
    pose: Pose,
    step: int = 1,
    start_frame: Optional[int] = None,
    end_frame: Optional[int] = None,
    fps: Optional[float] = None,
):
    """Render *pose* to an animated GIF and return its bytes.

    Args:
        pose: the Pose to visualize.
        step: keep every nth frame.
        start_frame: first frame of the range (None = from the beginning).
        end_frame: frame after the last one kept (None = to the end).
        fps: playback speed override; if given, written onto ``pose.body.fps``.

    NOTE(review): setting ``fps`` mutates the caller's pose object in place —
    confirm callers expect this rather than a copy.
    """
    if fps is not None:
        pose.body.fps = fps
    v = PoseVisualizer(pose)
    # draw() yields frames lazily; materialize, then slice the requested range.
    frames = list(v.draw())[start_frame:end_frame:step]
    return v.save_gif(None, frames=frames)
# --- Intro / landing copy and sample-file links -----------------------------
st.write("# Pose-format explorer")
st.write(
    "`pose-format` is a toolkit/library for 'handling, manipulation, and visualization of poses'. See [The documentation](https://pose-format.readthedocs.io/en/latest/)"
)
st.write(
    "I made this app to help me visualize and understand the format, including different 'components' and 'points', and what they are named."
)
st.write(
    "If you need a .pose file, here's a few:"
)
st.write("* One of [me doing a self-introduction](https://drive.google.com/file/d/1_L5sYVhONDBABuTmQUvjsl94LbFqzEyP/view?usp=sharing)")
st.write("* One of [me signing ASL 'HOUSE'](https://drive.google.com/file/d/1uggYqLyTA4XdDWaWsS9w5hKaPwW86IF_/view?usp=sharing)")
st.write(
    "* ... or [the same file, but with the 10 extra landmarks](https://drive.google.com/file/d/1XHkfn24PIas1a3XUUXYXTX2DvYeUDuCI/view?usp=drive_link) from mediapipe holistic's [`refine_face_landmarks` option](https://github.com/sign-language-processing/pose/?tab=readme-ov-file#2-estimating-pose-from-video)"
)
# Accepts plain .pose or zstd-compressed .pose.zst (load_pose handles both).
uploaded_file = st.file_uploader("Upload a .pose file", type=[".pose", ".pose.zst"])
# --- Main app: runs only once a file has been uploaded ----------------------
if uploaded_file is not None:
    with st.spinner(f"Loading {uploaded_file.name}"):
        pose = load_pose(uploaded_file)
        # st.write(pose.body.data.shape)
        frames, images = get_pose_frames(pose=pose)
    st.success("Done loading!")

    st.write("### File Info")
    with st.expander(f"Show full Pose-format header from {uploaded_file.name}"):
        st.write(pose.header)

    st.write(f"### Selection")
    component_selection = st.radio(
        "How to select components?", options=COMPONENT_SELECTION_METHODS
    )

    component_names = [c.name for c in pose.header.components]
    chosen_component_names = []
    points_dict = {}
    HIDE_LEGS = False

    if component_selection == "manual":
        # Let the user pick components, then optionally narrow down the points
        # within each chosen component.
        chosen_component_names = st.pills(
            "Select components to visualize",
            options=component_names,
            default=component_names,
            selection_mode="multi",
        )
        for component in pose.header.components:
            if component.name in chosen_component_names:
                with st.expander(f"Points for {component.name}"):
                    selected_points = st.multiselect(
                        f"Select points for component {component.name}:",
                        options=component.points,
                        default=component.points,
                    )
                    if (
                        selected_points != component.points
                    ):  # Only add entry if not all points are selected
                        points_dict[component.name] = selected_points
    elif component_selection == "signclip":
        st.write("Selected landmarks used for [SignCLIP](https://arxiv.org/abs/2407.01264).")
        chosen_component_names = [
            "POSE_LANDMARKS",
            "FACE_LANDMARKS",
            "LEFT_HAND_LANDMARKS",
            "RIGHT_HAND_LANDMARKS",
        ]
        points_dict = {"FACE_LANDMARKS": FACEMESH_CONTOURS_POINTS}
    elif component_selection == "reduce_holistic":
        # No selection needed here: reduce_holistic is applied wholesale when
        # the filter button is pressed below.
        st.write("Using [pose_format.utils.generic.reduce_holistic](https://github.com/sign-language-processing/pose/blob/master/src/python/pose_format/utils/generic.py#L286)")
    elif component_selection == "youtube-asl":
        st.write("Selected landmarks used for [YouTube-ASL](https://arxiv.org/pdf/2306.15162).")
        # https://arxiv.org/pdf/2306.15162
        # For each hand, we use all 21 landmark points.
        # Colin: So that's
        # For the pose, we use 6 landmark points, for the shoulders, elbows and hips
        # These are indices 11, 12, 13, 14, 23, 24
        # For the face, we use 37 landmark points, from the eyes, eyebrows, lips, and face outline.
        # These are indices 0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93, 133, 151, 152, 159, 172, 178,
        # 181, 263, 269, 276, 282, 285, 291, 294, 311, 323, 362, 386, 397, 468, 473.
        # Colin: note that these are with refine_face_landmarks on, and are relative to the component itself. Working it all out the result is:
        chosen_component_names = ['POSE_LANDMARKS', 'FACE_LANDMARKS', 'LEFT_HAND_LANDMARKS', 'RIGHT_HAND_LANDMARKS']
        points_dict = {
            "POSE_LANDMARKS": [
                "LEFT_SHOULDER",
                "RIGHT_SHOULDER",
                "LEFT_HIP",
                "RIGHT_HIP",
                "LEFT_ELBOW",
                "RIGHT_ELBOW"
            ],
            "FACE_LANDMARKS": [
                "0",
                "4",
                "13",
                "14",
                "17",
                "33",
                "37",
                "39",
                "46",
                "52",
                "55",
                "61",
                "64",
                "81",
                "82",
                "93",
                "133",
                "151",
                "152",
                "159",
                "172",
                "178",
                "181",
                "263",
                "269",
                "276",
                "282",
                "285",
                "291",
                "294",
                "311",
                "323",
                "362",
                "386",
                "397",
            ]
        }
        # check if we have the extra points from refine_face_landmarks
        additional_face_points = ["468", "473"]
        for additional_point in additional_face_points:
            try:
                # get_point_index raises ValueError when the point is absent;
                # only append points that actually exist in this file.
                pose.header.get_point_index("FACE_LANDMARKS", additional_point)
                points_dict['FACE_LANDMARKS'].append(additional_point)
            except ValueError:
                # not in the list
                # st.write(f"Point {additional_point} not in file")
                pass

    # Filter section
    st.write("### Filter .pose File")
    filtered = st.button("Apply Filter!")
    if filtered:
        st.write(f"Filtering strategy: {component_selection}")
        if component_selection == "reduce_holistic":
            pose = reduce_holistic(pose)
            st.write("Used pose_format.reduce_holistic")
        else:
            pose = pose.get_components(components=chosen_component_names, points=points_dict if points_dict else None
            )
            with st.expander("Show component list and points dict used for get_components"):
                st.write("##### Component names")
                st.write(chosen_component_names)
                st.write("##### Points dict")
                st.write(points_dict)
            with st.expander("How to replicate in pose-format"):
                st.write("##### Usage:")
                st.write("How to achieve the same result with pose-format library")
                usage_string = f"components={chosen_component_names}\npoints_dict={points_dict}\npose = pose.get_components(components=components, points=points_dict)"
                st.code(usage_string)
        if HIDE_LEGS:
            pose = pose_hide_legs(pose, remove=True)
        # Persist so the filtered pose survives Streamlit reruns (e.g. when the
        # user later presses "Visualize" without re-clicking "Apply Filter!").
        st.session_state.filtered_pose = pose

    filtered_pose = st.session_state.get("filtered_pose", pose)
    if filtered_pose:
        st.write("#### Filtered .pose file")
        st.write(f"Pose data shape: {filtered_pose.body.data.shape}")
        with st.expander("Show header"):
            st.write(filtered_pose.header)
        with st.expander("Show body"):
            st.write(filtered_pose.body)
        pose_file_out = Path(uploaded_file.name).with_suffix(".pose")
        with pose_file_out.open("wb") as f:
            # BUGFIX: write the pose we are displaying/offering, not the local
            # `pose` variable — on a rerun without re-filtering, `pose` is the
            # freshly-loaded unfiltered file while `filtered_pose` came from
            # session state.
            filtered_pose.write(f)
        with pose_file_out.open("rb") as f:
            st.download_button(
                "Download Filtered Pose", f, file_name=pose_file_out.name
            )

    st.write("### Visualization")
    step = st.select_slider(
        "Step value to select every nth image", list(range(1, len(frames))), value=1
    )
    fps = st.slider(
        "FPS for visualization",
        min_value=1.0,
        max_value=filtered_pose.body.fps,
        value=filtered_pose.body.fps,
    )
    start_frame, end_frame = st.slider(
        "Select Frame Range",
        0,
        len(frames),
        (0, len(frames)),  # Default range
    )
    # Visualization button logic
    if st.button("Visualize"):
        # Load filtered pose if it exists; otherwise, use the unfiltered pose
        pose_bytes = get_pose_gif(
            pose=filtered_pose,
            step=step,
            start_frame=start_frame,
            end_frame=end_frame,
            fps=fps,
        )
        if pose_bytes is not None:
            st.image(pose_bytes)