|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Reach oracle.""" |
|
import numpy as np |
|
from tf_agents.policies import py_policy |
|
from tf_agents.trajectories import policy_step |
|
from tf_agents.trajectories import time_step as ts |
|
from tf_agents.typing import types |
|
|
|
|
|
import pybullet |
|
|
|
|
|
class ReachOracle(py_policy.PyPolicy):
    """Scripted policy that steps the effector toward the env's reach target.

    Each action is the 2-D offset from the observed effector target
    translation to ``env.reach_target_translation``, optionally perturbed
    with Gaussian noise and limited in length to a per-step maximum.
    """

    def __init__(self, env, block_pushing_oracles_action_std=0.0):
        """Build the oracle around ``env``.

        Args:
            env: Environment the oracle queries. It must provide
                ``time_step_spec()``, ``action_spec()``,
                ``reach_target_translation`` and ``get_control_frequency()``.
            block_pushing_oracles_action_std: Scale applied to the
                standard-normal noise added to every action. A value of
                ``0.0`` disables the noise.
        """
        super().__init__(env.time_step_spec(), env.action_spec())
        self._env = env
        # Fixed seed, so the noise sequence is reproducible per instance.
        self._np_random_state = np.random.RandomState(0)
        self._block_pushing_oracles_action_std = block_pushing_oracles_action_std

    def _action(self, time_step, policy_state):
        """Compute the next bounded step toward the reach target.

        Args:
            time_step: Current time step; its observation must contain the
                key ``"effector_target_translation"``.
            policy_state: Not used by this policy.

        Returns:
            A ``PolicyStep`` whose ``action`` is the float32 step vector.
        """
        # Expressed as a velocity and converted to a per-step distance below
        # using the env's control frequency (units presumably m/s -- confirm).
        speed_limit = 0.2

        effector_xy = time_step.observation["effector_target_translation"]
        goal_xy = self._env.reach_target_translation
        step = goal_xy - effector_xy

        noise_std = self._block_pushing_oracles_action_std
        if noise_std != 0.0:
            # In-place add keeps the dtype of `step`; random numbers are only
            # drawn when noise is enabled.
            step += self._np_random_state.randn(2) * noise_std

        # Limit the step so a single action never exceeds the allowed
        # per-step distance.
        step_cap = speed_limit * (1 / self._env.get_control_frequency())
        magnitude = np.linalg.norm(step)
        if magnitude > step_cap:
            step = (step / magnitude) * step_cap

        return policy_step.PolicyStep(action=np.asarray(step, dtype=np.float32))
|
|