Add a wrapper that concatenates observations into a single tensor.

leonardhussenot · copybara-github · commit 0a9d97e00311 · 2021-12-02T08:44:16.000-08:00
It allows running seamlessly on DMControl and Gym environments.

PiperOrigin-RevId: 413688077
Change-Id: Id59f6bf2800088c71438e3a0e0eaa5d9debdf9ed
diff --git a/acme/wrappers/__init__.py b/acme/wrappers/__init__.py
@@ -19,6 +19,7 @@
 from acme.wrappers.base import EnvironmentWrapper
 from acme.wrappers.base import wrap_all
 from acme.wrappers.canonical_spec import CanonicalSpecWrapper
+from acme.wrappers.concatenate_observations import ConcatObservationWrapper
 from acme.wrappers.frame_stacking import FrameStackingWrapper
 from acme.wrappers.gym_wrapper import GymAtariAdapter
 from acme.wrappers.gym_wrapper import GymWrapper
diff --git a/acme/wrappers/concatenate_observations.py b/acme/wrappers/concatenate_observations.py
@@ -0,0 +1,89 @@
+# python3
+# Copyright 2018 DeepMind Technologies Limited. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Wrapper that implements concatenation of observation fields."""
+
+from typing import Sequence, Optional
+
+from acme import types
+from acme.jax import utils
+from acme.wrappers import base
+import dm_env
+import numpy as np
+import tree
+
+
+def _concat(values: types.NestedArray) -> np.ndarray:
+  """Joins every leaf of `values` into one array along axis 0.
+
+  Each leaf is promoted to at least one dimension first, so scalar leaves
+  contribute a single element. Leaves must agree on all trailing dimensions.
+
+  Args:
+    values: nested structure of arrays (or scalars) to join.
+  Returns:
+    A single array holding the leaves in `tree.flatten` order.
+  """
+  flat_leaves = [np.atleast_1d(leaf) for leaf in tree.flatten(values)]
+  return np.concatenate(flat_leaves, axis=0)
+
+
+class ConcatObservationWrapper(base.EnvironmentWrapper):
+  """Environment wrapper that merges observation fields into one array.
+
+  The wrapped environment must expose its observations as a mapping of named
+  fields; the kept fields (expected to be 1-dimensional) are concatenated.
+  Observation fields that are not in name_filter are dropped.
+  """
+
+  def __init__(self, environment: dm_env.Environment,
+               name_filter: Optional[Sequence[str]] = None):
+    """Builds the wrapper and precomputes the concatenated observation spec.
+
+    Args:
+      environment: Environment to wrap.
+      name_filter: Names of the observation fields to keep; None keeps all.
+    """
+    super().__init__(environment)
+    wrapped_spec = environment.observation_spec()
+    names = list(wrapped_spec.keys()) if name_filter is None else name_filter
+    # Requested names that the wrapped spec does not define are ignored.
+    self._obs_names = [name for name in names if name in wrapped_spec]
+
+    # Convert a placeholder observation built from the spec for shape/dtype.
+    template = self._convert_observation(utils.zeros_like(wrapped_spec))
+    self._observation_spec = dm_env.specs.BoundedArray(
+        shape=template.shape,
+        dtype=template.dtype,
+        minimum=-np.inf,
+        maximum=np.inf,
+        name='state')
+
+  def _convert_observation(self, observation):
+    kept = {name: observation[name] for name in self._obs_names}
+    return _concat(kept)
+
+  def step(self, action) -> dm_env.TimeStep:
+    raw = self._environment.step(action)
+    flat_obs = self._convert_observation(raw.observation)
+    return raw._replace(observation=flat_obs)
+
+  def reset(self) -> dm_env.TimeStep:
+    raw = self._environment.reset()
+    flat_obs = self._convert_observation(raw.observation)
+    return raw._replace(observation=flat_obs)
+
+  def observation_spec(self) -> types.NestedSpec:
+    return self._observation_spec
diff --git a/examples/control/helpers.py b/examples/control/helpers.py
@@ -22,12 +22,19 @@
 
 def make_environment(evaluation: bool = False,
                      domain_name: str = 'cartpole',
-                     task_name: str = 'balance') -> dm_env.Environment:
+                     task_name: str = 'balance',
+                     concatenate_observations: bool = False
+                     ) -> dm_env.Environment:
   """Implements a control suite environment factory."""
   # Nothing special to be done for evaluation environment.
   del evaluation
 
   environment = suite.load(domain_name, task_name)
   environment = wrappers.SinglePrecisionWrapper(environment)
-
+  if concatenate_observations:
+    # With no name_filter the wrapper keeps every observation field, so the
+    # environment does not have to be reset just to list its field names.
+    environment = wrappers.ConcatObservationWrapper(environment)
   return environment
+
+