f1tenth · hzheng40 · Dec 26, 2023 · Oct 18, 2023 · Oct 18, 2023 · Nov 24, 2023
diff --git a/examples/waypoint_follow.py b/examples/waypoint_follow.py
@@ -298,17 +298,19 @@ def main():
         "vgain": 1,
     }
 
+    num_agents = 3
     env = gym.make(
         "f110_gym:f110-v0",
         config={
             "map": "Spielberg",
-            "num_agents": 1,
+            "num_agents": num_agents,
             "timestep": 0.01,
             "integrator": "rk4",
             "control_input": "speed",
             "model": "st",
             "observation_config": {"type": "kinematic_state"},
             "params": {"mu": 1.0},
+            "reset_config": {"type": "random_static"},
         },
         render_mode="human",
     )

diff --git a/gym/f110_gym/envs/f110_env.py b/gym/f110_gym/envs/f110_env.py
@@ -36,7 +36,8 @@
 # base classes
 from f110_gym.envs.base_classes import Simulator, DynamicModel
 from f110_gym.envs.observation import observation_factory
-
+from f110_gym.envs.reset import make_reset_fn
+from f110_gym.envs.track import Track
 from f110_gym.envs.utils import deep_update
 
 
@@ -161,6 +162,11 @@ def __init__(self, config: dict = None, render_mode=None, **kwargs):
             self.action_type.space, self.num_agents
         )
 
+        # reset modes
+        self.reset_fn = make_reset_fn(
+            **self.config["reset_config"], track=self.track, num_agents=self.num_agents
+        )
+
         # stateful observations for rendering
         # add choice of colors (same, random, ...)
         self.render_obs = None
@@ -221,6 +227,7 @@ def default_config(cls) -> dict:
             "model": "st",
             "control_input": "speed",
             "observation_config": {"type": "original"},
+            "reset_config": {"type": "grid_static"},
         }
 
     def configure(self, config: dict) -> None:
@@ -366,14 +373,16 @@ def reset(self, seed=None, options=None):
         self.toggle_list = np.zeros((self.num_agents,))
 
         # states after reset
-        if options is None or "poses" not in options:
-            options = {"poses": np.zeros((self.num_agents, 3))}
-        assert "poses" in options, "Must provide initial poses for reset"
-        assert isinstance(options["poses"], np.ndarray) and options["poses"].shape == (
+        if options is not None and "poses" in options:
+            poses = options["poses"]
+        else:
+            poses = self.reset_fn.sample()
+
+        assert isinstance(poses, np.ndarray) and poses.shape == (
             self.num_agents,
             3,
         ), "Initial poses must be a numpy array of shape (num_agents, 3)"
-        poses = options["poses"]
+
         self.start_xs = poses[:, 0]
         self.start_ys = poses[:, 1]
         self.start_thetas = poses[:, 2]

diff --git a/gym/f110_gym/envs/observation.py b/gym/f110_gym/envs/observation.py
@@ -36,7 +36,9 @@ def __init__(self, env):
     def space(self):
         num_agents = self.env.num_agents
         scan_size = self.env.sim.agents[0].scan_simulator.num_beams
-        scan_range = self.env.sim.agents[0].scan_simulator.max_range
+        scan_range = (
+            self.env.sim.agents[0].scan_simulator.max_range + 0.5
+        )  # add 1.0 to avoid small errors
         large_num = 1e30  # large number to avoid unbounded obs space (ie., low=-inf or high=inf)
 
         obs_space = gym.spaces.Dict(

diff --git a/gym/f110_gym/envs/reset/__init__.py b/gym/f110_gym/envs/reset/__init__.py
@@ -0,0 +1,20 @@
+from f110_gym.envs.reset.masked_reset import GridResetFn, AllTrackResetFn
+from f110_gym.envs.reset.reset_fn import ResetFn
+from f110_gym.envs.track import Track
+
+
+def make_reset_fn(type: str, track: Track, num_agents: int, **kwargs) -> ResetFn:
+    if type == "grid_static":
+        return GridResetFn(track=track, num_agents=num_agents, shuffle=False, **kwargs)
+    elif type == "grid_random":
+        return GridResetFn(track=track, num_agents=num_agents, shuffle=True, **kwargs)
+    elif type == "random_static":
+        return AllTrackResetFn(
+            track=track, num_agents=num_agents, shuffle=False, **kwargs
+        )
+    elif type == "random_random":
+        return AllTrackResetFn(
+            track=track, num_agents=num_agents, shuffle=True, **kwargs
+        )
+    else:
+        raise ValueError(f"invalid reset type {type}")
diff --git a/gym/f110_gym/envs/reset/masked_reset.py b/gym/f110_gym/envs/reset/masked_reset.py
@@ -0,0 +1,111 @@
+from abc import abstractmethod
+
+import numpy as np
+
+from f110_gym.envs.reset.reset_fn import ResetFn
+from f110_gym.envs.reset.utils import sample_around_waypoint
+from f110_gym.envs.track import Track
+
+
+class MaskedResetFn(ResetFn):
+    @abstractmethod
+    def get_mask(self) -> np.ndarray:
+        pass
+
+    def __init__(
+        self,
+        track: Track,
+        num_agents: int,
+        move_laterally: bool,
+        min_dist: float,
+        max_dist: float,
+    ):
+        self.track = track
+        self.n_agents = num_agents
+        self.min_dist = min_dist
+        self.max_dist = max_dist
+        self.move_laterally = move_laterally
+        self.mask = self.get_mask()
+
+    def sample(self) -> np.ndarray:
+        waypoint_id = np.random.choice(np.where(self.mask)[0])
+        poses = sample_around_waypoint(
+            track=self.track,
+            waypoint_id=waypoint_id,
+            n_agents=self.n_agents,
+            min_dist=self.min_dist,
+            max_dist=self.max_dist,
+            move_laterally=self.move_laterally,
+        )
+        return poses
+
+
+class GridResetFn(MaskedResetFn):
+    def __init__(
+        self,
+        track: Track,
+        num_agents: int,
+        move_laterally: bool = True,
+        shuffle: bool = True,
+        start_width: float = 1.0,
+        min_dist: float = 1.5,
+        max_dist: float = 2.5,
+    ):
+        self.start_width = start_width
+        self.shuffle = shuffle
+
+        super().__init__(
+            track=track,
+            num_agents=num_agents,
+            move_laterally=move_laterally,
+            min_dist=min_dist,
+            max_dist=max_dist,
+        )
+
+    def get_mask(self) -> np.ndarray:
+        # approximate the nr waypoints in the starting line
+        step_size = self.track.centerline.length / self.track.centerline.n
+        n_wps = int(self.start_width / step_size)
+
+        mask = np.zeros(self.track.centerline.n)
+        mask[:n_wps] = 1
+        return mask.astype(bool)
+
+    def sample(self) -> np.ndarray:
+        poses = super().sample()
+
+        if self.shuffle:
+            np.random.shuffle(poses)
+
+        return poses
+
+
+class AllTrackResetFn(MaskedResetFn):
+    def __init__(
+        self,
+        track: Track,
+        num_agents: int,
+        move_laterally: bool = True,
+        shuffle: bool = True,
+        min_dist: float = 1.5,
+        max_dist: float = 2.5,
+    ):
+        super().__init__(
+            track=track,
+            num_agents=num_agents,
+            move_laterally=move_laterally,
+            min_dist=min_dist,
+            max_dist=max_dist,
+        )
+        self.shuffle = shuffle
+
+    def get_mask(self) -> np.ndarray:
+        return np.ones(self.track.centerline.n).astype(bool)
+
+    def sample(self) -> np.ndarray:
+        poses = super().sample()
+
+        if self.shuffle:
+            np.random.shuffle(poses)
+
+        return poses
diff --git a/gym/f110_gym/envs/reset/reset_fn.py b/gym/f110_gym/envs/reset/reset_fn.py
@@ -0,0 +1,9 @@
+from abc import abstractmethod
+
+import numpy as np
+
+
+class ResetFn:
+    @abstractmethod
+    def sample(self) -> np.ndarray:
+        pass
diff --git a/gym/f110_gym/envs/reset/utils.py b/gym/f110_gym/envs/reset/utils.py
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+import numpy as np
+
+from f110_gym.envs.track import Track
+
+
+def sample_around_waypoint(
+    track: Track,
+    waypoint_id: int,
+    n_agents: int,
+    min_dist: float,
+    max_dist: float,
+    move_laterally: bool = True,
+) -> np.ndarray:
+    """
+    Compute n poses around a given waypoint in the track.
+    It iteratively samples the next agent within a distance range from the previous one.
+
+    Args:
+        - track: the track object
+        - waypoint_id: the id of the first waypoint from which start the sampling
+        - n_agents: the number of agents
+        - min_dist: the minimum distance between two consecutive agents
+        - max_dist: the maximum distance between two consecutive agents
+        - move_laterally: if True, the agents are sampled on the left/right of the track centerline
+    """
+    current_wp_id = waypoint_id
+    n_waypoints = track.centerline.n
+
+    poses = []
+    rnd_sign = (
+        np.random.choice([-1, 1]) if move_laterally else 1
+    )  # random sign to sample lateral position (left/right)
+    for i in range(n_agents):
+        # compute pose from current wp_id
+        wp = [
+            track.centerline.xs[current_wp_id],
+            track.centerline.ys[current_wp_id],
+        ]
+        next_wp_id = (current_wp_id + 1) % n_waypoints
+        next_wp = [
+            track.centerline.xs[next_wp_id],
+            track.centerline.ys[next_wp_id],
+        ]
+        theta = np.arctan2(next_wp[1] - wp[1], next_wp[0] - wp[0])
+
+        x, y = wp[0], wp[1]
+        if n_agents > 1:
+            lat_offset = rnd_sign * (-1.0) ** i * (1.0 / n_agents)
+            x += lat_offset * np.cos(theta + np.pi / 2)
+            y += lat_offset * np.sin(theta + np.pi / 2)
+
+        pose = np.array([x, y, theta])
+        poses.append(pose)
+        # find id of next waypoint which has mind <= dist <= maxd
+        first_id, interval_len = (
+            None,
+            None,
+        )  # first wp id with dist > mind, len of the interval btw first/last wp
+        pnt_id = current_wp_id  # moving pointer to scan the next waypoints
+        dist = 0.0
+        while dist <= max_dist:
+            # sanity check
+            if pnt_id > n_waypoints - 1:
+                pnt_id = 0
+            # increment distance
+            x_diff = track.centerline.xs[pnt_id] - track.centerline.xs[pnt_id - 1]
+            y_diff = track.centerline.ys[pnt_id] - track.centerline.ys[pnt_id - 1]
+            dist = dist + np.linalg.norm(
+                [y_diff, x_diff]
+            )  # approx distance by summing linear segments
+            # look for sampling interval
+            if first_id is None and dist >= min_dist:  # not found first id yet
+                first_id = pnt_id
+                interval_len = 0
+            if (
+                first_id is not None and dist <= max_dist
+            ):  # found first id, increment interval length
+                interval_len += 1
+            pnt_id += 1
+        # sample next waypoint
+        current_wp_id = (first_id + np.random.randint(0, interval_len + 1)) % (
+            n_waypoints
+        )
+
+    return np.array(poses)