From 5df35d32e9272076b3909b7d7a9d10b7f3729763 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Tue, 5 Sep 2023 16:24:38 +0100 Subject: [PATCH 1/8] Allow to load halos with rockstar --- yt/frontends/rockstar/data_structures.py | 100 ++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/yt/frontends/rockstar/data_structures.py b/yt/frontends/rockstar/data_structures.py index e93ec9c0b56..d42a072bca1 100644 --- a/yt/frontends/rockstar/data_structures.py +++ b/yt/frontends/rockstar/data_structures.py @@ -1,5 +1,7 @@ import glob import os +from functools import cached_property +from typing import Optional import numpy as np @@ -15,15 +17,48 @@ class RockstarBinaryFile(HaloCatalogFile): + header: dict + _position_offset: int + _member_offset: int + _Npart: np.array + _ids_halos: list[int] + _file_size: int + def __init__(self, ds, io, filename, file_id, range): with open(filename, "rb") as f: self.header = fpu.read_cattrs(f, header_dt, "=") self._position_offset = f.tell() + pcount = self.header["num_halos"] + + halos = np.fromfile(f, dtype=io._halo_dt, count=pcount) + self._member_offset = f.tell() + self._ids_halos = list(halos["particle_identifier"]) + self._Npart = halos["num_p"] + f.seek(0, os.SEEK_END) self._file_size = f.tell() + expected_end = self._member_offset + 8 * self._Npart.sum() + if expected_end != self._file_size: + raise RuntimeError( + f"File size {self._file_size} does not match expected size {expected_end}." + ) + super().__init__(ds, io, filename, file_id, range) + def _read_member(self, ihalo: int) -> Optional[np.array]: + if ihalo not in self._ids_halos: + return None + + ind_halo = self._ids_halos.index(ihalo) + + ipos = self._member_offset + 8 * self._Npart[:ind_halo].sum() + + with open(self.filename, "rb") as f: + f.seek(ipos, os.SEEK_SET) + ids = np.fromfile(f, dtype=np.int64, count=self._Npart[ind_halo]) + return ids + def _read_particle_positions(self, ptype, f=None): """ Read all particle positions in this file. @@ -48,8 +83,18 @@ def _read_particle_positions(self, ptype, f=None): return pos +class RockstarIndex(ParticleIndex): + def get_member(self, ihalo: int): + for df in self.data_files: + members = df._read_member(ihalo) + if members is not None: + return members + + raise RuntimeError(f"Could not find halo {ihalo} in any data file.") + + class RockstarDataset(ParticleDataset): - _index_class = ParticleIndex + _index_class = RockstarIndex _file_class = RockstarBinaryFile _field_info_class = RockstarFieldInfo _suffix = ".bin" @@ -122,3 +167,56 @@ def _is_valid(cls, filename: str, *args, **kwargs) -> bool: return False else: return header["magic"] == 18077126535843729616 + + def halo(self, halo_id, ptype="DM"): + return RockstarHaloContainer( + halo_id, + ptype, + parent_ds=None, + halo_ds=self, + ) + + +class RockstarHaloContainer: + def __init__(self, ptype, particle_identifier, parent_ds, halo_ds): + # if ptype not in parent_ds.particle_types_raw: + # raise RuntimeError( + # f'Possible halo types are {parent_ds.particle_types_raw}, supplied "{ptype}".' + # ) + + self.ds = parent_ds + self.halo_ds = halo_ds + self.ptype = ptype + self.particle_identifier = particle_identifier + + def __repr__(self): + return "%s_%s_%09d" % (self.ds, self.ptype, self.particle_identifier) + + def __getitem__(self, key): + return self.region[key] + + @cached_property + def ihalo(self): + halo_id = self.particle_identifier + halo_ids = list(self.halo_ds.r["halos", "particle_identifier"].astype(int)) + ihalo = halo_ids.index(halo_id) + + assert halo_ids[ihalo] == halo_id + + return ihalo + + @property + def mass(self): + return self.halo_ds.r["halos", "particle_mass"][self.ihalo] + + @property + def position(self): + return self.halo_ds.r["halos", "particle_position"][self.ihalo] + + @property + def velocity(self): + return self.halo_ds.r["halos", "particle_velocity"][self.ihalo] + + @property + def member_ids(self): + return self.halo_ds.index.get_member(self.particle_identifier) From d9e459804e0f8f367a04dec2901ce3b7a3ba35e8 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Tue, 5 Sep 2023 16:39:43 +0100 Subject: [PATCH 2/8] Test halo accessor --- yt/frontends/rockstar/data_structures.py | 4 ++-- yt/frontends/rockstar/tests/test_outputs.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/yt/frontends/rockstar/data_structures.py b/yt/frontends/rockstar/data_structures.py index d42a072bca1..e6dc4b117a5 100644 --- a/yt/frontends/rockstar/data_structures.py +++ b/yt/frontends/rockstar/data_structures.py @@ -1,7 +1,7 @@ import glob import os from functools import cached_property -from typing import Optional +from typing import List, Optional import numpy as np @@ -21,7 +21,7 @@ class RockstarBinaryFile(HaloCatalogFile): _position_offset: int _member_offset: int _Npart: np.array - _ids_halos: list[int] + _ids_halos: List[int] _file_size: int def __init__(self, ds, io, filename, file_id, range): diff --git a/yt/frontends/rockstar/tests/test_outputs.py b/yt/frontends/rockstar/tests/test_outputs.py index 920881ec1cc..617357753b3 100644 --- a/yt/frontends/rockstar/tests/test_outputs.py +++ b/yt/frontends/rockstar/tests/test_outputs.py @@ -38,3 +38,23 @@ def test_particle_selection(): ds = data_dir_load(r1) psc = ParticleSelectionComparison(ds) psc.run_defaults() + + +@requires_file(r1) +def test_halo_loading(): + ds = data_dir_load(r1) + + for halo_id, Npart in zip( + ds.r["halos", "particle_identifier"], + ds.r["halos", "num_p"], + ): + halo = ds.halo("halos", halo_id) + assert halo is not None + + # Try accessing properties + halo.position + halo.velocity + halo.mass + + # Make sure we can access the member particles + assert len(halo.member_ids) == Npart From 1cd1847ea6b7e5a1d7597bb8728456cdd7a50616 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 6 Sep 2023 10:22:27 +0100 Subject: [PATCH 3/8] Take review comments into account --- yt/frontends/rockstar/data_structures.py | 44 +++++++++++++++++------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/yt/frontends/rockstar/data_structures.py b/yt/frontends/rockstar/data_structures.py index e6dc4b117a5..a4e7433781e 100644 --- a/yt/frontends/rockstar/data_structures.py +++ b/yt/frontends/rockstar/data_structures.py @@ -1,7 +1,7 @@ import glob import os from functools import cached_property -from typing import List, Optional +from typing import Any, List, Optional import numpy as np @@ -11,6 +11,7 @@ from yt.geometry.particle_geometry_handler import ParticleIndex from yt.utilities import fortran_utils as fpu from yt.utilities.cosmology import Cosmology +from yt.utilities.exceptions import YTFieldNotFound from .definitions import header_dt from .fields import RockstarFieldInfo @@ -20,7 +21,7 @@ class RockstarBinaryFile(HaloCatalogFile): header: dict _position_offset: int _member_offset: int - _Npart: np.array + _Npart: "np.ndarray[Any, np.dtype[np.int64]]" _ids_halos: List[int] _file_size: int @@ -46,7 +47,9 @@ def __init__(self, ds, io, filename, file_id, range): super().__init__(ds, io, filename, file_id, range) - def _read_member(self, ihalo: int) -> Optional[np.array]: + def _read_member( + self, ihalo: int + ) -> Optional["np.ndarray[Any, np.dtype[np.int64]]"]: if ihalo not in self._ids_halos: return None @@ -59,7 +62,7 @@ def _read_member(self, ihalo: int) -> Optional[np.array]: ids = np.fromfile(f, dtype=np.int64, count=self._Npart[ind_halo]) return ids - def _read_particle_positions(self, ptype, f=None): + def _read_particle_positions(self, ptype: str, f=None): """ Read all particle positions in this file. """ @@ -168,21 +171,21 @@ def _is_valid(cls, filename: str, *args, **kwargs) -> bool: else: return header["magic"] == 18077126535843729616 - def halo(self, halo_id, ptype="DM"): + def halo(self, ptype, particle_identifier): return RockstarHaloContainer( - halo_id, ptype, + particle_identifier, parent_ds=None, halo_ds=self, ) class RockstarHaloContainer: - def __init__(self, ptype, particle_identifier, parent_ds, halo_ds): - # if ptype not in parent_ds.particle_types_raw: - # raise RuntimeError( - # f'Possible halo types are {parent_ds.particle_types_raw}, supplied "{ptype}".' - # ) + def __init__(self, ptype, particle_identifier, *, parent_ds, halo_ds): + if ptype not in halo_ds.particle_types_raw: + raise RuntimeError( + f'Possible halo types are {halo_ds.particle_types_raw}, supplied "{ptype}".' + ) self.ds = parent_ds self.halo_ds = halo_ds @@ -190,10 +193,25 @@ def __init__(self, ptype, particle_identifier, parent_ds, halo_ds): self.particle_identifier = particle_identifier def __repr__(self): - return "%s_%s_%09d" % (self.ds, self.ptype, self.particle_identifier) + return "%s_%s_%09d" % (self.halo_ds, self.ptype, self.particle_identifier) def __getitem__(self, key): - return self.region[key] + if isinstance(key, tuple): + ptype, field = key + else: + ptype = self.ptype + field = key + + data = { + "mass": self.mass, + "position": self.position, + "velocity": self.velocity, + "member_ids": self.member_ids, + } + if ptype == "halos" and field in data: + return data[field] + + raise YTFieldNotFound((ptype, field), dataset=self.ds) @cached_property def ihalo(self): From e4f8a86dd9aafe869c85cd2150ce92d4b2fafcb8 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Fri, 6 Oct 2023 10:32:10 +0200 Subject: [PATCH 4/8] Fix linting --- yt/frontends/rockstar/data_structures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt/frontends/rockstar/data_structures.py b/yt/frontends/rockstar/data_structures.py index a4e7433781e..3eafc659808 100644 --- a/yt/frontends/rockstar/data_structures.py +++ b/yt/frontends/rockstar/data_structures.py @@ -1,7 +1,7 @@ import glob import os from functools import cached_property -from typing import Any, List, Optional +from typing import Any, Optional import numpy as np @@ -22,7 +22,7 @@ class RockstarBinaryFile(HaloCatalogFile): _position_offset: int _member_offset: int _Npart: "np.ndarray[Any, np.dtype[np.int64]]" - _ids_halos: List[int] + _ids_halos: list[int] _file_size: int def __init__(self, ds, io, filename, file_id, range): From 88425d51c11ef3b5a553a9175a592ab64e4171af Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Tue, 20 Feb 2024 21:44:17 +0100 Subject: [PATCH 5/8] Use numpy's assert equal to test for equality --- yt/frontends/rockstar/tests/test_outputs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/rockstar/tests/test_outputs.py b/yt/frontends/rockstar/tests/test_outputs.py index 617357753b3..0acf4dc1d72 100644 --- a/yt/frontends/rockstar/tests/test_outputs.py +++ b/yt/frontends/rockstar/tests/test_outputs.py @@ -57,4 +57,4 @@ def test_halo_loading(): halo.mass # Make sure we can access the member particles - assert len(halo.member_ids) == Npart + assert_equal(len(halo.member_ids), Npart) From 306280604744ca22d2cc22e184022dd50aac07fe Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Thu, 19 Sep 2024 17:16:56 +0200 Subject: [PATCH 6/8] Promote explicitly to i8 --- yt/frontends/rockstar/data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/rockstar/data_structures.py b/yt/frontends/rockstar/data_structures.py index 3eafc659808..a0c54a91003 100644 --- a/yt/frontends/rockstar/data_structures.py +++ b/yt/frontends/rockstar/data_structures.py @@ -216,7 +216,7 @@ def __getitem__(self, key): @cached_property def ihalo(self): halo_id = self.particle_identifier - halo_ids = list(self.halo_ds.r["halos", "particle_identifier"].astype(int)) + halo_ids = list(self.halo_ds.r["halos", "particle_identifier"].astype("i8")) ihalo = halo_ids.index(halo_id) assert halo_ids[ihalo] == halo_id From f9c32cc52dd8f9a8db5f66ba978b9420feae8005 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Fri, 20 Sep 2024 13:55:53 +0200 Subject: [PATCH 7/8] Autodetect rockstar catalog format --- yt/frontends/rockstar/io.py | 41 +++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/yt/frontends/rockstar/io.py b/yt/frontends/rockstar/io.py index bb49d56e63c..5a97f21e3d1 100644 --- a/yt/frontends/rockstar/io.py +++ b/yt/frontends/rockstar/io.py @@ -2,9 +2,29 @@ import numpy as np +from yt.utilities import fortran_utils as fpu from yt.utilities.io_handler import BaseParticleIOHandler -from .definitions import halo_dts +from .definitions import halo_dts, header_dt + + +def _can_load_with_format( + filename: str, header_fmt: tuple[str, int, str], halo_format: np.dtype +) -> bool: + with open(filename, "rb") as f: + header = fpu.read_cattrs(f, header_fmt, "=") + Nhalos = header["num_halos"] + Nparttot = header["num_particles"] + halos = np.fromfile(f, dtype=halo_format, count=Nhalos) + + # Make sure all masses are > 0 + if np.any(halos["particle_mass"] <= 0): + return False + # Make sure number of particles sums to expected value + if halos["num_p"].sum() != Nparttot: + return False + + return True class IOHandlerRockstarBinary(BaseParticleIOHandler): @@ -12,7 +32,24 @@ class IOHandlerRockstarBinary(BaseParticleIOHandler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._halo_dt = halo_dts[self.ds.parameters["format_revision"]] + self._halo_dt = self.detect_rockstar_format( + self.ds.filename, + self.ds.parameters["format_revision"], + ) + + @staticmethod + def detect_rockstar_format( + filename: str, + guess: int | str, + ) -> bool: + revisions = list(halo_dts.keys()) + if guess in revisions: + revisions.pop(revisions.index(guess)) + revisions = [guess] + revisions + for revision in revisions: + if _can_load_with_format(filename, header_dt, halo_dts[revision]): + return halo_dts[revision] + raise RuntimeError(f"Could not detect Rockstar format for file {filename}") def _read_fluid_selection(self, chunks, selector, fields, size): raise NotImplementedError From a7755ca9f22d2dcde03560a376b092b7d92c7274 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Mon, 23 Sep 2024 17:15:59 +0200 Subject: [PATCH 8/8] Fix typing --- yt/frontends/rockstar/io.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt/frontends/rockstar/io.py b/yt/frontends/rockstar/io.py index 5a97f21e3d1..29bbaa13a27 100644 --- a/yt/frontends/rockstar/io.py +++ b/yt/frontends/rockstar/io.py @@ -1,4 +1,5 @@ import os +from collections.abc import Sequence import numpy as np @@ -9,7 +10,7 @@ def _can_load_with_format( - filename: str, header_fmt: tuple[str, int, str], halo_format: np.dtype + filename: str, header_fmt: Sequence[tuple[str, int, str]], halo_format: np.dtype ) -> bool: with open(filename, "rb") as f: header = fpu.read_cattrs(f, header_fmt, "=") @@ -40,9 +41,9 @@ def __init__(self, *args, **kwargs): @staticmethod def detect_rockstar_format( filename: str, - guess: int | str, - ) -> bool: - revisions = list(halo_dts.keys()) + guess: int, + ) -> np.dtype: + revisions: list[int] = list(halo_dts.keys()) if guess in revisions: revisions.pop(revisions.index(guess)) revisions = [guess] + revisions