Skip to content

Commit

Permalink
Add support for files using the THIN option
Browse files Browse the repository at this point in the history
Update to support include thinning

Update for thinning data structure

Changed 'thinning_levels' to 'thinning_weight'

Added version test for thin file structure

Added thin Cherenkov binary data file

Added thin binary data file

Updated the test condition for THIN file type

Reduced resource file size
  • Loading branch information
oven8 authored and maxnoe committed Nov 4, 2024
1 parent 7bd6fe1 commit 38e231d
Show file tree
Hide file tree
Showing 14 changed files with 227 additions and 44 deletions.
2 changes: 2 additions & 0 deletions corsikaio/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
BLOCK_SIZE_FLOATS = 273
BLOCK_SIZE_FLOATS_THIN = 312
BLOCK_SIZE_BYTES = BLOCK_SIZE_FLOATS * 4
BLOCK_SIZE_BYTES_THIN = BLOCK_SIZE_FLOATS_THIN * 4

RUNH_VERSION_POSITION = 4
EVTH_VERSION_POSITION = 46
95 changes: 70 additions & 25 deletions corsikaio/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,25 @@

from .subblocks import (
parse_run_header,
parse_run_header_thin,
parse_event_header,
parse_event_header_thin,
parse_event_end,
parse_event_end_thin,
parse_cherenkov_photons,
parse_cherenkov_photons_thin,
parse_particle_data,
parse_particle_data_thin,
parse_longitudinal,
parse_run_end,
parse_run_end_thin,
get_version,
)
from .subblocks.longitudinal import longitudinal_header_dtype
from .subblocks.data import mmcs_cherenkov_photons_dtype
from .io import iter_blocks, read_buffer_size, open_compressed

from .constants import BLOCK_SIZE_BYTES, EVTH_VERSION_POSITION
from .constants import BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES_THIN, EVTH_VERSION_POSITION

Event = namedtuple('Event', ['header', 'data', 'longitudinal', 'end'])
PhotonEvent = namedtuple('PhotonEvent', ['header', 'photons', 'longitudinal', 'end'])
Expand All @@ -33,24 +39,33 @@ class CorsikaFile:
A file to iterate over events in a CORSIKA binary output file.
"""

def __init__(self, path, parse_blocks=True):
def __init__(self, path, parse_blocks=True, thinning=False):
self.EventClass = Event

self.parse_blocks = parse_blocks
self.thinning = thinning
self._buffer_size = read_buffer_size(path)
self._f = open_compressed(path)
self._block_iter = iter_blocks(self._f)
self._block_iter = iter_blocks(self._f, thinning=self.thinning)
if self.thinning is False:
self.block_size = BLOCK_SIZE_BYTES
else:
self.block_size = BLOCK_SIZE_BYTES_THIN

runh_bytes = next(self._block_iter)
if not runh_bytes[:4] == b'RUNH':
raise ValueError('File does not start with b"RUNH"')

self.run_header = parse_run_header(runh_bytes)[0]
if self.thinning is False:
self.run_header = parse_run_header(runh_bytes)[0]
else:
self.run_header = parse_run_header_thin(runh_bytes)[0]
self.version = round(float(self.run_header['version']), 4)
self._run_end = None

@property
def run_end(self):

if self._run_end is None:
pos = self._f.tell()

Expand All @@ -59,13 +74,16 @@ def run_end(self):
else:
self._f.seek(-4, 2)

self._f.seek(-BLOCK_SIZE_BYTES, 1)
block = self._f.read(BLOCK_SIZE_BYTES)
self._f.seek(-self.block_size, 1)
block = self._f.read(self.block_size)
while block[:4] != b'RUNE':
self._f.seek(-2 * BLOCK_SIZE_BYTES, 1)
block = self._f.read(BLOCK_SIZE_BYTES)
self._f.seek(-2 * self.block_size, 1)
block = self._f.read(self.block_size)

self._run_end = parse_run_end(block)[0]
if self.thinning is False:
self._run_end = parse_run_end(block)[0]
else:
self._run_end = parse_run_end_thin(block)[0]
self._f.seek(pos)

return self._run_end
Expand All @@ -77,14 +95,20 @@ def __next__(self):
raise IOError("File seems to be truncated")

if block[:4] == b'RUNE':
self._run_end = parse_run_end(block)
if self.thinning is False:
self._run_end = parse_run_end(block)
else:
self._run_end = parse_run_end_thin(block)
raise StopIteration()

if block[:4] != b'EVTH':
raise IOError('EVTH block expected but found {}'.format(block[:4]))

if self.parse_blocks:
event_header = parse_event_header(block)[0]
if self.thinning is False:
event_header = parse_event_header(block)[0]
else:
event_header = parse_event_header_thin(block)[0]
else:
event_header = _to_floatarray(block)

Expand All @@ -109,19 +133,28 @@ def __next__(self):
raise IOError("File seems to be truncated")

if self.parse_blocks:
event_end = parse_event_end(block,self.version)[0]
if self.thinning is False:
event_end = parse_event_end(block,self.version)[0]
else:
event_end = parse_event_end_thin(block,self.version)[0]
data = self.parse_data_blocks(data_bytes)
longitudinal = parse_longitudinal(long_bytes)
else:
event_end = _to_floatarray(block)
data = _to_floatarray(data_bytes).reshape(-1, 7)
if self.thinning is False:
data = _to_floatarray(data_bytes).reshape(-1, 7)
else:
data = _to_floatarray(data_bytes).reshape(-1, 8)

longitudinal = _to_floatarray(long_bytes)

return self.EventClass(event_header, data, longitudinal, event_end)

@classmethod
def parse_data_blocks(cls, data_bytes):
array = np.frombuffer(data_bytes, dtype='float32').reshape(-1, 7)
def parse_data_blocks(self, data_bytes):
if self.thinning is False:
array = np.frombuffer(data_bytes, dtype='float32').reshape(-1, 7)
else:
array = np.frombuffer(data_bytes, dtype='float32').reshape(-1, 8)
return array[np.any(array != 0, axis=1)]

def __iter__(self):
Expand All @@ -131,14 +164,17 @@ def read_headers(self):
pos = self._f.tell()
self._f.seek(0)

block_iter = iter_blocks(self._f)
block_iter = iter_blocks(self._f, thinning=self.thinning)
block = next(block_iter)
event_header_data = bytearray()
end_found = True

while block:
if block[:4] == b'RUNE':
self._run_end = parse_run_end(block)[0]
if self.thinning is False:
self._run_end = parse_run_end(block)[0]
else:
self._run_end = parse_run_end_thin(block)[0]
break

if (
Expand All @@ -158,7 +194,10 @@ def read_headers(self):

self._f.seek(pos)

event_headers = parse_event_header(event_header_data)
if self.thinning is False:
event_headers = parse_event_header(event_header_data)
else:
event_headers = parse_event_header_thin(event_header_data)

return self.run_header, event_headers, self._run_end

Expand All @@ -174,14 +213,17 @@ def close(self):

class CorsikaCherenkovFile(CorsikaFile):

def __init__(self, path, mmcs=False, **kwargs):
super().__init__(path, **kwargs)
def __init__(self, path, thinning=False, mmcs=False, **kwargs):
super().__init__(path, thinning=thinning, **kwargs)

self.EventClass = PhotonEvent
self.mmcs = mmcs

def parse_data_blocks(self, data_bytes):
photons = parse_cherenkov_photons(data_bytes)
if self.thinning is False:
photons = parse_cherenkov_photons(data_bytes)
else:
photons = parse_cherenkov_photons_thin(data_bytes)
if not self.mmcs:
return photons

Expand All @@ -199,9 +241,12 @@ def parse_data_blocks(self, data_bytes):

class CorsikaParticleFile(CorsikaFile):

def __init__(self, path, **kwargs):
super().__init__(path, **kwargs)
def __init__(self, path, thinning=False, **kwargs):
super().__init__(path, thinning=thinning, **kwargs)
self.EventClass = ParticleEvent

def parse_data_blocks(self, data_bytes):
return parse_particle_data(data_bytes)
if self.thinning is False:
return parse_particle_data(data_bytes)
else:
return parse_particle_data_thin(data_bytes)
28 changes: 19 additions & 9 deletions corsikaio/io.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import gzip
import struct

from .constants import BLOCK_SIZE_BYTES
from .constants import BLOCK_SIZE_BYTES, BLOCK_SIZE_BYTES_THIN


#: buffersize for files not written as fortran sequential file
DEFAULT_BUFFER_SIZE = BLOCK_SIZE_BYTES * 100
DEFAULT_BUFFER_SIZE_THIN = BLOCK_SIZE_BYTES_THIN * 100
#: struct definition of the fortran record marker
RECORD_MARKER = struct.Struct('i')

Expand Down Expand Up @@ -58,9 +59,14 @@ def read_buffer_size(path):
return buffer_size


def iter_blocks(f):
def iter_blocks(f, thinning=False):
is_fortran_file = True
buffer_size = DEFAULT_BUFFER_SIZE
if thinning == False:
block_size = BLOCK_SIZE_BYTES
buffer_size = DEFAULT_BUFFER_SIZE
else:
block_size = BLOCK_SIZE_BYTES_THIN
buffer_size = DEFAULT_BUFFER_SIZE_THIN


data = f.read(4)
Expand Down Expand Up @@ -89,13 +95,13 @@ def iter_blocks(f):
if len(data) == 0:
return

n_blocks, rest = divmod(len(data), BLOCK_SIZE_BYTES)
n_blocks, rest = divmod(len(data), block_size)
if rest != 0:
raise IOError("Read less bytes than expected, file seems to be truncated")

for block in range(n_blocks):
start = block * BLOCK_SIZE_BYTES
stop = start + BLOCK_SIZE_BYTES
start = block * block_size
stop = start + block_size
block = data[start:stop]
yield block

Expand All @@ -104,7 +110,7 @@ def iter_blocks(f):
f.read(RECORD_MARKER.size)


def read_block(f, buffer_size=None):
def read_block(f, thinning=False, buffer_size=None):
'''
Reads a block of CORSIKA output, e.g. 273 4-byte floats.
Expand All @@ -113,6 +119,10 @@ def read_block(f, buffer_size=None):
integer (the buffer size)
before and after each block, which has to be skipped.
'''
if thinning == False:
block_size = BLOCK_SIZE_BYTES
else:
block_size = BLOCK_SIZE_BYTES_THIN
if buffer_size is not None:
pos = f.tell()
if pos == 0:
Expand All @@ -121,8 +131,8 @@ def read_block(f, buffer_size=None):
if (pos + 4) % (buffer_size + 8) == 0:
f.read(8)

block = f.read(BLOCK_SIZE_BYTES)
if len(block) < BLOCK_SIZE_BYTES:
block = f.read(block_size)
if len(block) < block_size:
raise IOError("Read less bytes than expected, file seems to be truncated")

return block
43 changes: 38 additions & 5 deletions corsikaio/subblocks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
import numpy as np
import struct

from .run_header import run_header_types
from .run_end import run_end_dtype
from .run_header import run_header_types, run_header_thin_types
from .run_end import run_end_dtype, run_end_thin_dtype

from .event_header import event_header_types
from .event_end import event_end_types
from .event_header import event_header_types, event_header_thin_types
from .event_end import event_end_types, event_end_thin_types

from .data import cherenkov_photons_dtype, particle_data_dtype
from .data import cherenkov_photons_dtype, cherenkov_photons_thin_dtype, particle_data_dtype, particle_data_thin_dtype
from .longitudinal import longitudinal_data_dtype

from ..constants import RUNH_VERSION_POSITION, EVTH_VERSION_POSITION

__all__ = [
"parse_event_header",
"parse_event_header_thin",
"parse_run_header",
"parse_run_header_thin",
"parse_cherenkov_photons",
"parse_cherenkov_photons_thin",
"parse_particle_data",
"parse_particle_data_thin",
"parse_longitudinal",
]

Expand All @@ -28,20 +32,41 @@ def parse_run_header(run_header_bytes):
return np.frombuffer(run_header_bytes, dtype=run_header_types[version])


def parse_run_header_thin(run_header_bytes):
version = get_version(run_header_bytes, RUNH_VERSION_POSITION)
# get corsika minor version (Truncate the float after first digit)
version = float(str(version)[:3])
return np.frombuffer(run_header_bytes, dtype=run_header_thin_types[version])


def parse_run_end(run_end_bytes):
return np.frombuffer(run_end_bytes, dtype=run_end_dtype)


def parse_run_end_thin(run_end_bytes):
return np.frombuffer(run_end_bytes, dtype=run_end_thin_dtype)


def parse_event_header(event_header_bytes):
version = get_version(event_header_bytes, EVTH_VERSION_POSITION)
version = float(str(version)[:3])
return np.frombuffer(event_header_bytes, dtype=event_header_types[version])


def parse_event_header_thin(event_header_bytes):
version = get_version(event_header_bytes, EVTH_VERSION_POSITION)
version = float(str(version)[:3])
return np.frombuffer(event_header_bytes, dtype=event_header_thin_types[version])


def parse_event_end(event_end_bytes,version):
return np.frombuffer(event_end_bytes, dtype=event_end_types[float(str(version)[:3])])


def parse_event_end_thin(event_end_bytes,version):
return np.frombuffer(event_end_bytes, dtype=event_end_thin_types[float(str(version)[:3])])


def get_version(header_bytes, version_pos):
sl = slice(4 * (version_pos - 1), 4 * version_pos)
return round(struct.unpack("f", header_bytes[sl])[0], 4)
Expand All @@ -57,10 +82,18 @@ def parse_cherenkov_photons(data_block_bytes):
return parse_data_block(data_block_bytes, dtype=cherenkov_photons_dtype)


def parse_cherenkov_photons_thin(data_block_bytes):
return parse_data_block(data_block_bytes, dtype=cherenkov_photons_thin_dtype)


def parse_particle_data(data_block_bytes):
return parse_data_block(data_block_bytes, dtype=particle_data_dtype)


def parse_particle_data_thin(data_block_bytes):
return parse_data_block(data_block_bytes, dtype=particle_data_thin_dtype)


def parse_longitudinal(longitudinal_data_bytes):
return parse_data_block(longitudinal_data_bytes, longitudinal_data_dtype)

Expand Down
Loading

0 comments on commit 38e231d

Please sign in to comment.