don't iterate over all objects
use chunk size and index to skip to expected chunk
johannesloibl committed Dec 9, 2024
1 parent ee3d836 commit 57c2e7d
Showing 2 changed files with 13 additions and 11 deletions.
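The idea behind the change: every chunk in a segment has the same size (`chunk_size`), except possibly a truncated final chunk, so after yielding one chunk's data for a single channel the reader can seek directly to the start of the next chunk instead of stepping over every remaining object's data to get there. A minimal standalone sketch of the seek arithmetic, using an in-memory file and invented names (`read_one_channel` is a stand-in, not the nptdms API):

    import io

    chunk_size = 8   # all chunks in a segment share this size
    num_chunks = 3
    f = io.BytesIO(bytes(range(num_chunks * chunk_size)))

    def read_one_channel(f):
        # Stand-in for reading just one channel's values, which leaves
        # the file position somewhere in the middle of the chunk.
        return f.read(2)

    initial_position = f.tell()
    for i in range(num_chunks):
        values = read_one_channel(f)
        # Jump straight to the next chunk boundary; no need to read or
        # measure the other objects' data in this chunk.
        f.seek(initial_position + (i + 1) * chunk_size)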
6 changes: 3 additions & 3 deletions nptdms/base_segment.py
@@ -56,14 +56,14 @@ def _read_data_chunk(self, file, data_objects, chunk_index):
         """
         raise NotImplementedError("Data chunk reading must be implemented in base classes")
 
-    def read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk):
+    def read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk, chunk_size):
         """ Read multiple data chunks for a single channel at once
         In the base case we read each chunk individually but subclasses can override this
         """
         for chunk_index in range(chunk_offset, stop_chunk):
-            yield self._read_channel_data_chunk(file, data_objects, chunk_index, channel_path)
+            yield self._read_channel_data_chunk(file, data_objects, chunk_index, channel_path, chunk_size)
 
-    def _read_channel_data_chunk(self, file, data_objects, chunk_index, channel_path):
+    def _read_channel_data_chunk(self, file, data_objects, chunk_index, channel_path, chunk_size):
         """ Read data from a chunk for a single channel
         """
         # In the base case we can read data for all channels
18 changes: 10 additions & 8 deletions nptdms/tdms_segment.py
@@ -279,7 +279,7 @@ def read_raw_data_for_channel(self, f, channel_path, chunk_offset=0, num_chunks=
         if chunk_offset > 0:
             f.seek(chunk_size * chunk_offset, os.SEEK_CUR)
         stop_chunk = self.num_chunks if num_chunks is None else num_chunks + chunk_offset
-        for chunk in self._read_channel_data_chunks(f, self._get_data_objects(), channel_path, chunk_offset, stop_chunk):
+        for chunk in self._read_channel_data_chunks(f, self._get_data_objects(), channel_path, chunk_offset, stop_chunk, chunk_size):
             yield chunk
 
     def _calculate_chunks(self):
@@ -376,13 +376,15 @@ def _read_data_chunks(self, file, data_objects, num_chunks):
         for chunk in reader.read_data_chunks(file, data_objects, num_chunks):
             yield chunk
 
-    def _read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk):
+    def _read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk, chunk_size):
         """ Read multiple data chunks for a single channel at once
         In the base case we read each chunk individually but subclasses can override this
         """
         reader = self._get_data_reader()
-        for chunk in reader.read_channel_data_chunks(file, data_objects, channel_path, chunk_offset, stop_chunk):
+        initial_position = file.tell()
+        for i, chunk in enumerate(reader.read_channel_data_chunks(file, data_objects, channel_path, chunk_offset, stop_chunk, chunk_size)):
             yield chunk
+            file.seek(initial_position + (i + 1) * chunk_size)
 
     def _get_data_reader(self):
         endianness = '>' if (self.toc_mask & toc_properties['kTocBigEndian']) else '<'
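Because the loop above recomputes the position from `initial_position` and the chunk index after every yield, `_read_channel_data_chunk` no longer needs to leave the file at the exact end of a chunk; with the `break` added further down it now stops as soon as the requested channel has been read.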
@@ -462,7 +464,7 @@ def read_data_chunks(self, file, data_objects, num_chunks):
             raise ValueError("Cannot read interleaved data with different chunk sizes")
         return [self._read_interleaved_chunks(file, data_objects, num_chunks)]
 
-    def read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk):
+    def read_channel_data_chunks(self, file, data_objects, channel_path, chunk_offset, stop_chunk, chunk_size):
         """ Read multiple data chunks for a single channel at once
         """
         num_chunks = stop_chunk - chunk_offset
@@ -514,7 +516,7 @@ def _read_data_chunk(self, file, data_objects, chunk_index):
             object_data[obj.path] = obj.read_values(file, number_values, self.endianness)
         return RawDataChunk.channel_data(object_data)
 
-    def _read_channel_data_chunk(self, file, data_objects, chunk_index, channel_path):
+    def _read_channel_data_chunk(self, file, data_objects, chunk_index, channel_path, chunk_size):
         """ Read data from a chunk for a single channel
         """
         channel_data = RawChannelDataChunk.empty()
@@ -525,13 +527,13 @@ def _read_channel_data_chunk(self, file, data_objects, chunk_index, channel_path
                 file.seek(current_position)
                 channel_data = RawChannelDataChunk.channel_data(obj.read_values(file, number_values, self.endianness))
                 current_position = file.tell()
+                break
             elif number_values == obj.number_values:
                 # Seek over data for other channel data
                 current_position += obj.data_size
-            else:
+            elif obj.data_type.size is not None:
                 # In last chunk with reduced chunk size
-                if obj.data_type.size is not None:
-                    current_position += obj.data_type.size * number_values
+                current_position += obj.data_type.size * number_values
 
         file.seek(current_position)
         return channel_data
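The `break` is where the commit title comes from: previously the loop kept walking the remaining objects to accumulate their sizes, but now that the caller seeks to the next chunk boundary itself, scanning can stop as soon as the requested channel has been read. A toy illustration of the same early-exit pattern over an object table (the dict layout is invented for the example, not nptdms's structures):

    # Invented stand-in for data_objects; each entry records its data size.
    objects = [
        {"path": "/'group'/'a'", "data_size": 16},
        {"path": "/'group'/'b'", "data_size": 16},  # the channel we want
        {"path": "/'group'/'c'", "data_size": 16},
    ]

    def offset_of(objects, target_path):
        # Byte offset of target_path's data within a chunk, scanning
        # only as far as needed.
        offset = 0
        for obj in objects:
            if obj["path"] == target_path:
                return offset  # early exit: later objects are never visited
            offset += obj["data_size"]
        raise KeyError(target_path)

    assert offset_of(objects, "/'group'/'b'") == 16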