Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom Scan .mrc #356

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 57 additions & 15 deletions rsciio/mrc/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@
raise ValueError(f"Unrecognised mode '{mode}'.")


def read_de_metadata_file(filename, nav_shape=None):
def read_de_metadata_file(filename, nav_shape=None, scan_pos_file=None):
"""This reads the metadata ".txt" file that is saved alongside a DE .mrc file.

There are 3 ways in which the files are saved:
Expand All @@ -163,20 +163,37 @@
original_metadata = {}
with open(filename) as metadata:
for line in metadata.readlines():
key, value = line.split("=")
key = key.strip()
value = value.strip()
original_metadata[key] = value

try:
key, value = line.split("=")
key = key.strip()
value = value.strip()
original_metadata[key] = value
except ValueError:
_logger.warning(

Check warning on line 172 in rsciio/mrc/_api.py

View check run for this annotation

Codecov / codecov/patch

rsciio/mrc/_api.py#L171-L172

Added lines #L171 - L172 were not covered by tests
f"Could not parse line: {line} in metadata file {filename} "
f"Each line should be in the form 'key = value'."
)
# -1 -> Not read from TEM Channel 0 -> TEM 1 -> STEM
in_stem_mode = int(original_metadata.get("Instrument Project TEMorSTEM Mode", -1))
scanning = original_metadata.get("Scan - Enable", "Disable") == "Enable"
raster = original_metadata.get("Scan - Type", "Raster") == "Raster"
if not raster: # pragma: no cover
_logger.warning(
"Non-raster scans are not fully supported yet. Please raise an issue on GitHub"
" if you need this feature."
)

if scanning and not raster:
if scan_pos_file is None:
raise ValueError("Scan position file is required for non-raster scans.")
positions = np.loadtxt(scan_pos_file, delimiter=",", dtype=int)
nav_shape = (positions[:, 1].max() + 1, positions[:, 0].max() + 1)
unique_pos, counts = np.unique(positions, axis=0, return_counts=True)
repeats = np.max(counts)

Check warning on line 187 in rsciio/mrc/_api.py

View check run for this annotation

Codecov / codecov/patch

rsciio/mrc/_api.py#L183-L187

Added lines #L183 - L187 were not covered by tests
if repeats > 1:
nav_shape = (

Check warning on line 189 in rsciio/mrc/_api.py

View check run for this annotation

Codecov / codecov/patch

rsciio/mrc/_api.py#L189

Added line #L189 was not covered by tests
nav_shape[0],
nav_shape[1],
repeats,
) # repeat the scan positions
else:
positions = None

if in_stem_mode == -1:
in_stem_mode = scanning
elif in_stem_mode == 0: # 0 -> TEM Mode
Expand All @@ -191,7 +208,9 @@
has_camera_length != -1 or in_stem_mode
) # Force diffracting if in STEM mode

if in_stem_mode and scanning and raster or nav_shape is not None:
if (
in_stem_mode and scanning and raster or nav_shape is not None
): # nav-shape is not None for pos files
axes_scales = np.array(
[
float(
Expand Down Expand Up @@ -292,7 +311,7 @@
},
"General": {"timestamp": timestamp},
}
return original_metadata, metadata, axes, nav_shape
return original_metadata, metadata, axes, nav_shape, positions


def file_reader(
Expand All @@ -306,6 +325,7 @@
metadata_file="auto",
virtual_images=None,
external_images=None,
scan_file=None,
):
"""
File reader for the MRC format for tomographic and 4D-STEM data.
Expand All @@ -328,6 +348,9 @@
external_images : list
A list of filenames of external images (e.g. external detectors) to be loaded
alongside the main data. For DE movies these are automatically loaded.
scan_file : str
The filename of the scan file. This is only used for DE movies to determine the scan order
for custom scans.

%s
"""
Expand All @@ -342,7 +365,10 @@
suffix = "_".join(split[2:-1])
else:
suffix = ""
if len(dir_name) == 0:
dir_name = "."

Check warning on line 369 in rsciio/mrc/_api.py

View check run for this annotation

Codecov / codecov/patch

rsciio/mrc/_api.py#L369

Added line #L369 was not covered by tests
metadata = glob.glob(dir_name + "/" + unique_id + suffix + "_info.txt")

virtual_images = glob.glob(
dir_name + "/" + unique_id + suffix + "_[0-4]_*.mrc"
)
Expand All @@ -354,6 +380,16 @@
): # Not a DE movie or File Naming Convention is not followed
_logger.warning("Could not find metadata file for DE movie.")
metadata = []
try:
scan_file = glob.glob(
f"{dir_name}/{unique_id}_{suffix}*coordinates.csv"
)[0]
except IndexError:
_logger.warning(
f"Could not find scan file [{dir_name}/{unique_id}_{suffix}*_coordinates.csv]"
f" for DE movie {filename}."
)
scan_file = None
else:
metadata = []
if len(metadata) == 1:
Expand All @@ -368,14 +404,18 @@
metadata,
metadata_axes,
_navigation_shape,
) = read_de_metadata_file(metadata_file, nav_shape=navigation_shape)
positions,
) = read_de_metadata_file(
metadata_file, nav_shape=navigation_shape, scan_pos_file=scan_file
)
if navigation_shape is None:
navigation_shape = _navigation_shape
original_metadata = {"de_metadata": de_metadata}
else:
original_metadata = {}
metadata = {"General": {}, "Signal": {}}
metadata_axes = None
positions = None
metadata["General"]["original_filename"] = os.path.split(filename)[1]
metadata["Signal"]["signal_type"] = ""

Expand Down Expand Up @@ -412,12 +452,14 @@
shape = (NX[0], NY[0], NZ[0])
if navigation_shape is not None:
shape = shape[:2] + navigation_shape
if distributed:

if distributed or positions is not None:
data = memmap_distributed(
filename,
offset=f.tell(),
shape=shape[::-1],
dtype=get_data_type(std_header["MODE"]),
positions=positions,
chunks=chunks,
)
if not lazy:
Expand Down
97 changes: 82 additions & 15 deletions rsciio/utils/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,55 @@
return da.from_array(slices, chunks=(1,) * len(shape) + slices.shape[-2:]), chunks


def slice_memmap(slices, file, dtypes, shape, key=None, **kwargs):
def get_arbitrary_chunk_slice(
positions,
shape,
chunks="auto",
block_size_limit=None,
dtype=None,
):
"""
Get chunk slices for the :func:`rsciio.utils.distributed.slice_memmap` function. From arbitrary positions
given by a list of x, y coordinates.

Parameters
----------
positions: array-like
A numpy array in the form [[x1, y1], [x2, y2], ...] where x, y map the frame to the
real space coordinate of the data.
signal_shape : tuple
Shape of the signal data.
chunks : tuple, optional
Chunk shape. The default is "auto".
block_size_limit : int, optional
Maximum size of a block in bytes. The default is None. This is passed
to the :py:func:`dask.array.core.normalize_chunks` function when chunks == "auto".
dtype : numpy.dtype, optional
Data type. The default is None. This is passed to the
:py:func:`dask.array.core.normalize_chunks` function when chunks == "auto".
"""
if not isinstance(positions, np.ndarray):
positions = np.array(positions)

Check warning on line 104 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L104

Added line #L104 was not covered by tests
if chunks == "auto":
chunks = ("auto",) * (len(shape) - 2) + (-1, -1)

Check warning on line 106 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L106

Added line #L106 was not covered by tests
elif chunks[-2:] != (-1, -1):
raise ValueError("Last two dimensions of chunks must be -1")
chunks = da.core.normalize_chunks(

Check warning on line 109 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L108-L109

Added lines #L108 - L109 were not covered by tests
chunks=chunks, shape=shape, limit=block_size_limit, dtype=dtype
)
pos_mapping = np.zeros(shape=shape[:-2] + (1, 1), dtype=int)

Check warning on line 112 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L112

Added line #L112 was not covered by tests

for i, p in enumerate(positions):
pos_mapping[p[1], p[0]] = i + 1
pos_mapping = pos_mapping - 1 # 0 based indexing, -1 for the empty frames

Check warning on line 116 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L115-L116

Added lines #L115 - L116 were not covered by tests

# Now we chunk the pos_mapping array. In the case each frame remains in a single chunk and we only
# return the navigation dimensions. Later when we populate the data we will use the pos_mapping array
# map some frame index to the position within a dense array.
return da.from_array(pos_mapping, chunks=chunks[:-2] + (1, 1)), chunks

Check warning on line 121 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L121

Added line #L121 was not covered by tests


def slice_memmap(slices, file, dtypes, shape, key=None, positions=False, **kwargs):
"""
Slice a memory mapped file using a tuple of slices.

Expand Down Expand Up @@ -108,13 +156,17 @@
data = np.memmap(file, dtypes, shape=shape, **kwargs)
if key is not None:
data = data[key]
slices_ = tuple([slice(s[0], s[1]) for s in slices_])
return data[slices_]
if positions:
return data[slices_]

Check warning on line 160 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L160

Added line #L160 was not covered by tests
else:
slices_ = tuple([slice(s[0], s[1]) for s in slices_])
return data[slices_]


def memmap_distributed(
filename,
dtype,
positions=None,
offset=0,
shape=None,
order="C",
Expand Down Expand Up @@ -177,15 +229,32 @@
shape = int(os.path.getsize(filename) / unit_size)
if not isinstance(shape, tuple):
shape = (shape,)

# Separates slices into appropriately sized chunks.
chunked_slices, data_chunks = get_chunk_slice(
shape=shape + sub_array_shape,
chunks=chunks,
block_size_limit=block_size_limit,
dtype=array_dtype,
)
num_dim = len(shape)
if positions is not None:
# We have arbitrary positions
chunked_slices, data_chunks = get_arbitrary_chunk_slice(

Check warning on line 235 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L235

Added line #L235 was not covered by tests
positions=positions,
shape=shape + sub_array_shape,
chunks=chunks,
block_size_limit=block_size_limit,
dtype=array_dtype,
)
drop_axes = None
use_positions = True
shape = (len(positions),) + shape[-2:] # update the shape to be linear

Check warning on line 244 in rsciio/utils/distributed.py

View check run for this annotation

Codecov / codecov/patch

rsciio/utils/distributed.py#L242-L244

Added lines #L242 - L244 were not covered by tests
else:
# Separates slices into appropriately sized chunks.
chunked_slices, data_chunks = get_chunk_slice(
shape=shape + sub_array_shape,
chunks=chunks,
block_size_limit=block_size_limit,
dtype=array_dtype,
)
drop_axes = (
num_dim,
num_dim + 1,
) # Dask 2021.10.0 minimum to use negative indexing
use_positions = False
data = da.map_blocks(
slice_memmap,
chunked_slices,
Expand All @@ -197,10 +266,8 @@
dtypes=dtype,
offset=offset,
chunks=data_chunks,
drop_axis=(
num_dim,
num_dim + 1,
), # Dask 2021.10.0 minimum to use negative indexing
drop_axis=drop_axes,
positions=use_positions,
key=key,
)
return data
Loading