From 98397f6fdc0f9c04c7372b68a4795dc657b78edb Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Thu, 18 May 2023 15:30:20 +0200 Subject: [PATCH 1/7] rename class SFS_reader into SfsReader to follow python class naming convention --- rsciio/bruker/_api.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index 852daea8..ad1aba90 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -289,7 +289,7 @@ def get_as_BytesIO_string(self): return data -class SFS_reader(object): +class SfsReader(object): """Class to read sfs file. SFS is AidAim software's(tm) single file system. The class provides basic reading capabilities of such container. @@ -871,11 +871,11 @@ def gen_hspy_item_dict_basic(self): return i -class BCF_reader(SFS_reader): +class BCF_reader(SfsReader): """Class to read bcf (Bruker hypermapping) file. - Inherits SFS_reader and all its attributes and methods. + Inherits SfsReader and all its attributes and methods. Attributes: filename @@ -888,7 +888,7 @@ class BCF_reader(SFS_reader): """ def __init__(self, filename, instrument=None): - SFS_reader.__init__(self, filename) + SfsReader.__init__(self, filename) header_file = self.get_file("EDSDatabase/HeaderData") self.available_indexes = [] # get list of presented indexes from file tree of binary sfs container @@ -968,7 +968,7 @@ def parse_hypermap(self, index=None, downsample=1, cutoff_at_kV=None, lazy=False ceil(self.header.image.width / downsample), n_channels, ) - sfs_file = SFS_reader(self.filename) + sfs_file = SfsReader(self.filename) vrt_file_hand = sfs_file.get_file("EDSDatabase/SpectrumData" + str(index)) if fast_unbcf: parse_func = unbcf_fast.parse_to_numpy @@ -1076,7 +1076,7 @@ def py_parse_hypermap(virtual_file, shape, dtype, downsample=1): Parameters ---------- - virtual_file -- virtual file handle returned by SFS_reader instance + virtual_file -- virtual file handle returned by SfsReader instance or by object inheriting it (e.g. BCF_reader instance) shape -- numpy shape dtype -- numpy dtype From 8fb64903b60228b3fef8ba26a866a14ae718aebe Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Fri, 19 May 2023 16:49:05 +0200 Subject: [PATCH 2/7] cleanup bruker _api and expose SfsReader under api --- rsciio/bruker/__init__.py | 2 + rsciio/bruker/_api.py | 161 +++++++++++++++++++++++--------------- rsciio/bruker/api.py | 1 + 3 files changed, 103 insertions(+), 61 deletions(-) create mode 100644 rsciio/bruker/api.py diff --git a/rsciio/bruker/__init__.py b/rsciio/bruker/__init__.py index d4de92f6..da8b3dc9 100644 --- a/rsciio/bruker/__init__.py +++ b/rsciio/bruker/__init__.py @@ -1,8 +1,10 @@ from ._api import file_reader +from . import api __all__ = [ "file_reader", + "api", ] diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index ad1aba90..1b99393e 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -46,6 +46,10 @@ from rsciio.docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC +__all__ = [ + "SfsReader", +] + _logger = logging.getLogger(__name__) warn_once = True @@ -71,22 +75,23 @@ class Container(object): class SFSTreeItem(object): - """Class to manage one internal sfs file. - - Reading, reading in chunks, reading and extracting, reading without - extracting even if compression is pressent. + """Class to manage one internal SFS file. + This class provides means to Read, read in blocks, read and extract + compressed blocks and read without extracting even if compression + is present (for debuging purpoises). Attributes: item_raw_string -- the bytes from sfs file table describing the file - parent -- the item higher hierarchicaly in the sfs file tree + parent -- the index of parent item in SFS file table. + The index of root is -1. Methods: read_piece, setup_compression_metadata, get_iter_and_properties, get_as_BytesIO_string """ - def __init__(self, item_raw_string, parent): - self.sfs = parent + def __init__(self, item_raw_string, sfs): + self.sfs = sfs ( self._pointer_to_pointer_table, self.size, @@ -108,6 +113,11 @@ def __init__(self, item_raw_string, parent): self.size_in_chunks = self._calc_pointer_table_size() if self.is_dir == 0: self._fill_pointer_table() + self.uncompressed_block_size = None + self.n_compressed_blocks = None + + def __repr__(self): + return f"" def _calc_pointer_table_size(self): n_chunks = ceil(self.size / self.sfs.usable_chunk) @@ -119,13 +129,10 @@ def _fill_pointer_table(self): self.pointer is the sfs pointer table containing addresses of every chunk of the file. - The pointer table if the file is big can extend throught many - sfs chunks. Differently than files, the pointer table of file have no - table of pointers to the chunks. Instead if pointer table is larger - than sfs chunk, the chunk header contains next chunk number (address - can be calculated using known chunk size and global offset) with - continuation of file pointer table, thus it have to be read and filled - consecutive. + The pointer table, if the internal file is large enough, can extend + throught many sfs blocks. In case pointer table or its continuation + do not fit inside single SFS block, then the block's header contains + next block index. """ # table size in number of chunks: n_of_chunks = ceil(self.size_in_chunks / (self.sfs.usable_chunk // 4)) @@ -198,7 +205,6 @@ def _iter_read_chunks(self, first=0): Keyword arguments: first -- the index of first chunk from which to read. (default 0) - chunks -- the number of chunks to read. (default False) """ last = self.size_in_chunks with open(self.sfs.filename, "rb") as fn: @@ -206,35 +212,13 @@ def _iter_read_chunks(self, first=0): fn.seek(self.pointers[idx]) yield fn.read(self.sfs.usable_chunk) fn.seek(self.pointers[last - 1]) - last_stuff = self.size % self.sfs.usable_chunk - if last_stuff != 0: - yield fn.read(last_stuff) + last_n_bytes = self.size % self.sfs.usable_chunk + if last_n_bytes != 0: + yield fn.read(last_n_bytes) else: yield fn.read(self.sfs.usable_chunk) - def setup_compression_metadata(self): - """parse and setup the number of compression chunks - - and uncompressed chunk size as class attributes. - - Sets up attributes: - self.uncompressed_blk_size, self.no_of_compr_blk - - """ - with open(self.sfs.filename, "rb") as fn: - fn.seek(self.pointers[0]) - # AACS signature, uncompressed size, undef var, number of blocks - aacs, uc_size, _, n_of_blocks = strct_unp(">> instance_of_SFSReader.get_file('catz/kitten.png') + >>> instance_of_SfsReader.get_file('catz/kitten.png') See also -------- diff --git a/rsciio/bruker/api.py b/rsciio/bruker/api.py new file mode 100644 index 00000000..24a99282 --- /dev/null +++ b/rsciio/bruker/api.py @@ -0,0 +1 @@ +from ._api import * From ef337cc8dd24dd7a318491789469abf3b1c40ee9 Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Sat, 27 May 2023 14:55:56 +0200 Subject: [PATCH 3/7] dont expose .api within __init__ --- rsciio/bruker/__init__.py | 2 -- rsciio/bruker/_api.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/rsciio/bruker/__init__.py b/rsciio/bruker/__init__.py index da8b3dc9..d4de92f6 100644 --- a/rsciio/bruker/__init__.py +++ b/rsciio/bruker/__init__.py @@ -1,10 +1,8 @@ from ._api import file_reader -from . import api __all__ = [ "file_reader", - "api", ] diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index 1b99393e..a643c17e 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -82,7 +82,7 @@ class SFSTreeItem(object): Attributes: item_raw_string -- the bytes from sfs file table describing the file - parent -- the index of parent item in SFS file table. + parent -- the index of parent item in SFS file table. The index of root is -1. Methods: @@ -116,7 +116,7 @@ def __init__(self, item_raw_string, sfs): self.uncompressed_block_size = None self.n_compressed_blocks = None - def __repr__(self): + def __repr__(self): # pragma: no cover return f"" def _calc_pointer_table_size(self): From efac6ebff9f32489f03f1b9626e7d176707349d0 Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Sun, 28 May 2023 13:59:11 +0200 Subject: [PATCH 4/7] rename classes to comply with PEP8: SfsReader->SFSReader BCF_reader -> BCFReader update docstrings --- rsciio/bruker/_api.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index a643c17e..23e4a271 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -47,7 +47,7 @@ from rsciio.docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC __all__ = [ - "SfsReader", + "SFSReader", ] _logger = logging.getLogger(__name__) @@ -277,26 +277,22 @@ def get_as_BytesIO_string(self): return data -class SfsReader(object): - """Class to read sfs file. SFS is known to be used by bruker for - saving particle analysis (*.pan) frames, hyperspectral maps and +class SFSReader(object): + """Class to read sfs file. SFS is known to be used by Bruker for + saving particle analysis (*.pan) frames, hyper-spectral maps and EBSD maps and metadata (*.bcf). SFS stands for AidAim software's(tm) [s]ingle [f]ile [s]ystem. The class provides basic reading capabilities of such container. It is capable to read compressed data in zlib, but - SFS can contain other compression which is not implemented here. - It is also not able to read encrypted sfs containers. + SFS can use other compression than zlib, which is not implemented + here. This implementation also does not cover SFS internal encryption. - This class can be used stand alone or inherited in construction of + This class can be used stand-alone or inherited in construction of file readers for files using this format. The format can be recognised independently from file extension by file "magic" in the first 8 bytes: AAMVHFSS - Attributes - ---------- - filename - Attributes: ---------- vfs - virtual file system -- the dictionarized representation of sfs @@ -313,7 +309,7 @@ class SfsReader(object): n_tree_items - number of items in tree descriptor (including files and directories) sfs_n_of_chunks - total number of blocks in the SFS file (serves - no purpoise in reading, probably used for appending data) + no purpose in reading, probably used for appending data) Methods: ---------- @@ -322,6 +318,8 @@ class SfsReader(object): """ def __init__(self, filename): + """SFSReader requires valid relative or absolute path-like object. + which is used in python built-in open() function""" self.filename = filename self.compression = None # read the file header @@ -407,11 +405,12 @@ def _setup_compression_metadata(self, item): if aacs == 0x53434141: # AACS as string item.uncompressed_block_size = uc_size item.n_compressed_blocks = n_of_blocks - else: + else: # pragma: no cover raise ValueError( """The file is marked to be compressed, but compression signature is missing in the header. Aborting....""" ) + # Such kind of bug failed to be replicated more than once @staticmethod def _flat_items_to_dict(paths, temp_item_list): @@ -480,7 +479,7 @@ def get_file(self, path): to get "file" object 'kitten.png' in folder 'catz' which resides in root directory of sfs, you would use: - >>> instance_of_SfsReader.get_file('catz/kitten.png') + >>> instance_of_SFSReader.get_file('catz/kitten.png') See also -------- @@ -910,11 +909,11 @@ def gen_hspy_item_dict_basic(self): return i -class BCF_reader(SfsReader): +class BCFReader(SFSReader): """Class to read bcf (Bruker hypermapping) file. - Inherits SfsReader and all its attributes and methods. + Inherits SFSReader and all its attributes and methods. Attributes: filename @@ -927,7 +926,7 @@ class BCF_reader(SfsReader): """ def __init__(self, filename, instrument=None): - SfsReader.__init__(self, filename) + SFSReader.__init__(self, filename) header_file = self.get_file("EDSDatabase/HeaderData") self.available_indexes = [] # get list of presented indexes from file tree of binary sfs container @@ -1007,7 +1006,7 @@ def parse_hypermap(self, index=None, downsample=1, cutoff_at_kV=None, lazy=False ceil(self.header.image.width / downsample), n_channels, ) - sfs_file = SfsReader(self.filename) + sfs_file = SFSReader(self.filename) vrt_file_hand = sfs_file.get_file("EDSDatabase/SpectrumData" + str(index)) if fast_unbcf: parse_func = unbcf_fast.parse_to_numpy @@ -1115,8 +1114,8 @@ def py_parse_hypermap(virtual_file, shape, dtype, downsample=1): Parameters ---------- - virtual_file -- virtual file handle returned by SfsReader instance - or by object inheriting it (e.g. BCF_reader instance) + virtual_file -- virtual file handle returned by SFSReader instance + or by object inheriting it (e.g. instance of BCFReader) shape -- numpy shape dtype -- numpy dtype downsample -- downsample factor @@ -1391,7 +1390,7 @@ def bcf_reader( """ # objectified bcf file: - obj_bcf = BCF_reader(filename, instrument=instrument) + obj_bcf = BCFReader(filename, instrument=instrument) if select_type == "image": return bcf_images(obj_bcf) elif select_type == "spectrum_image": From 4d48681024b23564f1aad1a46f312f7882861f11 Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Sun, 28 May 2023 14:19:41 +0200 Subject: [PATCH 5/7] fix_test --- rsciio/tests/test_bruker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rsciio/tests/test_bruker.py b/rsciio/tests/test_bruker.py index ea224330..8b08c73d 100644 --- a/rsciio/tests/test_bruker.py +++ b/rsciio/tests/test_bruker.py @@ -243,7 +243,7 @@ def test_fast_bcf(): for bcffile in test_files: filename = os.path.join(my_path, "bruker_data", bcffile) - thingy = _api.BCF_reader(filename) + thingy = _api.BCFReader(filename) for j in range(2, 5, 1): print("downsampling:", j) _api.fast_unbcf = True # manually enabling fast parsing From 63979e4d3d551818bade61d8f1fd093c15ea72d8 Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Mon, 29 May 2023 09:57:31 +0200 Subject: [PATCH 6/7] expose spectra_from_xml, other polishing --- rsciio/bruker/_api.py | 160 +++++++++++++++++++----------------- rsciio/tests/test_bruker.py | 4 +- 2 files changed, 85 insertions(+), 79 deletions(-) diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index 23e4a271..8a272119 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # -# Copyright 2016 Petras Jokubauskas -# Copyright 2016 The HyperSpy developers +# Copyright 2023 Petras Jokubauskas +# Copyright 2023 The HyperSpy developers # # This library is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,10 +18,10 @@ # If not, see . # # This python library subset provides read functionality of -# Bruker bcf files. +# Bruker bcf and spx files. # The basic reading capabilities of proprietary AidAim Software(tm) -# SFS (Single File System) (used in bcf technology) is present in -# the same library. +# SFS (Single File System) used in bcf technology is exposed for +# eventual reuse for other Bruker files. # Plugin characteristics @@ -30,7 +30,7 @@ from math import ceil import logging from zlib import decompress as unzip_block -from struct import unpack as strct_unp +from struct import unpack from datetime import datetime from ast import literal_eval import codecs @@ -47,7 +47,9 @@ from rsciio.docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC __all__ = [ + "BCFReader", "SFSReader", + "spectra_from_xml", ] _logger = logging.getLogger(__name__) @@ -105,7 +107,7 @@ def __init__(self, item_raw_string, sfs): _, name, _, - ) = strct_unp("= size: @@ -1171,7 +1177,7 @@ def py_parse_hypermap(virtual_file, shape, dtype, downsample=1): dummy_size1, n_of_pulses, data_size2, - ) = strct_unp("> 4) + gain pixel += g[:channels] else: length = int(channels * size_p / 2) - temp = strct_unp( + temp = unpack( "<" + channels * st[size_p], buffer1[offset : offset + length], ) @@ -1249,14 +1255,14 @@ def py_parse_hypermap(virtual_file, shape, dtype, downsample=1): pixel += rest * [0] # additional data size: if n_of_pulses > 0: - add_s = strct_unp("= size: buffer1 = buffer1[offset:] + next(iter_data) size = size_chnk + size - offset offset = 0 # the additional pulses: - add_pulses = strct_unp( + add_pulses = unpack( "<" + "H" * n_of_pulses, buffer1[offset : offset + add_s] ) offset += add_s diff --git a/rsciio/tests/test_bruker.py b/rsciio/tests/test_bruker.py index 8b08c73d..50d1a47f 100644 --- a/rsciio/tests/test_bruker.py +++ b/rsciio/tests/test_bruker.py @@ -239,11 +239,11 @@ def test_wrong_file(): def test_fast_bcf(): thingy = pytest.importorskip("rsciio.bruker.unbcf_fast") - from rsciio.bruker import _api + from rsciio.bruker import _api, api for bcffile in test_files: filename = os.path.join(my_path, "bruker_data", bcffile) - thingy = _api.BCFReader(filename) + thingy = api.BCFReader(filename) for j in range(2, 5, 1): print("downsampling:", j) _api.fast_unbcf = True # manually enabling fast parsing From a57fd729d6b12938c2395bf07d4aa2376267a1d5 Mon Sep 17 00:00:00 2001 From: Petras Jokubauskas Date: Mon, 29 May 2023 12:16:20 +0200 Subject: [PATCH 7/7] missed return fix --- rsciio/bruker/_api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index 8a272119..e96455a7 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -1035,8 +1035,9 @@ def spx_reader(filename, lazy=False): xml_str = spx.read() root = ET.fromstring(xml_str) sp_node = root.find("./ClassInstance[@Type='TRTSpectrum']") - rsc_sig = spectra_from_xml(sp_node) - rsc_sig[0]["metadata"]["General"]["original_filename"] = basename(filename) + spect = spectra_from_xml(sp_node) + spect[0]["metadata"]["General"]["original_filename"] = basename(filename) + return spect def spectra_from_xml(sp_node): @@ -1079,7 +1080,7 @@ def spectra_from_xml(sp_node): }, "Sample": {"name": name}, "Signal": { - "signal_type": "EDS_%s" % mode, + "signal_type": f"EDS_{mode}", "record_by": "spectrum", "quantity": "X-rays (Counts)", },