diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 732a15a..28da064 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -29,7 +29,8 @@ from . import h5ds -from .config import Config +from .config import get_config + __version__ = version.version diff --git a/h5pyd/_hl/base.py b/h5pyd/_hl/base.py index f581480..dd1a78e 100644 --- a/h5pyd/_hl/base.py +++ b/h5pyd/_hl/base.py @@ -14,6 +14,7 @@ import posixpath import os +import sys import json import numpy as np import logging @@ -28,6 +29,10 @@ numpy_float_types = (np.float16, np.float32, np.float64) +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + + class FakeLock(): def __init__(self): pass @@ -506,7 +511,7 @@ def readElement(buffer, offset, arr, index, dt): e = np.frombuffer(bytes(e_buffer), dtype=dt) arr[index] = e[0] except ValueError: - print(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") + eprint(f"ERROR: ValueError setting {e_buffer} and dtype: {dt}") raise else: # variable length element @@ -533,7 +538,7 @@ def readElement(buffer, offset, arr, index, dt): try: e = np.frombuffer(bytes(e_buffer), dtype=vlen) except ValueError: - print("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) + eprint("ValueError -- e_buffer:", e_buffer, "dtype:", vlen) raise arr[index] = e diff --git a/h5pyd/_hl/config.py b/h5pyd/_hl/config.py deleted file mode 100755 index 40ac2e8..0000000 --- a/h5pyd/_hl/config.py +++ /dev/null @@ -1,103 +0,0 @@ -############################################################################## -# Copyright by The HDF Group. # -# All rights reserved. # -# # -# This file is part of HSDS (HDF5 Scalable Data Service), Libraries and # -# Utilities. The full HSDS copyright notice, including # -# terms governing use, modification, and redistribution, is contained in # -# the file COPYING, which can be found at the root of the source code # -# distribution tree. If you do not have access to this file, you may # -# request a copy from help@hdfgroup.org. # -############################################################################## -import os -import json -import sys - - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - -class Config: - """ - User Config state - """ - def __init__(self, config_file=None, **kwargs): - self._cfg = {} - if config_file: - self._config_file = config_file - elif os.path.isfile(".hscfg"): - self._config_file = ".hscfg" - else: - self._config_file = os.path.expanduser("~/.hscfg") - # process config file if found - if os.path.isfile(self._config_file): - line_number = 0 - with open(self._config_file) as f: - for line in f: - line_number += 1 - s = line.strip() - if not s: - continue - if s[0] == '#': - # comment line - continue - index = line.find('=') - if index <= 0: - eprint(f"config file: {self._config_file} line: {line_number} is not valid") - continue - k = line[:index].strip() - v = line[(index + 1):].strip() - if v and v.upper() != "NONE": - self._cfg[k] = v - # override any config values with environment variable if found - for k in self._cfg.keys(): - if k.upper() in os.environ: - self._cfg[k] = os.environ[k.upper()] - - # finally update any values that are passed in to the constructor - for k in kwargs.keys(): - self._cfg[k] = kwargs[k] - - def __getitem__(self, name): - """ Get a config item """ - - # Load a variable from environment. It would have only been loaded in - # __init__ if it was also specified in the config file. - env_name = name.upper() - if name not in self._cfg and env_name in os.environ: - self._cfg[name] = os.environ[env_name] - - return self._cfg[name] - - def __setitem__(self, name, obj): - """ set config item """ - self._cfg[name] = obj - - def __delitem__(self, name): - """ Delete option. """ - del self._cfg[name] - - def __len__(self): - return len(self._cfg) - - def __iter__(self): - """ Iterate over config names """ - keys = self._cfg.keys() - for key in keys: - yield key - - def __contains__(self, name): - return name in self._cfg or name.upper() in os.environ - - def __repr__(self): - return json.dumps(self._cfg) - - def keys(self): - return self._cfg.keys() - - def get(self, name, default=None): - if name in self: - return self[name] - else: - return default diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index c623e86..c9d4be5 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -21,7 +21,7 @@ from .objectid import GroupID from .group import Group from .httpconn import HttpConn -from .config import Config +from .. import config VERBOSE_REFRESH_TIME = 1.0 # 1 second @@ -49,7 +49,7 @@ class H5Image(io.RawIOBase): def __init__(self, domain_path, h5path="h5image", chunks_per_page=1, logger=None): """ verify dataset can be accessed and set logger if supplied """ self._cursor = 0 - if domain_path.startswith("hdf5::/"): + if domain_path and domain_path.startswith("hdf5::/"): self._domain_path = domain_path else: self._domain_path = "hdf5:/" + domain_path @@ -276,7 +276,7 @@ def __init__( logger=None, owner=None, linked_domain=None, - track_order=False, + track_order=None, retries=10, timeout=180, **kwds, @@ -320,13 +320,13 @@ def __init__( Create new domain using the root of the linked domain track_order Whether to track dataset/group/attribute creation order within this file. Objects will be iterated - in ascending creation order if this is enabled, otherwise in ascending alphanumeric order. + in ascending creation order if this is True, if False in ascending alphanumeric order. + If None use global default get_config().track_order. retries Number of retry attempts to be used if a server request fails timeout Timeout value in seconds """ - groupid = None dn_ids = [] # if we're passed a GroupId as domain, just initialize the file object @@ -341,7 +341,7 @@ def __init__( if mode is None: mode = "r" - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). # accept domain values in the form: # http://server:port/home/user/myfile.h5 @@ -354,7 +354,7 @@ def __init__( # # For http prefixed values, extract the endpont and use the rest as domain path for protocol in ("http://", "https://", "hdf5://", "http+unix://"): - if domain.startswith(protocol): + if domain and domain.startswith(protocol): if protocol.startswith("http"): domain = domain[len(protocol):] # extract the endpoint @@ -383,7 +383,7 @@ def __init__( endpoint = cfg["hs_endpoint"] # remove the trailing slash on endpoint if it exists - if endpoint.endswith('/'): + if endpoint and endpoint.endswith('/'): endpoint = endpoint.strip('/') if username is None: @@ -433,12 +433,10 @@ def __init__( if bucket: params["bucket"] = bucket - params["CreateOrder"] = "1" if track_order else "0" - # need some special logic for the first request in local mode # to give the sockets time to initialize - if endpoint.startswith("local"): + if endpoint and endpoint.startswith("local"): connect_backoff = [0.5, 1, 2, 4, 8, 16] else: connect_backoff = [] @@ -487,6 +485,10 @@ def __init__( body["owner"] = owner if linked_domain: body["linked_domain"] = linked_domain + if track_order or cfg.track_order: + create_props = {"CreateOrder": 1} + group_body = {"creationProperties": create_props} + body["group"] = group_body rsp = http_conn.PUT(req, params=params, body=body) if rsp.status_code != 201: http_conn.close() @@ -552,22 +554,20 @@ def __init__( groupid = GroupID(None, group_json, http_conn=http_conn) # end else + self._name = "/" self._id = groupid - self._verboseInfo = None # aditional state we'll get when requested + self._verboseInfo = None # additional state we'll get when requested self._verboseUpdated = None # when the verbose data was fetched self._lastScan = None # when summary stats where last updated by server self._dn_ids = dn_ids - self._track_order = track_order self._swmr_mode = swmr Group.__init__(self, self._id, track_order=track_order) def _getVerboseInfo(self): now = time.time() - if ( - self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME - ): + if (self._verboseUpdated is None or now - self._verboseUpdated > VERBOSE_REFRESH_TIME): # resynch the verbose data req = "/?verbose=1" rsp_json = self.GET(req, use_cache=False, params={"CreateOrder": "1" if self._track_order else "0"}) diff --git a/h5pyd/_hl/folders.py b/h5pyd/_hl/folders.py index ab37fcf..bfdfe67 100644 --- a/h5pyd/_hl/folders.py +++ b/h5pyd/_hl/folders.py @@ -17,7 +17,7 @@ import time import logging from .httpconn import HttpConn -from .config import Config +from .. import config class Folder: @@ -143,7 +143,7 @@ def __init__( if mode is None: mode = "r" - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). if endpoint is None and "hs_endpoint" in cfg: endpoint = cfg["hs_endpoint"] diff --git a/h5pyd/_hl/group.py b/h5pyd/_hl/group.py index 2fb9dc0..21404db 100644 --- a/h5pyd/_hl/group.py +++ b/h5pyd/_hl/group.py @@ -25,6 +25,7 @@ from .table import Table from .datatype import Datatype from . import h5type +from .. import config def isUUID(name): @@ -49,7 +50,7 @@ class Group(HLObject, MutableMappingHDF5): """ Represents an HDF5 group. """ - def __init__(self, bind, track_order=False, **kwargs): + def __init__(self, bind, track_order=None, **kwargs): # print "group init, bind:", bind """ Create a new Group object by binding to a low-level GroupID. @@ -58,7 +59,20 @@ def __init__(self, bind, track_order=False, **kwargs): if not isinstance(bind, GroupID): raise ValueError(f"{bind} is not a GroupID") HLObject.__init__(self, bind, **kwargs) - self._track_order = track_order + + if track_order is None: + # set order based on group creation props + gcpl = self.id.gcpl_json + if "CreateOrder" in gcpl: + createOrder = gcpl["CreateOrder"] + if not createOrder or createOrder == "0": + self._track_order = False + else: + self._track_order = True + else: + self._track_order = False + else: + self._track_order = track_order self._req_prefix = "/groups/" + self.id.uuid self._link_db = {} # cache for links @@ -182,9 +196,11 @@ def _get_objdb_links(self): group_json = objdb[self.id.id] return group_json["links"] - def _make_group(self, parent_id=None, parent_name=None, link=None): + def _make_group(self, parent_id=None, parent_name=None, link=None, track_order=None): """ helper function to make a group """ + cfg = config.get_config() + link_json = {} if parent_id: link_json["id"] = parent_id @@ -195,12 +211,17 @@ def _make_group(self, parent_id=None, parent_name=None, link=None): body = {} if link_json: body["link"] = link_json + if track_order or cfg.track_order: + body["creationProperties"] = {"CreateOrder": 1} + self.log.debug(f"create group with body: {body}") rsp = self.POST('/groups', body=body) group_json = rsp groupId = GroupID(self, group_json) sub_group = Group(groupId) + if track_order or cfg.track_order: + sub_group._track_order = True if parent_name: if parent_name[-1] == '/': parent_name = parent_name + link @@ -211,7 +232,7 @@ def _make_group(self, parent_id=None, parent_name=None, link=None): return sub_group - def create_group(self, h5path, track_order=False): + def create_group(self, h5path, track_order=None): """ Create and return a new subgroup. Name may be absolute or relative. Fails if the target name already @@ -223,8 +244,7 @@ def create_group(self, h5path, track_order=False): if h5path is None: # anonymous group - sub_group = self._make_group() - sub_group._track_order = track_order + sub_group = self._make_group(track_order=track_order) return sub_group if h5path[-1] == '/': @@ -255,8 +275,12 @@ def create_group(self, h5path, track_order=False): create_group = True if create_group: - sub_group = self._make_group(parent_id=parent_uuid, parent_name=parent_name, link=link) - sub_group._track_order = track_order + kwargs = {} + kwargs["parent_id"] = parent_uuid + kwargs["parent_name"] = parent_name + kwargs["link"] = link + kwargs["track_order"] = track_order + sub_group = self._make_group(**kwargs) parent_uuid = sub_group.id.id else: @@ -514,7 +538,7 @@ def require_group(self, name): raise TypeError(f"Incompatible object ({grp.__class__.__name__}) already exists") return grp - def getObjByUuid(self, uuid, collection_type=None, track_order=False): + def getObjByUuid(self, uuid, collection_type=None, track_order=None): """ Utility method to get an obj based on collection type and uuid """ self.log.debug(f"getObjByUuid({uuid})") obj_json = None @@ -549,7 +573,10 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=False): # will need to get JSON from server req = f"/{collection_type}/{uuid}" # make server request - obj_json = self.GET(req, params={"CreateOrder": "1" if track_order else "0"}) + params = {} + if track_order is not None: + params["CreateOrder"] = "1" if track_order else "0" + obj_json = self.GET(req, params=params) if collection_type == 'groups': tgt = Group(GroupID(self, obj_json), track_order=track_order) @@ -568,12 +595,12 @@ def getObjByUuid(self, uuid, collection_type=None, track_order=False): return tgt - def __getitem__(self, name, track_order=False): + def __getitem__(self, name, track_order=None): """ Open an object in the file """ # convert bytes to str for PY3 if isinstance(name, bytes): name = name.decode('utf-8') - self.log.debug(f"group.__getitem__({name})") + self.log.debug(f"group.__getitem__({name}, track_order={track_order})") tgt = None if isinstance(name, h5type.Reference): @@ -655,7 +682,7 @@ def _objectify_link_Json(self, link_json): return link_obj - def get(self, name, default=None, getclass=False, getlink=False, track_order=False, **kwds): + def get(self, name, default=None, getclass=False, getlink=False, track_order=None, **kwds): """ Retrieve an item or other information. "name" given only: @@ -696,7 +723,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal """ if not (getclass or getlink): try: - return self.__getitem__(name, track_order) + return self.__getitem__(name, track_order=track_order) except KeyError: return default @@ -739,8 +766,8 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal params["pattern"] = pattern if follow_links: params["follow_links"] = 1 - if track_order: - params["CreateOrder"] = 1 + if track_order is not None: + params["CreateOrder"] = "1" if track_order else "0" if name: body = {} @@ -848,7 +875,10 @@ def __setitem__(self, name, obj): raise IOError("cannot create subgroup of softlink") parent_uuid = link_json["id"] req = "/groups/" + parent_uuid - group_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + group_json = self.GET(req, params=params) tgt = Group(GroupID(self, group_json)) tgt[basename] = obj @@ -946,7 +976,10 @@ def __len__(self): return len(links_json) req = "/groups/" + self.id.uuid - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) return rsp_json['linkCount'] def __iter__(self): @@ -955,7 +988,10 @@ def __iter__(self): if links is None: req = "/groups/" + self.id.uuid + "/links" - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) links = rsp_json['links'] # reset the link cache @@ -1180,7 +1216,10 @@ def visititems(self, func): else: # request from server req = "/groups/" + parent.id.uuid + "/links" - rsp_json = self.GET(req, params={"CreateOrder": "1" if self._track_order else "0"}) + params = {} + if self._track_order is not None: + params["CreateOrder"] = "1" if self._track_order else "0" + rsp_json = self.GET(req, params=params) links = rsp_json['links'] for link in links: obj = None diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index 84e21e0..8d55d6d 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -25,7 +25,7 @@ import logging from . import openid -from .config import Config +from .. import config from . import requests_lambda MAX_CACHE_ITEM_SIZE = 10000 # max size of an item to put in the cache @@ -73,7 +73,7 @@ def getAzureApiKey(): api_key = None # if Azure AD ids are set, pass them to HttpConn via api_key dict - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). ad_app_id = None # Azure AD HSDS Server id if "HS_AD_APP_ID" in os.environ: @@ -114,7 +114,7 @@ def getAzureApiKey(): def getKeycloakApiKey(): # check for keycloak next - cfg = Config() # pulls in state from a .hscfg file (if found). + cfg = config.get_config() # pulls in state from a .hscfg file (if found). api_key = None # check to see if we are configured for keycloak authentication if "HS_KEYCLOAK_URI" in os.environ: @@ -293,7 +293,6 @@ def __init__( if isinstance(api_key, dict): # Maintain Azure-defualt backwards compatibility, but allow # both environment variable and kwarg override. - # provider = Config().get('hs_openid_provider', 'azure') provider = api_key.get("openid_provider", "azure") if provider == "azure": self.log.debug("creating OpenIDHandler for Azure") @@ -327,6 +326,7 @@ def __del__(self): self._s = None def getHeaders(self, username=None, password=None, headers=None): + if headers is None: headers = {} elif "Authorization" in headers: diff --git a/h5pyd/_hl/objectid.py b/h5pyd/_hl/objectid.py index b16b6ca..173f6d5 100644 --- a/h5pyd/_hl/objectid.py +++ b/h5pyd/_hl/objectid.py @@ -168,6 +168,14 @@ def get_type(self): dtype = createDataType(type_json) return dtype + @property + def tcpl_json(self): + if 'creationProperties' in self._obj_json: + tcpl = self._obj_json['creationProperties'] + else: + tcpl = {} + return tcpl + def __init__(self, parent, item, **kwds): """Create a new TypeID. """ @@ -255,3 +263,11 @@ def __init__(self, parent, item, http_conn=None, **kwds): if self.collection_type != "groups": raise IOError(f"Unexpected collection_type: {self._collection_type}") + + @property + def gcpl_json(self): + if 'creationProperties' in self._obj_json: + gcpl = self._obj_json['creationProperties'] + else: + gcpl = {} + return gcpl diff --git a/h5pyd/_hl/openid.py b/h5pyd/_hl/openid.py index aae0a08..e0eb0f0 100644 --- a/h5pyd/_hl/openid.py +++ b/h5pyd/_hl/openid.py @@ -28,7 +28,8 @@ def eprint(*args, **kwargs): pass # change this to the eprint below to see the import error # eprint("Unable to import google auth packages") -from .config import Config + +from .. import config as hsconfig class OpenIDHandler(ABC): @@ -136,7 +137,7 @@ def __init__(self, endpoint, config=None): """Store configuration.""" # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() # Config is a dictionary. if isinstance(config, dict): @@ -256,7 +257,7 @@ def __init__(self, endpoint, config=None, scopes=None): raise ModuleNotFoundError(msg) # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() if scopes is None: scopes = hs_config.get('hs_google_scopes', 'openid').split() @@ -345,7 +346,7 @@ def __init__(self, endpoint, config=None, scopes=None, username=None, password=N """Store configuration.""" # Configuration manager - hs_config = Config() + hs_config = hsconfig.get_config() if scopes is None: scopes = hs_config.get('hs_keycloak_scopes', 'openid').split() diff --git a/h5pyd/_hl/serverinfo.py b/h5pyd/_hl/serverinfo.py index ff9f602..10203cb 100644 --- a/h5pyd/_hl/serverinfo.py +++ b/h5pyd/_hl/serverinfo.py @@ -14,12 +14,12 @@ import time from .httpconn import HttpConn -from .config import Config +from .. import config def getServerInfo(endpoint=None, username=None, password=None, api_key=None, **kwds): - cfg = Config() # get credentials from .hscfg file (if found) + cfg = config.get_config() # get credentials from .hscfg file (if found) if endpoint is None and "hs_endpoint" in cfg: endpoint = cfg["hs_endpoint"] @@ -40,7 +40,7 @@ def getServerInfo(endpoint=None, username=None, password=None, api_key=None, **k # need some special logic for the first request in local mode # to give the sockets time to initialize - if endpoint.startswith("local"): + if endpoint and endpoint.startswith("local"): connect_backoff = [0.5, 1, 2, 4, 8, 16] else: connect_backoff = [] diff --git a/h5pyd/config.py b/h5pyd/config.py index 504dfcb..57b6676 100755 --- a/h5pyd/config.py +++ b/h5pyd/config.py @@ -17,8 +17,11 @@ class Config: """ User Config state """ + _cfg = {} # global state + def __init__(self, config_file=None, **kwargs): - self._cfg = {} + if Config._cfg: + return # already initialized if config_file: self._config_file = config_file elif os.path.isfile(".hscfg"): @@ -39,51 +42,164 @@ def __init__(self, config_file=None, **kwargs): continue fields = s.split('=') if len(fields) < 2: - print("config file: {} line: {} is not valid".format(self._config_file, line_number)) + print(f"config file: {self._config_file} line: {line_number} is not valid") continue k = fields[0].strip() v = fields[1].strip() - self._cfg[k] = v + if k == "complex_names": + self.complex_names = v + elif k == "bool_names": + self.bool_names = v + elif k == "track_order": + self.track_order = v + else: + Config._cfg[k] = v + + # add standard keys if not already picked up + for k in ("hs_endpoint", "hs_username", "hs_password", "hs_api_key"): + if k not in Config._cfg: + Config._cfg[k] = "" + # override any config values with environment variable if found - for k in self._cfg.keys(): + for k in Config._cfg.keys(): if k.upper() in os.environ: - self._cfg[k] = os.environ[k.upper()] + Config._cfg[k] = os.environ[k.upper()] - # finally update any values that are passed in to the constructor + # update any values that are passed in to the constructor for k in kwargs.keys(): - self._cfg[k] = kwargs[k] + Config._cfg[k] = kwargs[k] + + # finally, set defaults for any expected keys that are not already set + for k in ("hs_endpoint", "hs_username", "hs_endpoint"): + if k not in Config._cfg: + Config._cfg[k] = None + if "bool_names" not in Config._cfg: + Config._cfg["bool_names"] = (b"FALSE", b"TRUE") + if "complex_names" not in Config._cfg: + Config._cfg["complex_names"] = ("r", "i") + if "track_order" not in Config._cfg: + Config._cfg["track_order"] = False def __getitem__(self, name): """ Get a config item """ - if name not in self._cfg: + if name not in Config._cfg: if name.upper() in os.environ: - self._cfg[name] = os.environ[name.upper()] + Config._cfg[name] = os.environ[name.upper()] else: return None - return self._cfg[name] + return Config._cfg[name] def __setitem__(self, name, obj): """ set config item """ - self._cfg[name] = obj + Config._cfg[name] = obj def __delitem__(self, name): """ Delete option. """ - del self._cfg[name] + del Config._cfg[name] def __len__(self): - return len(self._cfg) + return len(Config._cfg) def __iter__(self): """ Iterate over config names """ - keys = self._cfg.keys() + keys = Config._cfg.keys() for key in keys: yield key def __contains__(self, name): - return name in self._cfg + return name in Config._cfg def __repr__(self): - return json.dumps(self._cfg) + return json.dumps(Config._cfg) def keys(self): - return self._cfg.keys() + return Config._cfg.keys() + + @property + def hs_endpoint(self): + return Config._cfg.get("hs_endpoint") + + @property + def hs_username(self): + return Config._cfg.get("hs_username") + + @property + def hs_password(self): + return Config._cfg.get("hs_password") + + @property + def hs_api_key(self): + return Config._cfg.get("hs_api_key") + + @property + def bool_names(self): + if "bool_names" in Config._cfg: + names = Config._cfg["bool_names"] + else: + names = (b"FALSE", b"TRUE") + return names + + @bool_names.setter + def bool_names(self, value): + if isinstance(value, str): + names = value.split(()) + if len(names) < 2: + raise ValueError("bool_names must have two items") + elif len(names) == 2: + pass + else: + names = names[:2] # just use the first two items + elif len(value) != 2: + raise ValueError("expected two-element list for bool_names") + else: + names = value + Config._cfg["bool_names"] = tuple(names) + + @property + def complex_names(self): + if "complex_names" in Config._cfg: + names = Config._cfg["complex_names"] + else: + names = ("r", "i") + return names + + @complex_names.setter + def complex_names(self, value): + if isinstance(value, str): + names = value.split() + if len(names) < 2: + raise ValueError("complex_names must have two items") + elif len(names) == 2: + pass + else: + names = names[:2] # just use the first two items + elif len(value) != 2: + raise ValueError("complex_names must have two values") + else: + names = value + + Config._cfg["complex_names"] = tuple(names) + + @property + def track_order(self): + if "track_order" in Config._cfg: + track = Config._cfg["track_order"] + else: + track = False + return track + + @track_order.setter + def track_order(self, value): + if isinstance(value, str): + tokens = value.split() + if len(tokens) == 0: + track = False + else: + track = bool(tokens[0]) # strip any comments + else: + track = bool(value) + Config._cfg["track_order"] = track + + +def get_config(config_file=None, **kwargs): + return Config(config_file=config_file, **kwargs) diff --git a/h5pyd/version.py b/h5pyd/version.py index f1051e9..db13f9e 100644 --- a/h5pyd/version.py +++ b/h5pyd/version.py @@ -16,7 +16,7 @@ import sys import numpy -version = "0.19.0" +version = "0.20.0" hdf5_version = "REST" @@ -28,8 +28,8 @@ else ("",) ) -api_version_tuple = (0, 19, 0) -api_version = "0.19.0" +api_version_tuple = (0, 20, 0) +api_version = "0.20.0" __doc__ = f"""\ This is h5pyd **{version}** diff --git a/pyproject.toml b/pyproject.toml index fe230ab..28239dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] requires-python = ">=3.8" -version = "0.19.0" +version = "0.20.0" dependencies = [ "numpy >=2.0.0rc1; python_version>='3.9'", diff --git a/test/hl/test_attribute.py b/test/hl/test_attribute.py index 21d2290..408ed99 100644 --- a/test/hl/test_attribute.py +++ b/test/hl/test_attribute.py @@ -292,49 +292,68 @@ def test_delete_multiple(self): class TestTrackOrder(TestCase): - def fill_attrs(self, track_order): - attrs = self.f.create_group('test', track_order=track_order).attrs - for i in range(100): - attrs[str(i)] = i - return attrs + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + + def fill_attrs(self, obj): + count = len(self.titles) + attrs = obj.attrs + for i in range(count): + title = self.titles[i] + val = i + 1 + attrs[title] = val - # https://forum.hdfgroup.org/t/bug-h5arename-fails-unexpectedly/4881 def test_track_order(self): filename = self.getFileName("test_test_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=True) # creation order - self.assertEqual(list(attrs), - [str(i) for i in range(100)]) + with h5py.File(filename, 'w') as f: + g1 = f.create_group('test', track_order=True) + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + # group should return track order + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) + + def test_track_order_cfg(self): + filename = self.getFileName("test_test_track_order_attribute") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True + g1 = f.create_group('test') + cfg.track_order = False # reset + + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), list(self.titles)) + + with h5py.File(filename) as f: + g1 = f['test'] + self.assertEqual(list(g1.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_test_no_track_order_attribute") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - attrs = self.fill_attrs(track_order=False) # name alphanumeric - self.assertEqual(list(attrs), - sorted([str(i) for i in range(100)])) - - def fill_attrs2(self, track_order): - group = self.f.create_group('test', track_order=track_order) - for i in range(12): - group.attrs[str(i)] = i - return group + f = h5py.File(filename, 'w') + g1 = f.create_group('test') # name alphanumeric + self.fill_attrs(g1) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) def test_track_order_overwrite_delete(self): filename = self.getFileName("test_test_track_order_overwrite_delete") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - # issue h5py#1385 - group = self.fill_attrs2(track_order=True) # creation order - self.assertEqual(group.attrs["11"], 11) + f = h5py.File(filename, 'w') + + g1 = f.create_group("g1", track_order=True) # creation order + self.fill_attrs(g1) + title = 'three' + self.assertEqual(g1.attrs[title], 3) # overwrite attribute - group.attrs['11'] = 42.0 - self.assertEqual(group.attrs["11"], 42.0) + g1.attrs[title] = 42.0 + self.assertEqual(g1.attrs[title], 42.0) # delete attribute - self.assertIn('10', group.attrs) - del group.attrs['10'] - self.assertNotIn('10', group.attrs) + self.assertIn(title, g1.attrs) + del g1.attrs[title] + self.assertNotIn(title, g1.attrs) def test_track_order_not_inherited(self): """ @@ -343,13 +362,11 @@ def test_track_order_not_inherited(self): """ filename = self.getFileName("test_test_track_order_not_inherited") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w', track_order=True) - group = self.f.create_group('test') - - for i in range(12): - group.attrs[str(i)] = i + f = h5py.File(filename, 'w', track_order=True) + g1 = f.create_group('test') + self.fill_attrs(g1) - self.assertEqual(list(group.attrs), sorted([str(i) for i in range(12)])) + self.assertEqual(list(g1.attrs), sorted(list(self.titles))) if __name__ == '__main__': diff --git a/test/hl/test_config.py b/test/hl/test_config.py new file mode 100644 index 0000000..1112e70 --- /dev/null +++ b/test/hl/test_config.py @@ -0,0 +1,57 @@ +############################################################################## +# Copyright by The HDF Group. # +# All rights reserved. # +# # +# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and # +# Utilities. The full HDF5 REST Server copyright notice, including # +# terms governing use, modification, and redistribution, is contained in # +# the file COPYING, which can be found at the root of the source code # +# distribution tree. If you do not have access to this file, you may # +# request a copy from help@hdfgroup.org. # +############################################################################## + +import numpy as np +import logging +import config + + +if config.get("use_h5py"): + import h5py +else: + import h5pyd as h5py + +from common import ut, TestCase + + +class TestConfig(TestCase): + + def test_config_h5py(self): + cfg = h5py.get_config() + + self.assertEqual(cfg.bool_names, (b"FALSE", b"TRUE")) + self.assertEqual(cfg.complex_names, ("r", "i")) + self.assertEqual(cfg.track_order, False) + + cfg.bool_names = ("nope", "yep") + cfg.complex_names = ("real", "imag") + cfg.track_order = True + + cfg2 = h5py.get_config() + self.assertEqual(cfg2.bool_names, ("nope", "yep")) + self.assertEqual(cfg2.complex_names, ("real", "imag")) + self.assertEqual(cfg2.track_order, True) + + def test_config_hs(self): + if config.get("use_h5py"): + return # test with h5pyd only + cfg = h5py.get_config() + self.assertTrue(cfg.hs_endpoint.startswith("http")) + cfg["XYZ"] = 42 + cfg2 = h5py.get_config() + self.assertEqual(cfg2["XYZ"], 42) + + +if __name__ == '__main__': + loglevel = logging.ERROR + logging.basicConfig(format='%(asctime)s %(message)s', level=loglevel) + ut.main() diff --git a/test/hl/test_file.py b/test/hl/test_file.py index 34c6826..a8f6760 100644 --- a/test/hl/test_file.py +++ b/test/hl/test_file.py @@ -350,29 +350,65 @@ def test_close(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") + def populate(self, f): - for i in range(100): + count = len(self.titles) + # create count datasets/groups + for i in range(count): + title = self.titles[i] # Mix group and dataset creation. - if i % 10 == 0: - f.create_group(str(i)) + if i % 2 == 0: + f.create_group(title) else: - f[str(i)] = [i] + f[title] = [i] + # create count attributes + for i in range(count): + title = self.titles[i] + f.attrs[title] = i def test_track_order(self): filename = self.getFileName("test_track_order_file") print(f"filename: {filename}") - f = h5py.File(filename, 'w', track_order=True) # creation order - self.populate(f) - self.assertEqual(list(f), - [str(i) for i in range(100)]) + # write file using creation order + with h5py.File(filename, 'w', track_order=True) as f: + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + with h5py.File(filename) as f: + # domain/file should have been saved with track_order state + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + + def test_cfg_track_order(self): + filename = self.getFileName("test_cfg_track_order_file") + print(f"filename: {filename}") + # write file using creation order + cfg = h5py.get_config() + cfg.track_order = True + with h5py.File(filename, 'w') as f: + self.populate(f) + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) + cfg.track_order = False # reset + + with h5py.File(filename) as f: + # domain/file should have been saved with track_order state + self.assertEqual(list(f), list(self.titles)) + self.assertEqual(list(f.attrs), list(self.titles)) def test_no_track_order(self): filename = self.getFileName("test_no_track_order_file") print(f"filename: {filename}") - f = h5py.File(filename, 'w', track_order=False) # name alphanumeric - self.populate(f) - self.assertEqual(list(f), - sorted([str(i) for i in range(100)])) + + # create file using alphanumeric order + with h5py.File(filename, 'w', track_order=False) as f: + self.populate(f) + self.assertEqual(list(f), sorted(self.titles)) + + with h5py.File(filename) as f: # name alphanumeric + self.assertEqual(list(f), sorted(self.titles)) if __name__ == '__main__': diff --git a/test/hl/test_group.py b/test/hl/test_group.py index b4db405..89d391d 100644 --- a/test/hl/test_group.py +++ b/test/hl/test_group.py @@ -313,7 +313,7 @@ def test_link_multi_removal(self): if config.get("use_h5py"): return filename = self.getFileName("test_link_multi_removal") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -358,7 +358,7 @@ def test_link_multi_create(self): return filename = self.getFileName("test_link_multi_create") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -438,7 +438,7 @@ def test_link_get_multi(self): return filename = self.getFileName("test_link_get_multi") - print(filename) + print(f"filename: {filename}") f = h5py.File(filename, 'w') g1 = f.create_group("g1") @@ -550,40 +550,87 @@ def test_link_get_multi(self): class TestTrackOrder(TestCase): + titles = ("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten") def populate(self, g): - for i in range(100): + count = len(self.titles) + for i in range(count): # Mix group and dataset creation. - if i % 10 == 0: - g.create_group(str(i)) + if i % 2 == 0: + g.create_group(self.titles[i]) else: - g[str(i)] = [i] + g[self.titles[i]] = [i] def populate_attrs(self, d): - for i in range(100): - d.attrs[str(i)] = i + count = len(self.titles) + for i in range(count): + d.attrs[self.titles[i]] = i def test_track_order(self): filename = self.getFileName("test_track_order_group") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order', track_order=True) # creation order - self.populate(g) - - ref = [str(i) for i in range(100)] - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order', track_order=True) # creation order + self.populate(g) + + ref = self.titles + self.assertEqual(tuple(g), ref) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + # re-opening the file should retain the track_order setting + with h5py.File(filename) as f: + g = f['order'] + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + self.assertEqual(tuple(reversed(g)), tuple(reversed(self.titles))) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + def test_track_order_cfg(self): + filename = self.getFileName("test_track_order_cfg_group") + print(f"filename: {filename}") + cfg = h5py.get_config() + with h5py.File(filename, 'w') as f: + cfg.track_order = True # creation order + g = f.create_group('order') + cfg.track_order = False # reset + self.populate(g) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 + + # re-opening the file should retain the track_order setting + with h5py.File(filename) as f: + g = f['order'] + self.assertEqual(len(g), len(self.titles)) + self.assertEqual(tuple(g), self.titles) + i = 0 + for title in g: + self.assertEqual(title, self.titles[i]) + i += 1 def test_no_track_order(self): filename = self.getFileName("test_no_track_order_group") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order', track_order=False) # name alphanumeric - self.populate(g) - - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(g), ref) - self.assertEqual(list(reversed(g)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order', track_order=False) # name alphanumeric + self.populate(g) + ref = sorted(self.titles) + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) + + with h5py.File(filename) as f: + g = f['order'] # name alphanumeric + ref = sorted(self.titles) + self.assertEqual(list(g), ref) + self.assertEqual(list(reversed(g)), list(reversed(ref))) def test_get_dataset_track_order(self): @@ -593,28 +640,24 @@ def test_get_dataset_track_order(self): filename = self.getFileName("test_get_dataset_track_order") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order') + with h5py.File(filename, 'w') as f: + g = f.create_group('order') - dset = g.create_dataset('dset', (10,), dtype='i4') - dset2 = g.create_dataset('dset2', (10,), dtype='i4') + dset = g.create_dataset('dset', (10,), dtype='i4') + dset2 = g.create_dataset('dset2', (10,), dtype='i4') - self.populate_attrs(dset) - self.populate_attrs(dset2) + self.populate_attrs(dset) + self.populate_attrs(dset2) - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] + with h5py.File(filename) as f: + g = f['order'] - d = g.get('dset', track_order=True) - ref = [str(i) for i in range(100)] - self.assertEqual(list(d.attrs), ref) - self.assertEqual(list(reversed(d.attrs)), list(reversed(ref))) + d = g.get('dset', track_order=True) + self.assertEqual(list(d.attrs), list(self.titles)) - d2 = g.get('dset2', track_order=False) - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(d2.attrs), ref) - self.assertEqual(list(reversed(d2.attrs)), list(reversed(ref))) + d2 = g.get('dset2', track_order=False) + ref = sorted(self.titles) + self.assertEqual(list(d2.attrs), ref) def test_get_group_track_order(self): # h5py does not support track_order on group.get() @@ -622,29 +665,21 @@ def test_get_group_track_order(self): return filename = self.getFileName("test_get_group_track_order") print(f"filename: {filename}") - self.f = h5py.File(filename, 'w') - g = self.f.create_group('order') - - # create subgroup and populate it with links - g.create_group('subgroup') - self.populate(g['subgroup']) - - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] - - subg = g.get('subgroup', track_order=True) - ref = [str(i) for i in range(100)] - self.assertEqual(list(subg), ref) - self.assertEqual(list(reversed(subg)), list(reversed(ref))) - - self.f.close() - self.f = h5py.File(filename, 'r') - g = self.f['order'] - subg2 = g.get('subgroup', track_order=False) - ref = sorted([str(i) for i in range(100)]) - self.assertEqual(list(subg2), ref) - self.assertEqual(list(reversed(subg2)), list(reversed(ref))) + with h5py.File(filename, 'w') as f: + g = f.create_group('order') + # create subgroup and populate it with links + g.create_group('subgroup') + self.populate(g['subgroup']) + + with h5py.File(filename) as f: + g = f['order'] + subg = g.get('subgroup', track_order=True) + self.assertEqual(tuple(subg), self.titles) + + with h5py.File(filename) as f: + g = f['order'] + subg2 = g.get('subgroup', track_order=False) + self.assertEqual(list(subg2), sorted(self.titles)) if __name__ == '__main__': diff --git a/testall.py b/testall.py index 10efbf1..860cafc 100755 --- a/testall.py +++ b/testall.py @@ -16,6 +16,7 @@ hl_tests = ('test_attribute', + 'test_config', 'test_committedtype', 'test_complex_numbers', 'test_dataset',