Skip to content

Commit

Permalink
Replace and Update Datetime Parser (#525)
Browse files Browse the repository at this point in the history
* add timestamp2datetime function

* Replace datenum8601 with internal datetime parser

* Revert error id change for correctType function

* Explicitly allow datetime cell arrays

datetimes cannot be concatenated if their time zone values are different
for some reason.

* fix datetime logic error

* Fix time zone formatting

* Fix correctType behavior with cell datetimes

* Refactor and fix tests

- Slightly shorten some test data.
- Fix datetime comparison precision.

* Broaden datetime detection from string

Includes MATLAB default common times and moves formatting into
correctType instead.

* Fix missing format call when reading ISO 8601

* Cover all datetime parse failure error ids

* Loosen datetime comparisons in python testing

* Fix logic error when comparing matlab datetimes

* Fix Datastub loading char matrices

New MATLAB versions might return an array of strings from h5load().
This was not handled correctly beforehand and accidentally created char
matrices. These now convert string arrays properly to cell arrays of
character vectors.

---------

Co-authored-by: Lawrence Niu <[email protected]>
  • Loading branch information
lawrence-mbf and lawrence-mbf authored Jul 27, 2023
1 parent 601ee60 commit b7583a3
Show file tree
Hide file tree
Showing 9 changed files with 334 additions and 550 deletions.
9 changes: 6 additions & 3 deletions +io/mapData2H5.m
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
forceChunked = any(strcmp('forceChunking', varargin));

if iscell(data)
assert(iscellstr(data), 'NWB:MapData:NonCellStr', ['Cell arrays must be ' ...
'cell arrays of character vectors. Cell arrays of other types are ' ...
'not supported.']);
assert(...
iscellstr(data) ...
|| all(cellfun('isclass', data, 'datetime')) ...
|| all(cellfun('isclass', data, 'string')) ...
, 'NWB:MapData:NonCellStr', ['Cell arrays must be cell arrays of character vectors. ' ...
'Cell arrays of other types are not supported.']);
end
tid = io.getBaseType(class(data));

Expand Down
123 changes: 123 additions & 0 deletions +io/timestamp2datetime.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
function Datetimes = timestamp2datetime(timestamps)
    %TIMESTAMP2DATETIME converts string timestamps to MATLAB datetime object
    %   Datetimes = TIMESTAMP2DATETIME(timestamps) accepts a string array, a
    %   character array, or a cell array of those (see timestamp2cellstr) and
    %   returns one datetime per timestamp. Each timestamp is first handed to
    %   MATLAB's datetime() constructor; only if MATLAB reports that it does not
    %   recognize the text is the manual parser detectDatetime used instead.
    %   Any other error raised by datetime() is rethrown unchanged.
    %
    %   For non-empty input the result is a row vector indexed in linear
    %   (column-major) order of the input. For empty input an empty datetime
    %   array of the same size as the (converted) input is returned.
    %
    %   NOTE(review): results are stored in a single datetime array, so mixing
    %   zoned and unzoned timestamps in one call is expected to fail on
    %   assignment -- confirm callers never do this.

    timestamps = timestamp2cellstr(timestamps);

    % Without this guard the loop body never runs for empty input and the
    % output variable would be left unassigned.
    if isempty(timestamps)
        Datetimes = datetime.empty(size(timestamps));
        return;
    end

    % Error identifiers MATLAB uses for "could not recognize the date text".
    % Both the singular and the plural spelling are listed.
    unrecognizedStringId = { ...
        'MATLAB:datetime:UnrecognizedDateStringSuggestLocale' ...
        , 'MATLAB:datetime:UnrecognizedDateStringsSuggestLocale' ...
        };

    % numel (not length) so that every element of a 2-D input is visited.
    for iTimestamp = 1:numel(timestamps)
        timestampString = timestamps{iTimestamp};
        try
            Datetime = datetime(timestampString);
        catch ME
            if any(strcmp(ME.identifier, unrecognizedStringId))
                Datetime = detectDatetime(timestampString);
            else
                rethrow(ME);
            end
        end
        Datetimes(iTimestamp) = Datetime;
    end
end

function Datetime = detectDatetime(timestamp)
    %DETECTDATETIME manually parse a timestamp character vector.
    %   Layout accepted by the code below:
    %     date      : YYYY-mm-dd or YYYYmmdd
    %     time      : optional, introduced by the first 'T';
    %                 HH:mm:ss[.ssssss] or HHmmss[.ssssss]
    %     time zone : optional; everything from the first 'Z', '+' or '-' found
    %                 after the 'T' is assigned verbatim to Datetime.TimeZone.
    %
    %   Errors:
    %     NWB:InvalidTimestamp - date or time portion has the wrong length or
    %                            contains non-numeric fields.
    %     If MATLAB rejects the time zone text, MATLAB's own error is thrown
    %     (as the caller) with an NWB:InvalidTimestamp exception attached as a
    %     cause.
    errorId = 'NWB:InvalidTimestamp';
    errorTemplate = sprintf('Timestamp `%s` is not a valid ISO8601 subset for NWB:\n %%s', timestamp);

    %% YMoD
    hmsStart = find(timestamp == 'T', 1);
    if isempty(hmsStart)
        ymdStamp = timestamp;
    else
        ymdStamp = extractBefore(timestamp, hmsStart);
    end
    errorMessage = sprintf(errorTemplate, 'YMD should be in the form YYYY-mm-dd or YYYYmmdd');
    if contains(ymdStamp, '-')
        assert(length(ymdStamp) == 10, errorId, errorMessage);
        YmdToken = struct(...
            'Year', ymdStamp(1:4) ...
            , 'Month', ymdStamp(6:7) ...
            , 'Day', ymdStamp(9:10) ...
            );
    else
        assert(length(ymdStamp) == 8, errorId, errorMessage);
        YmdToken = struct(...
            'Year', ymdStamp(1:4) ...
            , 'Month', ymdStamp(5:6) ...
            , 'Day', ymdStamp(7:8) ...
            );
    end
    % Build the date in a single constructor call. Assigning Year, Month and
    % Day one at a time onto a placeholder date lets an intermediate,
    % non-existent date (e.g. day 30 combined with February) roll over into
    % the next month before Day is applied.
    yearValue = str2double(YmdToken.Year);
    monthValue = str2double(YmdToken.Month);
    dayValue = str2double(YmdToken.Day);
    Datetime = datetime(yearValue, monthValue, dayValue, 0, 0, 0, 0);
    % str2double yields NaN for non-numeric text, which results in NaT.
    assert(~isnat(Datetime), errorId, sprintf(errorTemplate, 'non-numeric YMD values detected'));

    %% HMiS TZ
    if isempty(hmsStart)
        return;
    end
    afterDateStamp = extractAfter(timestamp, 'T'); % needs to do this so we don't have wrong '-' checks.
    timeZoneStart = find(afterDateStamp == 'Z' | afterDateStamp == '+' | afterDateStamp == '-', 1);
    if isempty(timeZoneStart)
        hmsStamp = afterDateStamp;
    else
        hmsStamp = extractBefore(afterDateStamp, timeZoneStart);
    end
    errorMessage = sprintf(errorTemplate ...
        , 'H:m:s should be in the form HH:mm:ss.ssssss or HHmmss.ssssss');
    if contains(hmsStamp, ':')
        % note, seconds must be at least 2 digits
        assert(length(hmsStamp) >= 8, errorId, errorMessage);
        HmsToken = struct(...
            'Hour', hmsStamp(1:2) ...
            , 'Minute', hmsStamp(4:5) ...
            , 'Second', hmsStamp(7:end) ...
            );
    else
        assert(length(hmsStamp) >= 6, errorId, errorMessage);
        HmsToken = struct(...
            'Hour', hmsStamp(1:2) ...
            , 'Minute', hmsStamp(3:4) ...
            , 'Second', hmsStamp(5:end) ...
            );
    end
    Datetime.Hour = str2double(HmsToken.Hour);
    Datetime.Minute = str2double(HmsToken.Minute);
    Datetime.Second = str2double(HmsToken.Second);
    assert(~isnat(Datetime), errorId, sprintf(errorTemplate, 'non-numeric H:m:s values detected'));

    %% TimeZone
    if isempty(timeZoneStart)
        return;
    end
    timeZoneStamp = afterDateStamp(timeZoneStart:end);
    try
        Datetime.TimeZone = timeZoneStamp;
    catch ME
        Cause = MException(errorId ...
            , sprintf(errorTemplate, sprintf('invalid time zone `%s` provided', timeZoneStamp)));
        % MException is a value class: addCause returns the modified copy, so
        % the result must be captured or the cause is silently dropped.
        ME = addCause(ME, Cause);
        throwAsCaller(ME);
    end
end

function cells = timestamp2cellstr(timestamps)
    %TIMESTAMP2CELLSTR normalize supported timestamp containers to a cell array.
    %   string array    -> cell array of the same size, each element converted
    %                      with char()
    %   cell array      -> cell array of the same size; each element is
    %                      converted recursively and must resolve to exactly
    %                      one entry (a multi-element nested container fails
    %                      on assignment)
    %   character array -> 1x1 cell wrapping the input as-is
    %   anything else   -> error
    if isstring(timestamps)
        cells = num2cell(timestamps);
        % numel (not length) so every element of a 2-D string array is
        % converted rather than only the first max(size) entries.
        for iString = 1:numel(cells)
            cells{iString} = char(cells{iString});
        end
    elseif iscell(timestamps)
        cells = cell(size(timestamps));
        % numel (not length) for the same reason as above.
        for iElement = 1:numel(timestamps)
            cells(iElement) = timestamp2cellstr(timestamps{iElement});
        end
    elseif ischar(timestamps)
        cells = {timestamps};
    else
        error(['timestamps must be a ' ...
            , 'string, character array, or cell array of strings/character arrays.']);
    end
end

67 changes: 34 additions & 33 deletions +tests/+system/PyNWBIOTest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import unittest
from datetime import datetime
from datetime import datetime, timedelta
import os.path
import numpy as np
from dateutil.tz import tzlocal, tzoffset
Expand Down Expand Up @@ -64,47 +64,48 @@ def assertContainerEqual(self, container1, container2): # noqa: C901
container_fields = container1.__fields__
for nwbfield in container_fields:
with self.subTest(nwbfield=nwbfield, container_type=type1.__name__):
f1 = getattr(container1, nwbfield)
f2 = getattr(container2, nwbfield)
if isinstance(f1, (tuple, list, np.ndarray)):
if len(f1) > 0:
if isinstance(f1[0], Container):
for sub1, sub2 in zip(f1, f2):
field1 = getattr(container1, nwbfield)
field2 = getattr(container2, nwbfield)
if isinstance(field1, (tuple, list, np.ndarray)):
if len(field1) > 0:
if isinstance(field1[0], Container):
for sub1, sub2 in zip(field1, field2):
self.assertContainerEqual(sub1, sub2)
elif isinstance(f1[0], Data):
for sub1, sub2 in zip(f1, f2):
elif isinstance(field1[0], Data):
for sub1, sub2 in zip(field1, field2):
self.assertDataEqual(sub1, sub2)
continue
else:
self.assertEqual(len(f1), len(f2))
if len(f1) == 0:
self.assertEqual(len(field1), len(field2))
if len(field1) == 0:
continue
if isinstance(f1[0], float):
for v1, v2 in zip(f1, f2):
if isinstance(field1[0], float):
for v1, v2 in zip(field1, field2):
self.assertAlmostEqual(v1, v2, places=6)
else:
self.assertTrue(np.array_equal(f1, f2))
elif isinstance(f1, dict) and len(f1) and isinstance(next(iter(f1.values())), Container):
f1_keys = set(f1.keys())
f2_keys = set(f2.keys())
self.assertSetEqual(f1_keys, f2_keys)
for k in f1_keys:
self.assertTrue(np.array_equal(field1, field2))
elif isinstance(field1, dict) and len(field1) and isinstance(next(iter(field1.values())), Container):
field1_keys = set(field1.keys())
field2_keys = set(field2.keys())
self.assertSetEqual(field1_keys, field2_keys)
for k in field1_keys:
with self.subTest(module_name=k):
self.assertContainerEqual(f1[k], f2[k])
elif isinstance(f1, Container) or isinstance(f1, Container):
self.assertContainerEqual(f1, f2)
elif isinstance(f1, Data) or isinstance(f2, Data):
if isinstance(f1, Data) and isinstance(f2, Data):
self.assertDataEqual(f1, f2)
elif isinstance(f1, Data):
self.assertTrue(np.array_equal(f1.data, f2))
elif isinstance(f2, Data):
self.assertTrue(np.array_equal(f1.data, f2))
self.assertContainerEqual(field1[k], field2[k])
elif isinstance(field1, Container) or isinstance(field1, Container):
self.assertContainerEqual(field1, field2)
elif isinstance(field1, Data) and isinstance(field2, Data):
self.assertDataEqual(field1, field2)
elif isinstance(field1, Data) or isinstance(field2, Data):
self.assertTrue(np.array_equal(field1.data, field2))
elif isinstance(field1, (float, np.float32, np.float16, h5py.Dataset)):
npt.assert_almost_equal(field1, field2)
elif isinstance(field1, datetime):
self.assertTrue(isinstance(field2, datetime))
field1_upper = field1 + timedelta(milliseconds = 1)
field1_lower = field1 - timedelta(milliseconds = 1)
self.assertTrue(field2 >= field1_lower and field2 <= field1_upper)
else:
if isinstance(f1, (float, np.float32, np.float16, h5py.Dataset)):
npt.assert_almost_equal(f1, f2)
else:
self.assertEqual(f1, f2)
self.assertEqual(field1, field2)

def assertDataEqual(self, data1, data2):
self.assertEqual(type(data1), type(data2))
Expand Down
6 changes: 2 additions & 4 deletions +tests/+unit/dataStubTest.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ function setup(testCase)
end

function testRegionRead(testCase)
date = datetime(2018, 3, 1, 12, 0, 0);
session_start_time = datetime(date,'Format','yyyy-MM-dd''T''HH:mm:SSZZ',...
'TimeZone','local');

nwb = NwbFile(...
'session_description', 'a test NWB File', ...
'identifier', 'mouse004_day4', ...
'session_start_time', session_start_time);
'session_start_time', datetime(2018, 3, 1, 12, 0, 0, 'TimeZone', 'local'));

data = reshape(1:5000, 25, 5, 4, 2, 5);

Expand Down
74 changes: 38 additions & 36 deletions +tests/+util/verifyContainerEqual.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,54 +11,56 @@ function verifyContainerEqual(testCase, actual, expected, ignoreList)
for i = 1:numel(props)
prop = props{i};

actualVal = actual.(prop);
expectedVal = expected.(prop);
failmsg = ['Values for property ''' prop ''' are not equal'];
actualValue = actual.(prop);
expectedValue = expected.(prop);
failureMessage = ['Values for property ''' prop ''' are not equal'];

if isa(actualVal, 'types.untyped.DataStub')
actualVal = actualVal.load();
if isa(actualValue, 'types.untyped.DataStub')
actualValue = actualValue.load();
end

if startsWith(class(expectedVal), 'types.') && ~startsWith(class(expectedVal), 'types.untyped')
tests.util.verifyContainerEqual(testCase, actualVal, expectedVal);
elseif isa(expectedVal, 'types.untyped.Set')
tests.util.verifySetEqual(testCase, actualVal, expectedVal, failmsg);
elseif ischar(expectedVal)
testCase.verifyEqual(char(actualVal), expectedVal, failmsg);
elseif isa(expectedVal, 'types.untyped.ObjectView') || isa(expectedVal, 'types.untyped.SoftLink')
testCase.verifyEqual(actualVal.path, expectedVal.path, failmsg);
elseif isa(expectedVal, 'types.untyped.RegionView')
testCase.verifyEqual(actualVal.path, expectedVal.path, failmsg);
testCase.verifyEqual(actualVal.region, expectedVal.region, failmsg);
elseif isa(expectedVal, 'types.untyped.Anon')
testCase.verifyEqual(actualVal.name, expectedVal.name, failmsg);
tests.util.verifyContainerEqual(testCase, actualVal.value, expectedVal.value);
elseif isdatetime(expectedVal)...
|| (iscell(expectedVal) && all(cellfun('isclass', expectedVal, 'datetime')))
if startsWith(class(expectedValue), 'types.') && ~startsWith(class(expectedValue), 'types.untyped')
tests.util.verifyContainerEqual(testCase, actualValue, expectedValue);
elseif isa(expectedValue, 'types.untyped.Set')
tests.util.verifySetEqual(testCase, actualValue, expectedValue, failureMessage);
elseif ischar(expectedValue)
testCase.verifyEqual(char(actualValue), expectedValue, failureMessage);
elseif isa(expectedValue, 'types.untyped.ObjectView') || isa(expectedValue, 'types.untyped.SoftLink')
testCase.verifyEqual(actualValue.path, expectedValue.path, failureMessage);
elseif isa(expectedValue, 'types.untyped.RegionView')
testCase.verifyEqual(actualValue.path, expectedValue.path, failureMessage);
testCase.verifyEqual(actualValue.region, expectedValue.region, failureMessage);
elseif isa(expectedValue, 'types.untyped.Anon')
testCase.verifyEqual(actualValue.name, expectedValue.name, failureMessage);
tests.util.verifyContainerEqual(testCase, actualValue.value, expectedValue.value);
elseif isdatetime(expectedValue)...
|| (iscell(expectedValue) && all(cellfun('isclass', expectedValue, 'datetime')))
% linux MATLAB doesn't appear to propery compare datetimes whereas
% Windows MATLAB does. This is a workaround to get tests to work
% while getting close enough to exact date representation.
actualVal = types.util.checkDtype(prop, 'datetime', actualVal);
if ~iscell(expectedVal)
expectedVal = {expectedVal};
actualValue = types.util.checkDtype(prop, 'datetime', actualValue);
if ~iscell(expectedValue)
expectedValue = num2cell(expectedValue);
end
if ~iscell(actualVal)
actualVal = {actualVal};
if ~iscell(actualValue)
actualValue = num2cell(actualValue);
end
for iDates = 1:length(expectedVal)
for iDates = 1:length(expectedValue)
% ignore microseconds as linux datetime has some strange error
% even when datetime doesn't change in Windows.
actualNtfs = convertTo(actualVal{iDates}, 'ntfs');
expectedNtfs = convertTo(expectedVal{iDates}, 'ntfs');
testCase.verifyGreaterThanOrEqual(actualNtfs, expectedNtfs - 10, failmsg);
testCase.verifyLessThanOrEqual(actualNtfs, expectedNtfs + 10, failmsg);
ActualDate = actualValue{iDates};
ExpectedDate = expectedValue{iDates};
ExpectedUpperBound = ExpectedDate + milliseconds(1);
ExpectedLowerBound = ExpectedDate - milliseconds(1);
testCase.verifyTrue(isbetween(ActualDate, ExpectedLowerBound, ExpectedUpperBound) ...
, failureMessage);
end
elseif startsWith(class(expectedVal), 'int')
testCase.verifyEqual(int64(actualVal), int64(expectedVal), failmsg);
elseif startsWith(class(expectedVal), 'uint')
testCase.verifyEqual(uint64(actualVal), uint64(expectedVal), failmsg);
elseif startsWith(class(expectedValue), 'int')
testCase.verifyEqual(int64(actualValue), int64(expectedValue), failureMessage);
elseif startsWith(class(expectedValue), 'uint')
testCase.verifyEqual(uint64(actualValue), uint64(expectedValue), failureMessage);
else
testCase.verifyEqual(actualVal, expectedVal, failmsg);
testCase.verifyEqual(actualValue, expectedValue, failureMessage);
end
end
end
2 changes: 1 addition & 1 deletion +types/+untyped/DataStub.m
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
if iscellstr(data) && isscalar(data)
data = data{1};
elseif isstring(data)
data = char(data);
data = convertStringsToChars(data);
end
case 'logical'
% data assumed to be cell array of enum string
Expand Down
Loading

0 comments on commit b7583a3

Please sign in to comment.