-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix an issue where the SDK fails with a `TimeseriesServiceError` when querying time series data for more than 100 variables (#40)

Closes #7.

This PR fixes an issue where the user was allowed to request timeseries data for any number of variables, but the request would fail because the `timeseries` API accepts a maximum of `100` variables per request. We now split the requested variables into chunks of size `<= 100`, issue a request for each chunk, and then merge the data returned across all of the responses. This PR also refactors the logic for fetching timeseries data so that it has less repetition; previously, there was substantial overlap between the code in the `get_timeseries` and `get_timeseries_with_resampling` methods.
- Loading branch information
1 parent
fcce1e7
commit dd84243
Showing
7 changed files
with
425 additions
and
72 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from typing import Iterable, Sequence, TypeVar

MINIMUM_CHUNK_SIZE = 1

T = TypeVar("T")


def chunk(seq: Sequence[T], chunk_size: int) -> Iterable[Sequence[T]]:
    """Split *seq* into consecutive slices of at most *chunk_size* elements.

    Every slice except possibly the last has exactly *chunk_size* elements;
    the last one holds whatever remains. An empty *seq* yields no slices.

    The size check happens eagerly (when ``chunk`` is called), while the
    slices themselves are produced lazily as the result is iterated.

    :param seq: the sequence to split.
    :param chunk_size: maximum number of elements per slice; must be at
        least ``MINIMUM_CHUNK_SIZE``.
    :returns: an iterable of sub-sequences of *seq*, in original order.
    :raises ValueError: if *chunk_size* is smaller than ``MINIMUM_CHUNK_SIZE``.
    """
    if chunk_size < MINIMUM_CHUNK_SIZE:
        raise ValueError(f"{chunk_size=} is less than {MINIMUM_CHUNK_SIZE=}")

    def _slices() -> Iterable[Sequence[T]]:
        start = 0
        total = len(seq)
        while start < total:
            yield seq[start : start + chunk_size]
            start += chunk_size

    return _slices()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
from datetime import datetime, timedelta, timezone

import pytest

from enlyze.api_clients.timeseries.models import TimeseriesData

# Number of leading columns occupied by the timestamp. We use this both to
# skip the timestamp column when slicing records (assuming it sits at the
# beginning of each record) and to account for the single shared timestamp
# column when computing expected merged lengths.
TIMESTAMP_OFFSET = 1
|
||
|
||
@pytest.fixture
def timestamp():
    """A timezone-aware reference instant shared by the data fixtures."""
    return datetime.now(timezone.utc)
|
||
|
||
@pytest.fixture
def timeseries_data_1(timestamp):
    """TimeseriesData with a timestamp column and two variables, two records."""
    ten_minutes_earlier = (timestamp - timedelta(minutes=10)).isoformat()
    return TimeseriesData(
        columns=["time", "var1", "var2"],
        records=[
            [timestamp.isoformat(), 1, 2],
            [ten_minutes_earlier, 3, 4],
        ],
    )
|
||
|
||
@pytest.fixture
def timeseries_data_2(timestamp):
    """TimeseriesData with the same timestamps as data_1 but one variable."""
    ten_minutes_earlier = (timestamp - timedelta(minutes=10)).isoformat()
    return TimeseriesData(
        columns=["time", "var3"],
        records=[
            [timestamp.isoformat(), 5],
            [ten_minutes_earlier, 6],
        ],
    )
|
||
|
||
class TestTimeseriesData:
    """Unit tests for ``TimeseriesData.merge``."""

    def test_merge(self, timeseries_data_1, timeseries_data_2):
        """Merging appends the other instance's non-timestamp columns in place."""
        records_len_1 = len(timeseries_data_1.records)
        columns_len_1 = len(timeseries_data_1.columns)
        records_len_2 = len(timeseries_data_2.records)
        columns_len_2 = len(timeseries_data_2.columns)
        expected_merged_record_len = len(timeseries_data_1.records[0]) + len(
            timeseries_data_2.records[0][TIMESTAMP_OFFSET:]
        )

        merged = timeseries_data_1.merge(timeseries_data_2)

        # merge() mutates and returns the left-hand instance.
        assert merged is timeseries_data_1
        assert (
            len(merged.records)
            == records_len_1
            == records_len_2
        )
        # The timestamp column is shared, so the merged width is one less
        # than the sum of both column counts.
        assert (
            len(merged.columns)
            == columns_len_1 + columns_len_2 - TIMESTAMP_OFFSET
        )

        for record in merged.records:
            assert len(record) == expected_merged_record_len == len(merged.columns)

    def test_merge_raises_number_of_records_mismatch(
        self, timeseries_data_1, timeseries_data_2
    ):
        """Merging fails when the two instances have differing record counts."""
        timeseries_data_2.records = timeseries_data_2.records[1:]

        with pytest.raises(
            ValueError, match="Number of records in both instances has to be the same"
        ):
            timeseries_data_1.merge(timeseries_data_2)

    def test_merge_raises_mismatched_timestamps(
        self, timeseries_data_1, timeseries_data_2, timestamp
    ):
        """Merging fails when records at the same index disagree on timestamps."""
        one_day_earlier = (timestamp - timedelta(days=1)).isoformat()
        timeseries_data_2.records[0][0] = one_day_earlier

        with pytest.raises(ValueError, match="mismatched timestamps"):
            timeseries_data_1.merge(timeseries_data_2)
Oops, something went wrong.