From ceb5748d7a70e2ee9bc54c67b6efed617e93546b Mon Sep 17 00:00:00 2001 From: KBolashev Date: Thu, 23 Jan 2025 11:33:53 +0200 Subject: [PATCH] Fix is_null queries with datetime fields --- dagshub/data_engine/model/datasource.py | 21 +++++++++++-------- .../model/metadata_field_builder.py | 6 +++--- dagshub/data_engine/model/query.py | 8 +++---- dagshub/data_engine/model/schema_util.py | 17 ++++++++++----- tests/data_engine/conftest.py | 4 ++-- tests/data_engine/test_querying.py | 10 +++++++++ 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/dagshub/data_engine/model/datasource.py b/dagshub/data_engine/model/datasource.py index 6de8b226..03db3016 100644 --- a/dagshub/data_engine/model/datasource.py +++ b/dagshub/data_engine/model/datasource.py @@ -52,7 +52,10 @@ from dagshub.data_engine.model.metadata import wrap_bytes from dagshub.data_engine.model.metadata_field_builder import MetadataFieldBuilder from dagshub.data_engine.model.query import QueryFilterTree -from dagshub.data_engine.model.schema_util import metadataTypeLookup, metadataTypeLookupReverse +from dagshub.data_engine.model.schema_util import ( + metadata_type_lookup, + default_metadata_type_value, +) from dagshub.data_engine.model.datasource_state import DatasourceState if TYPE_CHECKING: @@ -647,7 +650,7 @@ def _df_to_metadata( value_type = field_value_types.get(key) time_zone = None if value_type is None: - value_type = metadataTypeLookup[type(sub_val)] + value_type = metadata_type_lookup[type(sub_val)] field_value_types[key] = value_type # Don't override bytes if they're not bytes - probably just undownloaded values if value_type == MetadataFieldType.BLOB and not isinstance(sub_val, bytes): @@ -676,7 +679,7 @@ def _df_to_metadata( else: value_type = field_value_types.get(key) if value_type is None: - value_type = metadataTypeLookup[type(val)] + value_type = metadata_type_lookup[type(val)] field_value_types[key] = value_type # Don't override bytes if they're not bytes - probably just undownloaded values if value_type == MetadataFieldType.BLOB and not isinstance(val, bytes): @@ -1555,8 +1558,8 @@ def is_null(self): :meta private: """ field = self._get_filtering_field() - value_type = metadataTypeLookupReverse[field.valueType.value] - return self.add_query_op("isnull", value_type()) + val = default_metadata_type_value(field.valueType) + return self.add_query_op("isnull", val) def is_not_null(self): """ @@ -1565,8 +1568,8 @@ def is_not_null(self): :meta private: """ field = self._get_filtering_field() - value_type = metadataTypeLookupReverse[field.valueType.value] - return self.add_query_op("!isnull", value_type()) + val = default_metadata_type_value(field.valueType) + return self.add_query_op("!isnull", val) def _get_filtering_field(self) -> MetadataFieldSchema: field_name = self.get_query().filter.column_filter @@ -1791,7 +1794,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str, time_zone = None value_type = field_value_types.get(k) if value_type is None: - value_type = metadataTypeLookup[type(sub_val)] + value_type = metadata_type_lookup[type(sub_val)] field_value_types[k] = value_type # Don't override bytes if they're not bytes and not documents # - probably just undownloaded values @@ -1820,7 +1823,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str, else: value_type = field_value_types.get(k) if value_type is None: - value_type = metadataTypeLookup[type(v)] + value_type = metadata_type_lookup[type(v)] field_value_types[k] = value_type # Don't override bytes if they're not bytes - probably just undownloaded values if value_type == MetadataFieldType.BLOB and not isinstance(v, bytes): diff --git a/dagshub/data_engine/model/metadata_field_builder.py b/dagshub/data_engine/model/metadata_field_builder.py index d48df732..98a12bf6 100644 --- a/dagshub/data_engine/model/metadata_field_builder.py +++ b/dagshub/data_engine/model/metadata_field_builder.py @@ -4,7 +4,7 @@ from dagshub.data_engine.client.models import MetadataFieldSchema from dagshub.data_engine.dtypes import DagshubDataType, MetadataFieldType, ReservedTags, ThumbnailType -from dagshub.data_engine.model.schema_util import metadataTypeLookup +from dagshub.data_engine.model.schema_util import metadata_type_lookup if TYPE_CHECKING: from dagshub.data_engine.model.datasource import Datasource @@ -174,9 +174,9 @@ def _get_backing_type(t: Union[Type, DagshubDataType]) -> MetadataFieldType: return t.backing_field_type if type(t) is type: - if t not in metadataTypeLookup.keys(): + if t not in metadata_type_lookup.keys(): raise ValueError(f"Primitive type {type(t)} is not supported") - return metadataTypeLookup[t] + return metadata_type_lookup[t] raise ValueError(f"{t} of type ({type(t)}) is not a valid primitive type or DagshubDataType") diff --git a/dagshub/data_engine/model/query.py b/dagshub/data_engine/model/query.py index f64a85d6..918daded 100644 --- a/dagshub/data_engine/model/query.py +++ b/dagshub/data_engine/model/query.py @@ -7,7 +7,7 @@ from treelib import Tree, Node from dagshub.data_engine.model.errors import WrongOperatorError -from dagshub.data_engine.model.schema_util import metadataTypeLookup, metadataTypeLookupReverse +from dagshub.data_engine.model.schema_util import metadata_type_lookup, metadata_type_lookup_reverse from dagshub.data_engine.dtypes import MetadataFieldType logger = logging.getLogger(__name__) @@ -228,7 +228,7 @@ def _serialize_node(node: Node, tree: Tree) -> Dict: value = node.data["value"] as_of = node.data.get("as_of") - value_type = metadataTypeLookup[type(value)].value if type(value) in metadataTypeLookup else None + value_type = metadata_type_lookup[type(value)].value if type(value) in metadata_type_lookup else None # if one of basic value types: if value_type: @@ -244,7 +244,7 @@ def _serialize_node(node: Node, tree: Tree) -> Dict: if value_type is None and query_op not in dt_range_ops: raise RuntimeError( f"Value type {value_type} is not supported for querying.\r\n" - f"Supported types: {list(metadataTypeLookup.keys())}" + f"Supported types: {list(metadata_type_lookup.keys())}" ) res = { "filter": { @@ -317,7 +317,7 @@ def _deserialize_node(node_dict: Dict, tree: Tree, parent_node=None) -> None: # timeFilter replaced comparator in query, so now the reverse action comparator = val["timeFilter"].lower() else: - value_type = metadataTypeLookupReverse[val["valueType"]] + value_type = metadata_type_lookup_reverse[val["valueType"]] converter = _metadataTypeCustomConverters.get(value_type, lambda x: value_type(x)) value = converter(val["value"]) as_of = val.get("asOf") diff --git a/dagshub/data_engine/model/schema_util.py b/dagshub/data_engine/model/schema_util.py index b75b9c9a..1b3316a0 100644 --- a/dagshub/data_engine/model/schema_util.py +++ b/dagshub/data_engine/model/schema_util.py @@ -1,12 +1,12 @@ import datetime -from typing import Type, Dict, Set +from typing import Type, Dict, Set, Any import dacite from dagshub.data_engine.client.models import IntegrationStatus, DatasourceType, PreprocessingStatus from dagshub.data_engine.dtypes import MetadataFieldType -metadataTypeLookup = { +metadata_type_lookup = { int: MetadataFieldType.INTEGER, bool: MetadataFieldType.BOOLEAN, float: MetadataFieldType.FLOAT, @@ -15,9 +15,16 @@ datetime.datetime: MetadataFieldType.DATETIME, } -metadataTypeLookupReverse: Dict[str, Type] = {} -for k, v in metadataTypeLookup.items(): - metadataTypeLookupReverse[v.value] = k +metadata_type_lookup_reverse: Dict[str, Type] = {} +for k, v in metadata_type_lookup.items(): + metadata_type_lookup_reverse[v.value] = k + + +def default_metadata_type_value(metadata_type: MetadataFieldType) -> Any: + if metadata_type == MetadataFieldType.DATETIME: + return datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc) + else: + return metadata_type_lookup_reverse[metadata_type.value]() def timestamp_to_datetime(timestamp: int) -> datetime.datetime: diff --git a/tests/data_engine/conftest.py b/tests/data_engine/conftest.py index 8b7619e3..c1594797 100644 --- a/tests/data_engine/conftest.py +++ b/tests/data_engine/conftest.py @@ -7,7 +7,7 @@ from dagshub.data_engine.model.datapoint import Datapoint from dagshub.data_engine.model.datasource import Datasource, DatasetState from dagshub.data_engine.model.query_result import QueryResult -from dagshub.data_engine.model.schema_util import metadataTypeLookup +from dagshub.data_engine.model.schema_util import metadata_type_lookup from tests.data_engine.util import add_string_fields from tests.mocks.repo_api import MockRepoAPI @@ -70,7 +70,7 @@ def query_result(ds, some_datapoints): autoGenerated=False, originalName=k, multiple=False, - valueType=metadataTypeLookup[type(v)], + valueType=metadata_type_lookup[type(v)], name=k, tags=None, ) diff --git a/tests/data_engine/test_querying.py b/tests/data_engine/test_querying.py index 731f8989..0d8ec957 100644 --- a/tests/data_engine/test_querying.py +++ b/tests/data_engine/test_querying.py @@ -675,3 +675,13 @@ def test_periodic_datetime_timeofday(ds): } assert ds2.serialize_gql_query_input() == expected_serialized + + +def test_datetime_is_null(ds): + add_datetime_fields(ds, "x") + q = ds["x"].is_null() + print(q.get_query().filter.tree_to_dict()) + + expected = {"query": {"filter": {"comparator": "IS_NULL", "key": "x", "value": "0", "valueType": "DATETIME"}}} + + assert q.serialize_gql_query_input() == expected