Skip to content

Commit

Permalink
Fix is_null queries with datetime fields
Browse files Browse the repository at this point in the history
  • Loading branch information
kbolashev committed Jan 23, 2025
1 parent 6f45a25 commit ceb5748
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 23 deletions.
21 changes: 12 additions & 9 deletions dagshub/data_engine/model/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@
from dagshub.data_engine.model.metadata import wrap_bytes
from dagshub.data_engine.model.metadata_field_builder import MetadataFieldBuilder
from dagshub.data_engine.model.query import QueryFilterTree
from dagshub.data_engine.model.schema_util import metadataTypeLookup, metadataTypeLookupReverse
from dagshub.data_engine.model.schema_util import (
metadata_type_lookup,
default_metadata_type_value,
)
from dagshub.data_engine.model.datasource_state import DatasourceState

if TYPE_CHECKING:
Expand Down Expand Up @@ -647,7 +650,7 @@ def _df_to_metadata(
value_type = field_value_types.get(key)
time_zone = None
if value_type is None:
value_type = metadataTypeLookup[type(sub_val)]
value_type = metadata_type_lookup[type(sub_val)]
field_value_types[key] = value_type
# Don't override bytes if they're not bytes - probably just undownloaded values
if value_type == MetadataFieldType.BLOB and not isinstance(sub_val, bytes):
Expand Down Expand Up @@ -676,7 +679,7 @@ def _df_to_metadata(
else:
value_type = field_value_types.get(key)
if value_type is None:
value_type = metadataTypeLookup[type(val)]
value_type = metadata_type_lookup[type(val)]
field_value_types[key] = value_type
# Don't override bytes if they're not bytes - probably just undownloaded values
if value_type == MetadataFieldType.BLOB and not isinstance(val, bytes):
Expand Down Expand Up @@ -1555,8 +1558,8 @@ def is_null(self):
:meta private:
"""
field = self._get_filtering_field()
value_type = metadataTypeLookupReverse[field.valueType.value]
return self.add_query_op("isnull", value_type())
val = default_metadata_type_value(field.valueType)
return self.add_query_op("isnull", val)

def is_not_null(self):
"""
Expand All @@ -1565,8 +1568,8 @@ def is_not_null(self):
:meta private:
"""
field = self._get_filtering_field()
value_type = metadataTypeLookupReverse[field.valueType.value]
return self.add_query_op("!isnull", value_type())
val = default_metadata_type_value(field.valueType)
return self.add_query_op("!isnull", val)

def _get_filtering_field(self) -> MetadataFieldSchema:
field_name = self.get_query().filter.column_filter
Expand Down Expand Up @@ -1791,7 +1794,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str,
time_zone = None
value_type = field_value_types.get(k)
if value_type is None:
value_type = metadataTypeLookup[type(sub_val)]
value_type = metadata_type_lookup[type(sub_val)]
field_value_types[k] = value_type
# Don't override bytes if they're not bytes and not documents
# - probably just undownloaded values
Expand Down Expand Up @@ -1820,7 +1823,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str,
else:
value_type = field_value_types.get(k)
if value_type is None:
value_type = metadataTypeLookup[type(v)]
value_type = metadata_type_lookup[type(v)]
field_value_types[k] = value_type
# Don't override bytes if they're not bytes - probably just undownloaded values
if value_type == MetadataFieldType.BLOB and not isinstance(v, bytes):
Expand Down
6 changes: 3 additions & 3 deletions dagshub/data_engine/model/metadata_field_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dagshub.data_engine.client.models import MetadataFieldSchema
from dagshub.data_engine.dtypes import DagshubDataType, MetadataFieldType, ReservedTags, ThumbnailType
from dagshub.data_engine.model.schema_util import metadataTypeLookup
from dagshub.data_engine.model.schema_util import metadata_type_lookup

if TYPE_CHECKING:
from dagshub.data_engine.model.datasource import Datasource
Expand Down Expand Up @@ -174,9 +174,9 @@ def _get_backing_type(t: Union[Type, DagshubDataType]) -> MetadataFieldType:
return t.backing_field_type

if type(t) is type:
if t not in metadataTypeLookup.keys():
if t not in metadata_type_lookup.keys():
raise ValueError(f"Primitive type {type(t)} is not supported")
return metadataTypeLookup[t]
return metadata_type_lookup[t]

raise ValueError(f"{t} of type ({type(t)}) is not a valid primitive type or DagshubDataType")

Expand Down
8 changes: 4 additions & 4 deletions dagshub/data_engine/model/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from treelib import Tree, Node

from dagshub.data_engine.model.errors import WrongOperatorError
from dagshub.data_engine.model.schema_util import metadataTypeLookup, metadataTypeLookupReverse
from dagshub.data_engine.model.schema_util import metadata_type_lookup, metadata_type_lookup_reverse
from dagshub.data_engine.dtypes import MetadataFieldType

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -228,7 +228,7 @@ def _serialize_node(node: Node, tree: Tree) -> Dict:
value = node.data["value"]
as_of = node.data.get("as_of")

value_type = metadataTypeLookup[type(value)].value if type(value) in metadataTypeLookup else None
value_type = metadata_type_lookup[type(value)].value if type(value) in metadata_type_lookup else None

# if one of basic value types:
if value_type:
Expand All @@ -244,7 +244,7 @@ def _serialize_node(node: Node, tree: Tree) -> Dict:
if value_type is None and query_op not in dt_range_ops:
raise RuntimeError(
f"Value type {value_type} is not supported for querying.\r\n"
f"Supported types: {list(metadataTypeLookup.keys())}"
f"Supported types: {list(metadata_type_lookup.keys())}"
)
res = {
"filter": {
Expand Down Expand Up @@ -317,7 +317,7 @@ def _deserialize_node(node_dict: Dict, tree: Tree, parent_node=None) -> None:
# timeFilter replaced comparator in query, so now the reverse action
comparator = val["timeFilter"].lower()
else:
value_type = metadataTypeLookupReverse[val["valueType"]]
value_type = metadata_type_lookup_reverse[val["valueType"]]
converter = _metadataTypeCustomConverters.get(value_type, lambda x: value_type(x))
value = converter(val["value"])
as_of = val.get("asOf")
Expand Down
17 changes: 12 additions & 5 deletions dagshub/data_engine/model/schema_util.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import datetime
from typing import Type, Dict, Set
from typing import Type, Dict, Set, Any

import dacite

from dagshub.data_engine.client.models import IntegrationStatus, DatasourceType, PreprocessingStatus
from dagshub.data_engine.dtypes import MetadataFieldType

metadataTypeLookup = {
metadata_type_lookup = {
int: MetadataFieldType.INTEGER,
bool: MetadataFieldType.BOOLEAN,
float: MetadataFieldType.FLOAT,
Expand All @@ -15,9 +15,16 @@
datetime.datetime: MetadataFieldType.DATETIME,
}

metadataTypeLookupReverse: Dict[str, Type] = {}
for k, v in metadataTypeLookup.items():
metadataTypeLookupReverse[v.value] = k
# Inverse mapping: backend value-type name (e.g. "INTEGER") -> Python primitive.
metadata_type_lookup_reverse: Dict[str, Type] = {
    field_type.value: python_type for python_type, field_type in metadata_type_lookup.items()
}


def default_metadata_type_value(metadata_type: MetadataFieldType) -> Any:
    """Return a "zero" placeholder value for the given metadata field type.

    For most types this instantiates the corresponding Python primitive with
    no arguments (0, 0.0, False, "", b""). Datetime is special-cased because
    ``datetime.datetime()`` cannot be called without arguments, so the Unix
    epoch (UTC) is returned instead.
    """
    if metadata_type is not MetadataFieldType.DATETIME:
        # Look up the Python primitive for this field type and build its default.
        return metadata_type_lookup_reverse[metadata_type.value]()
    return datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)


def timestamp_to_datetime(timestamp: int) -> datetime.datetime:
Expand Down
4 changes: 2 additions & 2 deletions tests/data_engine/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dagshub.data_engine.model.datapoint import Datapoint
from dagshub.data_engine.model.datasource import Datasource, DatasetState
from dagshub.data_engine.model.query_result import QueryResult
from dagshub.data_engine.model.schema_util import metadataTypeLookup
from dagshub.data_engine.model.schema_util import metadata_type_lookup
from tests.data_engine.util import add_string_fields
from tests.mocks.repo_api import MockRepoAPI

Expand Down Expand Up @@ -70,7 +70,7 @@ def query_result(ds, some_datapoints):
autoGenerated=False,
originalName=k,
multiple=False,
valueType=metadataTypeLookup[type(v)],
valueType=metadata_type_lookup[type(v)],
name=k,
tags=None,
)
Expand Down
10 changes: 10 additions & 0 deletions tests/data_engine/test_querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,3 +675,13 @@ def test_periodic_datetime_timeofday(ds):
}

assert ds2.serialize_gql_query_input() == expected_serialized


def test_datetime_is_null(ds):
    """is_null on a datetime field serializes with a DATETIME-typed zero value.

    Regression test: the query must carry value "0" with valueType DATETIME
    rather than attempting to build a default by calling the reverse-lookup
    type (``datetime.datetime()`` raises when called with no arguments).
    """
    add_datetime_fields(ds, "x")
    q = ds["x"].is_null()

    expected = {"query": {"filter": {"comparator": "IS_NULL", "key": "x", "value": "0", "valueType": "DATETIME"}}}

    assert q.serialize_gql_query_input() == expected

0 comments on commit ceb5748

Please sign in to comment.