Skip to content

Commit

Permalink
introduced experimental c.this.dispatch
Browse files Browse the repository at this point in the history
  • Loading branch information
westandskif committed Feb 6, 2024
1 parent fbfc789 commit 7e5b703
Show file tree
Hide file tree
Showing 28 changed files with 1,567 additions and 1,167 deletions.
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ disable=abstract-method,
consider-using-f-string,
not-callable,
import-outside-toplevel,
cyclic-import,


[REPORTS]
Expand Down
1,206 changes: 627 additions & 579 deletions ci-requirements/py3.9-lint-doc-build/poetry.lock

Large diffs are not rendered by default.

1,096 changes: 568 additions & 528 deletions ci-requirements/py3.9-lint-doc-build/requirements.txt

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 1.6.0 (2024-02-07)

- added experimental `c.this.dispatch(key, key_to_conv, default)` to switch
between conversions based on dict lookups


## 1.5.1 (2023-10-08)

- added python 3.12 benchmark results
Expand Down
19 changes: 19 additions & 0 deletions docs/conditions_n_pipes.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,22 @@ There are two ways to label data for further use:
To reference previously labeled data use `c.label("label_name")`.

{!examples-md/api__pipe_labels.md!}

## Dispatch

!!! warning ""
Experimental feature added on Feb 7, 2024. It will be stabilized in ~ half
a year.

In performance-critical cases it can be desirable to replace `c.if_` and
`c.if_multiple` with dict lookups. The trade-off is that the keys must be
hashable, which limits what can be used as a key.

Interface: `c.this.dispatch(key, key_to_conv, default)`

1. `key` is a conversion which produces the key to look up
1. `key_to_conv` is a dict which maps keys to conversions
1. `default` is an optional conversion, applied when the dict doesn't contain
the key

{!examples-md/api__dispatch.md!}
49 changes: 49 additions & 0 deletions docs/examples-md/api__dispatch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
===! "convtools"
```python
from convtools import conversion as c


input_data = [
{"version": "v1", "field1": 10},
{"version": "v2", "field2": 20},
{"version": "v3", "field": 30},
]

converter = (
c.iter(
c.this.dispatch(
c.item("version"),
{
"v1": c.item("field1"),
"v2": c.item("field2"),
},
default=c.item("field"),
)
)
.as_type(list)
.gen_converter(debug=True)
)

assert converter(input_data) == [10, 20, 30]
```

=== "debug stdout"
```python
def branch(data_):
return data_["field1"]

def branch_i(data_):
return data_["field2"]

def branch_else(data_):
return data_["field"]

def converter(data_, *, __branch_else=__naive_values__["__branch_else"], __v=__naive_values__["__v"]):
try:
return [__v.get(i["version"], __branch_else)(i) for i in data_]
except __exceptions_to_dump_sources:
__convtools__code_storage.dump_sources()
raise

```

25 changes: 25 additions & 0 deletions docs/examples-raw/api__dispatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from convtools import conversion as c


# Every record carries a "version" marker; the payload lives under a
# version-specific key.
input_data = [
    {"version": "v1", "field1": 10},
    {"version": "v2", "field2": 20},
    {"version": "v3", "field": 30},
]

# Known versions and the conversion which extracts their payload.
version_to_conv = {
    "v1": c.item("field1"),
    "v2": c.item("field2"),
}

# "v3" is absent from the mapping above, so it is handled by ``default``.
pick_payload = c.this.dispatch(
    c.item("version"),
    version_to_conv,
    default=c.item("field"),
)

converter = c.iter(pick_payload).as_type(list).gen_converter(debug=True)

assert converter(input_data) == [10, 20, 30]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include = ["/src"]

[project]
name = "convtools"
version = "1.5.1"
version = "1.6.0"
description = "dynamic, declarative data transformations with automatic code generation"

readme = "README.md"
Expand Down
1 change: 1 addition & 0 deletions src/convtools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Public API."""

from ._conversion import conversion # noqa: F401
from ._dt import DateGrid, DateTimeGrid

Expand Down
9 changes: 5 additions & 4 deletions src/convtools/_aggregations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Define aggregations with various reduce functions."""

import typing as t
import warnings
from collections import defaultdict
Expand Down Expand Up @@ -1440,10 +1441,10 @@ def _gen_code_and_update_ctx(self, code_input, ctx) -> str:
)
else:
converter_name = f"group_by{suffix}"
ctx[
var_agg_data_cls
] = reduce_manager.gen_group_by_data_container(
self, var_agg_data_cls, ctx
ctx[var_agg_data_cls] = (
reduce_manager.gen_group_by_data_container(
self, var_agg_data_cls, ctx
)
)
grouper_code = GROUPER_TEMPLATE.format(
converter_name=converter_name,
Expand Down
114 changes: 111 additions & 3 deletions src/convtools/_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Base and basic conversions are defined here."""

import re
import string
import sys
Expand Down Expand Up @@ -984,13 +985,43 @@ def and_then(self, conversion, condition=bool) -> "BaseConversion":

return self.pipe(
If(
CallFunc(condition, This())
if callable(condition)
else condition,
(
CallFunc(condition, This())
if callable(condition)
else condition
),
conversion,
)
)

def dispatch(
    self,
    key: "t.Any",
    key_to_conv: dict,
    default=_none,
):
    """Pipe the input into one of several conversions, chosen by key.

    The ``key`` conversion is used to obtain a key, and the conversion
    stored under that key in ``key_to_conv`` is applied. The key conversion
    should return a hashable object.

    Args:
      key: conversion to be used as a key.
      key_to_conv: dict which maps keys to conversions.
      default: optional conversion to fall back to. When it is omitted,
        ``None`` is handed to ``Dispatcher`` instead; an explicitly passed
        ``None`` is forwarded the same way.

    Returns:
      The result of piping this conversion into a ``Dispatcher``.

    >>> c.this.dispatch(
    >>>     c.item("version"),
    >>>     {
    >>>         "v1": c.item("field_a"),
    >>>         "v2": c.item("field_b"),
    >>>     },
    >>>     c.item("field")
    >>> ).gen_converter()
    """
    # Translate the module-level "not provided" sentinel into ``None``,
    # which is the value Dispatcher receives for a missing default.
    fallback = default
    if fallback is _none:
        fallback = None

    dispatcher = Dispatcher(key, key_to_conv, default=fallback)
    return self.pipe(dispatcher)

def cumulative(self, prepare_first, reduce_two, label_name=None):
"""Calculate cumulative values within iterables.
Expand Down Expand Up @@ -3033,6 +3064,83 @@ def _gen_code_and_update_ctx(self, code_input, ctx):
).gen_code_and_update_ctx(code_input, ctx)


class Dispatcher(BaseConversion):
    """Applies one of the conversions of the dict, based on the key.

    Every conversion of the dict (and the optional default one) is compiled
    into its own single-argument function. The generated code looks the
    function up in a dict by the computed key and calls it with the input.
    """

    # Same content type as the base conversion, with the FUNCTION_OF_INPUT
    # flag cleared.
    self_content_type = (
        BaseConversion.self_content_type
        & ~BaseConversion.ContentTypes.FUNCTION_OF_INPUT
    )
    weight = Weights.FUNCTION_CALL

    def __init__(
        self,
        key: "t.Any",
        key_to_conv: dict,
        default: "t.Optional[t.Any]" = None,
    ):
        """Store the key getter, the branches and the optional default.

        Args:
          key: conversion (or a value to be wrapped by
            ``ensure_conversion``) which yields the lookup key.
          key_to_conv: dict which maps keys to conversions; the values are
            passed through ``ensure_conversion``.
          default: conversion to use when the key is missing; ``None`` means
            there is no default branch.
        """
        super().__init__()
        self.key_getter = self.ensure_conversion(key)
        self.key_to_conversion = {
            k: self.ensure_conversion(v) for k, v in key_to_conv.items()
        }
        self.default_conversion = (
            None if default is None else self.ensure_conversion(default)
        )
        self.number_of_input_uses = 2

    def _gen_branch_function(
        self, name_prefix, conversion, var_input, function_ctx, ctx
    ):
        """Compile ``conversion`` into a function and return what
        ``function_ctx.gen_function`` returns for it.

        The function gets a fresh name based on ``name_prefix`` and a body
        of a single ``return`` statement.
        """
        converter_name = self.gen_random_name(name_prefix, ctx)
        code = Code()
        # NOTE(review): the signature is rendered before the body code is
        # generated (the original order is kept) -- confirm that generating
        # the body cannot register additional function arguments.
        args_code = function_ctx.get_def_all_args_code()
        code.add_line(f"def {converter_name}({args_code}):", 1)
        body_code = conversion.gen_code_and_update_ctx(var_input, ctx)
        code.add_line(f"return {body_code}", -1)
        return function_ctx.gen_function(converter_name, code.to_string(0))

    def _gen_code_and_update_ctx(self, code_input, ctx):
        """Generate the lookup-and-call expression for ``code_input``.

        Without a default conversion the branch is fetched via ``.item``;
        with one, the dict's ``get`` method is called with the compiled
        default branch as the fallback.
        """
        var_input = "data_"

        function_ctx = self.as_function_ctx(ctx)
        function_ctx.add_arg(var_input, This())
        with function_ctx:
            # Branches are compiled in the dict's iteration order.
            key_to_func = {
                key: self._gen_branch_function(
                    "branch", then_conversion, var_input, function_ctx, ctx
                )
                for key, then_conversion in self.key_to_conversion.items()
            }

            conversion: "BaseConversion"
            if self.default_conversion is None:
                conversion = NaiveConversion(key_to_func).item(
                    self.key_getter
                )
            else:
                else_func = self._gen_branch_function(
                    "branch_else",
                    self.default_conversion,
                    var_input,
                    function_ctx,
                    ctx,
                )
                conversion = NaiveConversion(key_to_func).call_method(
                    "get", self.key_getter, else_func
                )

            # ``conversion`` evaluates to the selected branch function; call
            # it with every argument of the function context.
            return function_ctx.call_with_all_args(
                conversion
            ).gen_code_and_update_ctx(code_input, ctx)


def delegate_simple_0_args(name):
def method(self):
if self.label_output is None:
Expand Down
1 change: 1 addition & 0 deletions src/convtools/_chunks.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Conversions for slicing iterables into chunks."""

import typing as t

from ._aggregations import Aggregate
Expand Down
1 change: 1 addition & 0 deletions src/convtools/_columns.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Base conversions to reference/define columns for tables."""

import typing as t
from collections import defaultdict

Expand Down
1 change: 1 addition & 0 deletions src/convtools/_conversion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The main module exposing public API."""

from itertools import repeat

from ._aggregations import (
Expand Down
1 change: 1 addition & 0 deletions src/convtools/_cumulative.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Cumulative conversions."""

import typing as t
from uuid import uuid4

Expand Down
1 change: 1 addition & 0 deletions src/convtools/_debug.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Provide conversions which simplify debugging."""

import pdb
import sys

Expand Down
11 changes: 7 additions & 4 deletions src/convtools/_dt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Defines datetime utility functions."""

import re
from datetime import date, datetime, timedelta
from functools import lru_cache
Expand Down Expand Up @@ -229,7 +230,9 @@ def datetime_trunc_to_microsecond(dt, to_us, offset_us, mode):
)


STEP_CLASSES: "List[Union[Type[MonthStep], Type[DayOfWeekStep], Type[MicroSecondStep]]]" = [
STEP_CLASSES: (
"List[Union[Type[MonthStep], Type[DayOfWeekStep], Type[MicroSecondStep]]]"
) = [
MonthStep,
DayOfWeekStep,
MicroSecondStep,
Expand Down Expand Up @@ -913,9 +916,9 @@ def __init__(self, fmt):
self.format_args,
) = self._parse_fmt(fmt)
except UnsupportedFormatCode:
self.re_pattern = (
self.assignment_code_lines
) = self.format_args = None
self.re_pattern = self.assignment_code_lines = self.format_args = (
None
)

@staticmethod
def _seq_to_re_group_str(seq):
Expand Down
1 change: 1 addition & 0 deletions src/convtools/_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Run multiple conversions until success, while catching exceptions."""

from typing import Any, Sequence, Tuple, Type, Union

from ._base import BaseConversion, NaiveConversion, This, ensure_conversion
Expand Down
1 change: 1 addition & 0 deletions src/convtools/_expect.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Conversions to check expected conditions or raise exception."""

from ._base import BaseConversion, ConversionException


Expand Down
1 change: 1 addition & 0 deletions src/convtools/_heuristics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Helpers to collect info about environment."""

import sys


Expand Down
Loading

0 comments on commit 7e5b703

Please sign in to comment.