Skip to content

Commit

Permalink
Merge pull request #34 from westandskif/optimize/agg
Browse files Browse the repository at this point in the history
Optimize/agg
  • Loading branch information
westandskif authored Aug 22, 2024
2 parents 710dd25 + cb64b67 commit 21b2821
Show file tree
Hide file tree
Showing 108 changed files with 2,378 additions and 982 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ class-rgx=^_?[A-Z][a-zA-Z0-9]*_?$
module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$

# Regular expression matching correct method names
method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|visit_.+|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$

# Regular expression which should only match function or class names that do
# not require a docstring.
Expand Down
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,10 @@ test-py%:
"pip install -e . && pytest"

test: test-py3.6 test-py3.7 test-py3.8 test-py3.9 test-py3.10 test-py3.11 test-py3.12

benchmark-py%:
~/.pyenv/versions/convtools-$*/bin/pip install -e .
~/.pyenv/versions/convtools-$*/bin/pip install tabulate
~/.pyenv/versions/convtools-$*/bin/python run_benchmarks.py

benchmarks: benchmark-py3.7 benchmark-py3.8 benchmark-py3.9 benchmark-py3.10 benchmark-py3.11 benchmark-py3.12 benchmark-py3.13
204 changes: 120 additions & 84 deletions benchmarks/benchmark_results_V1.csv

Large diffs are not rendered by default.

11 changes: 5 additions & 6 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def gen_converter(self):
{
"a": c.ReduceFuncs.Sum(c.item("value_1")),
"b": c.ReduceFuncs.Min(c.item("value_2")),
"c": c.ReduceFuncs.Max(c.item("value_3")),
"c": c.ReduceFuncs.Max(c.item("value_2")),
}
).gen_converter()

Expand All @@ -35,10 +35,10 @@ def f(data):
):
b = i["value_2"]

if i["value_3"] is not None and (
c is None or c < i["value_3"]
if i["value_2"] is not None and (
c is None or c < i["value_2"]
):
c = i["value_3"]
c = i["value_2"]
return {
"a": a,
"b": b,
Expand All @@ -49,8 +49,7 @@ def f(data):

def gen_data(self):
return [
{"value_1": random(), "value_2": random(), "value_3": random()}
for i in range(10000)
{"value_1": random(), "value_2": random()} for i in range(10000)
]


Expand Down
6 changes: 5 additions & 1 deletion benchmarks/build_heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@ def print_new_weights(): # pragma: no cover
"LIST_INIT": SimpleTimer("[1,2,3,4,5]"),
"SET_INIT": SimpleTimer("{1,2,3,4,5}"),
"DICT_INIT": SimpleTimer("{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}"),
"FUNCTION_CALL": SimpleTimer("f(v, 1)", "v=1\ndef f(v, x): return x"),
}
print("# WARMING UP")
for i in range(2):
SimpleTimer.get_base_time()

print("# CALCULATING BASE TIME")
base_time = SimpleTimer.get_base_time() / 100.0

Expand All @@ -34,7 +39,6 @@ def print_new_weights(): # pragma: no cover
print(f" {name} = {it}")
max_it = max(max_it, it)

print(" FUNCTION_CALL = DICT_LOOKUP * 4")
print(f" UNPREDICTABLE = {max_it * 100}")


Expand Down
1 change: 1 addition & 0 deletions benchmarks/main_versions.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
0.19.0
0.38.0
1.1.2
1.13.0
30 changes: 19 additions & 11 deletions benchmarks/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
from itertools import chain
from time import time

from pydantic import BaseModel

import convtools
from convtools import conversion as c
from convtools.contrib.tables import Table

from .timer import SimpleTimer
from typing import NamedTuple


class Environment(BaseModel):
class Environment(NamedTuple):
system: str
arch: str
py_version: str
Expand All @@ -26,7 +26,14 @@ class Environment(BaseModel):
convtools_version: str


class BenchmarkResult(Environment):
class BenchmarkResult(NamedTuple):
system: str
arch: str
py_version: str
py_version_exact: str
py_implementation: str
py_compiler: str
convtools_version: str
name: str
diff: float

Expand Down Expand Up @@ -58,9 +65,10 @@ def _measure(self, f, data):
def compare_results(self, data1, data2) -> bool:
return data1 == data2

def get_execution_result(self) -> BenchmarkResult:
def get_execution_result(self, silent=False) -> BenchmarkResult:
name = self.get_name()
print(f"TESTING: {name}")
if not silent:
print(f"TESTING: {name}")
data = self.gen_data()

convtools_converter = self.gen_converter()
Expand All @@ -79,7 +87,7 @@ def get_execution_result(self) -> BenchmarkResult:
return BenchmarkResult(
name=name,
diff=best_naive / convtools_time,
**ENVIRONMENT.model_dump(),
**ENVIRONMENT._asdict(),
)

@abc.abstractmethod
Expand Down Expand Up @@ -107,10 +115,10 @@ def load_results(self):
if os.path.exists(self.FILENAME):
return list(
map(
BenchmarkResult.model_validate,
Table.from_csv(self.FILENAME, header=True).into_iter_rows(
dict
),
BenchmarkResult._make,
Table.from_csv(self.FILENAME, header=True)
.update(diff=c.col("diff").as_type(float))
.into_iter_rows(tuple),
)
)
return []
Expand All @@ -133,7 +141,7 @@ def save(self):
merged_results = (
c.group_by(*(c.attr(key_) for key_ in self._key_fields))
.aggregate(c.ReduceFuncs.Last(c.this))
.iter(c.this.call_method("model_dump"))
.iter(c.this.call_method("_asdict"))
.execute(chain(self.load_results(), self.new_results))
)
resulting_rows = sorted(
Expand Down
8 changes: 4 additions & 4 deletions benchmarks/timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ class SimpleTimer(Timer):
"""A simpler timer which allows to automatically measure time taken to get
stable results based on coefficient of variation (sigma to mean ratio)"""

REL_PRECISION = 0.005
REL_PRECISION = 0.015

def auto_measure(
self,
max_time=10,
max_time=20,
min_time=0.02,
rel_precision=REL_PRECISION,
expected_num_of_checks=40,
Expand Down Expand Up @@ -57,8 +57,8 @@ def auto_measure(

while True:
mean = sum(times) / checks
std_deviation = (
sqrt(sum(d * d for d in (mean - t for t in times))) / checks
std_deviation = sqrt(
sum(d * d for d in (mean - t for t in times)) / checks
)
ratio = std_deviation / mean
if ratio < std_deviation_to_mean_ratio:
Expand Down
21 changes: 16 additions & 5 deletions build-docs-performance.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import os
from glob import glob
import subprocess
from glob import glob
from hashlib import sha256

from convtools.contrib.tables import Table
from convtools import conversion as c
from convtools.contrib.tables import Table


DOCS_ROOT = "./docs"
MD_DIR = os.path.join(DOCS_ROOT, "performance-md")
Expand All @@ -21,17 +22,25 @@ def ensure_dir(file_path):


from typing import List
from benchmarks.storage import BenchmarkResultsStorage, BenchmarkResult

from benchmarks.storage import BenchmarkResult, BenchmarkResultsStorage
from tabulate import tabulate

import convtools


def gen_md(results: List[BenchmarkResult], py_version: str, indent=" "):
filtered_results = (
c.filter(c.attr("py_version") == py_version)
c.filter(
c.and_(
c.attr("py_version") == py_version,
c.attr("convtools_version") == convtools.__version__,
)
)
.pipe(
c.group_by(c.attr("name"))
.aggregate(
c.ReduceFuncs.MinRow(c.attr("diff")).call_method("model_dump")
c.ReduceFuncs.MinRow(c.attr("diff")).call_method("_asdict")
)
.sort(key=lambda x: x["diff"])
)
Expand All @@ -57,9 +66,11 @@ def gen_md(results: List[BenchmarkResult], py_version: str, indent=" "):

if __name__ == "__main__":
    # Load the stored benchmark results once, then call gen_md for every
    # Python version, in ascending version order.
    benchmark_results = BenchmarkResultsStorage().load_results()
    for py_version in (
        "3.6",
        "3.7",
        "3.8",
        "3.9",
        "3.10",
        "3.11",
        "3.12",
        "3.13",
    ):
        gen_md(benchmark_results, py_version)
1 change: 0 additions & 1 deletion ci-requirements/requirements3.10.in
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ mkdocs-material
pygments
pymdown-extensions

pydantic
tabulate
33 changes: 15 additions & 18 deletions ci-requirements/requirements3.10.out
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
annotated-types==0.7.0
astroid==3.2.3
Babel==2.15.0
black==24.4.2
astroid==3.2.4
babel==2.16.0
black==24.8.0
build==1.2.1
certifi==2024.7.4
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
coverage==7.6.0
coverage==7.6.1
dill==0.3.8
exceptiongroup==1.2.2
ghp-import==2.1.0
idna==3.7
iniconfig==2.0.0
isort==5.13.2
Jinja2==3.1.4
Markdown==3.6
Markdown==3.7
markdown-include==0.8.1
MarkupSafe==2.1.5
mccabe==0.7.0
Expand All @@ -24,35 +23,33 @@ mergedeep==1.3.4
mkdocs==1.6.0
mkdocs-exclude==1.0.2
mkdocs-get-deps==0.2.0
mkdocs-material==9.5.29
mkdocs-material==9.5.32
mkdocs-material-extensions==1.3.1
mypy==1.10.1
mypy==1.11.1
mypy-extensions==1.0.0
packaging==24.1
paginate==0.5.6
pathspec==0.12.1
platformdirs==4.2.2
pluggy==1.5.0
py-cpuinfo==9.0.0
pydantic==2.8.2
pydantic_core==2.20.1
Pygments==2.18.0
pylint==3.2.5
pymdown-extensions==10.8.1
pylint==3.2.6
pymdown-extensions==10.9
pyproject_hooks==1.1.0
pytest==8.2.2
pytest==8.3.2
pytest-benchmark==4.0.0
pytest-cov==5.0.0
python-dateutil==2.9.0.post0
PyYAML==6.0.1
PyYAML==6.0.2
pyyaml_env_tag==0.1
regex==2024.5.15
regex==2024.7.24
requests==2.32.3
ruff==0.5.2
ruff==0.6.1
six==1.16.0
tabulate==0.9.0
tomli==2.0.1
tomlkit==0.13.0
tomlkit==0.13.2
typing_extensions==4.12.2
urllib3==2.2.2
watchdog==4.0.1
watchdog==4.0.2
4 changes: 4 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.13.0 (2024-08-22)

- sped up aggregations by CSE (common subexpression elimination)

## 1.12.2 (2024-08-22)

- fixed `c.this.datetime_parse` with default when uncovered data remains
Expand Down
20 changes: 14 additions & 6 deletions docs/benefits.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,30 +47,38 @@ other parts of your code. The table below shows the speed-up of
convtools-based solutions over the naive ones.


/// tab | py3.12
/// tab | 3.13
{! performance-md/perf-3.13.md !}
///

/// tab | 3.12
{! performance-md/perf-3.12.md !}
///

/// tab | py3.11
/// tab | 3.11
{! performance-md/perf-3.11.md !}
///

/// tab | py3.10
/// tab | 3.10
{! performance-md/perf-3.10.md !}
///

/// tab | py3.9
/// tab | 3.9
{! performance-md/perf-3.9.md !}
///

/// tab | py3.8
/// tab | 3.8
{! performance-md/perf-3.8.md !}
///

/// tab | py3.7
/// tab | 3.7
{! performance-md/perf-3.7.md !}
///

/// tab | 3.6
{! performance-md/perf-3.6.md !}
///


In cases where there are multiple speed test results, the worst one is
taken. See [benchmarks on GitHub](https://github.com/westandskif/convtools/blob/master/run_benchmarks.py) for the source code.
8 changes: 4 additions & 4 deletions docs/examples-md/api__and_then.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ assert converter(range(3)) == [10, 1, 12]

/// tab | debug stdout
```python
def converter(data_):
def _converter(data_):
try:
return [(i and int(i)) for i in data_]
return [(_i and int(_i)) for _i in data_]
except __exceptions_to_dump_sources:
__convtools__code_storage.dump_sources()
raise

def converter(data_):
def _converter(data_):
try:
return [(((i + 10) if (i != 1) else i)) for i in data_]
return [(((_i + 10) if (_i != 1) else _i)) for _i in data_]
except __exceptions_to_dump_sources:
__convtools__code_storage.dump_sources()
raise
Expand Down
Loading

0 comments on commit 21b2821

Please sign in to comment.