From 622491a3e31f42ca200a405303f0e1160e87bed3 Mon Sep 17 00:00:00 2001 From: pavel Date: Sun, 10 Nov 2024 16:31:28 +0300 Subject: [PATCH 1/4] Add "Why not Pydantic?" article --- docs/benchmarks.rst | 2 + docs/examples/why_not_pydantic/__init__.py | 0 .../why_not_pydantic/aliasing_mess_extra.py | 12 + .../why_not_pydantic/implicit_conversions.py | 12 + .../implicit_conversions_workaround.py | 23 ++ .../instantiating_penalty_benchmark.py | 15 + ...nstantiating_penalty_benchmark_datetime.py | 26 ++ ...ating_penalty_benchmark_model_construct.py | 10 + .../instantiating_penalty_models.py | 20 ++ .../migration_pydantic_model.py | 19 ++ .../why_not_pydantic/mistakes_silencing.py | 14 + .../underdone_class_mapping_default.py | 21 ++ .../underdone_class_mapping_intro.py | 19 ++ docs/index.rst | 1 + docs/loading-and-dumping/tutorial.rst | 2 + docs/why-not-pydantic.rst | 308 ++++++++++++++++++ tests/test_doc.py | 4 + tests/tests_helpers/tests_helpers/misc.py | 14 + 18 files changed, 522 insertions(+) create mode 100644 docs/examples/why_not_pydantic/__init__.py create mode 100644 docs/examples/why_not_pydantic/aliasing_mess_extra.py create mode 100644 docs/examples/why_not_pydantic/implicit_conversions.py create mode 100644 docs/examples/why_not_pydantic/implicit_conversions_workaround.py create mode 100644 docs/examples/why_not_pydantic/instantiating_penalty_benchmark.py create mode 100644 docs/examples/why_not_pydantic/instantiating_penalty_benchmark_datetime.py create mode 100644 docs/examples/why_not_pydantic/instantiating_penalty_benchmark_model_construct.py create mode 100644 docs/examples/why_not_pydantic/instantiating_penalty_models.py create mode 100644 docs/examples/why_not_pydantic/migration_pydantic_model.py create mode 100644 docs/examples/why_not_pydantic/mistakes_silencing.py create mode 100644 docs/examples/why_not_pydantic/underdone_class_mapping_default.py create mode 100644 docs/examples/why_not_pydantic/underdone_class_mapping_intro.py create mode 100644 docs/why-not-pydantic.rst diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index c1605e79..4993ca5a 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -1,3 +1,5 @@ +.. 
_benchmarks: + ================== Benchmarks ================== diff --git a/docs/examples/why_not_pydantic/__init__.py b/docs/examples/why_not_pydantic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/docs/examples/why_not_pydantic/aliasing_mess_extra.py b/docs/examples/why_not_pydantic/aliasing_mess_extra.py new file mode 100644 index 00000000..7fa451bb --- /dev/null +++ b/docs/examples/why_not_pydantic/aliasing_mess_extra.py @@ -0,0 +1,12 @@ +from pydantic import BaseModel, ConfigDict, Field + + +class User(BaseModel): + model_config = ConfigDict(populate_by_name=True, extra="allow") + + name: str = Field(alias="full_name") + age: int + + +data = {"name": "name_value", "age": 20} +assert User.model_validate(data).model_extra == {} diff --git a/docs/examples/why_not_pydantic/implicit_conversions.py b/docs/examples/why_not_pydantic/implicit_conversions.py new file mode 100644 index 00000000..0d4e7c91 --- /dev/null +++ b/docs/examples/why_not_pydantic/implicit_conversions.py @@ -0,0 +1,12 @@ +# mypy: disable-error-code="arg-type" +from decimal import Decimal + +from pydantic import BaseModel + + +class Product(BaseModel): + id: int + amount: Decimal + + +assert Product(id=1, amount=14.6) == Product(id=1, amount=Decimal("14.6")) diff --git a/docs/examples/why_not_pydantic/implicit_conversions_workaround.py b/docs/examples/why_not_pydantic/implicit_conversions_workaround.py new file mode 100644 index 00000000..a1f42969 --- /dev/null +++ b/docs/examples/why_not_pydantic/implicit_conversions_workaround.py @@ -0,0 +1,23 @@ +# mypy: disable-error-code="arg-type" +from decimal import Decimal + +from pydantic import BaseModel, ConfigDict, ValidationError + + +class Product(BaseModel): + id: int + amount: Decimal + + model_config = ConfigDict(strict=True) + + +try: + Product(id=1, amount=14.6) +except ValidationError: + pass + +assert ( + Product.model_validate({"id": 1, "amount": 14.6}, strict=False) + == + Product(id=1, amount=Decimal("14.6")) +) diff --git a/docs/examples/why_not_pydantic/instantiating_penalty_benchmark.py b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark.py new file mode 100644 index 00000000..50046989 --- /dev/null +++ b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark.py @@ -0,0 +1,15 @@ +# ruff: noqa: T201 +from datetime import datetime +from timeit import timeit + +from .instantiating_penalty_models import UserDataclass, UserPydantic + +stmt = """ +User( + id=123, + signup_ts=datetime(year=2019, month=6, day=1, hour=12, minute=22), + tastes={'wine': 9, 'cheese': 7, 'cabbage': '1'}, +) +""" +print("pydantic ", timeit(stmt, globals={"User": UserPydantic, "datetime": datetime})) +print("dataclass", timeit(stmt, globals={"User": UserDataclass, "datetime": datetime})) diff --git a/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_datetime.py b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_datetime.py new file mode 100644 index 00000000..b4e8938d --- /dev/null +++ b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_datetime.py @@ -0,0 +1,26 @@ +# ruff: noqa: T201 +from datetime import datetime +from timeit import timeit + +from .instantiating_penalty_models import UserDataclass, UserPydantic + +stmt = """ +User( + id=123, + signup_ts=dt, + tastes={'wine': 9, 'cheese': 7, 'cabbage': '1'}, +) +""" +dt = datetime(year=2019, month=6, day=1, hour=12, minute=22) +print( + "pydantic ", + timeit(stmt, globals={"User": UserPydantic, "dt": dt}), +) +print( + "pydantic (model_construct)", + 
timeit(stmt, globals={"User": UserPydantic.model_construct, "dt": dt}), +) +print( + "dataclass ", + timeit(stmt, globals={"User": UserDataclass, "dt": dt}), +) diff --git a/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_model_construct.py b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_model_construct.py new file mode 100644 index 00000000..9e39db42 --- /dev/null +++ b/docs/examples/why_not_pydantic/instantiating_penalty_benchmark_model_construct.py @@ -0,0 +1,10 @@ +# ruff: noqa: T201 +from datetime import datetime +from timeit import timeit + +from .instantiating_penalty_benchmark import UserPydantic, stmt + +print( + "pydantic (model_construct)", + timeit(stmt, globals={"User": UserPydantic.model_construct, "datetime": datetime}), +) diff --git a/docs/examples/why_not_pydantic/instantiating_penalty_models.py b/docs/examples/why_not_pydantic/instantiating_penalty_models.py new file mode 100644 index 00000000..35caf57e --- /dev/null +++ b/docs/examples/why_not_pydantic/instantiating_penalty_models.py @@ -0,0 +1,20 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, PositiveInt + + +class UserPydantic(BaseModel): + id: int + name: str = "John Doe" + signup_ts: Optional[datetime] + tastes: dict[str, PositiveInt] + + +@dataclass(kw_only=True) +class UserDataclass: + id: int + name: str = "John Doe" + signup_ts: Optional[datetime] + tastes: dict[str, PositiveInt] diff --git a/docs/examples/why_not_pydantic/migration_pydantic_model.py b/docs/examples/why_not_pydantic/migration_pydantic_model.py new file mode 100644 index 00000000..25472f8e --- /dev/null +++ b/docs/examples/why_not_pydantic/migration_pydantic_model.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel + +from adaptix import Retort + + +class Book(BaseModel): + title: str + price: int + + +data = { + "title": "Fahrenheit 451", + "price": 100, +} + +retort = Retort() +book = retort.load(data, Book) +assert book == Book(title="Fahrenheit 451", price=100) +assert retort.dump(book) == data diff --git a/docs/examples/why_not_pydantic/mistakes_silencing.py b/docs/examples/why_not_pydantic/mistakes_silencing.py new file mode 100644 index 00000000..1e048ad4 --- /dev/null +++ b/docs/examples/why_not_pydantic/mistakes_silencing.py @@ -0,0 +1,14 @@ +# mypy: disable-error-code="call-arg" +from pydantic import BaseModel + + +class SomeModel(BaseModel): + a: int + b: int + + +SomeModel( + a=1, + b=2, + c=3, # unknown filed! +) diff --git a/docs/examples/why_not_pydantic/underdone_class_mapping_default.py b/docs/examples/why_not_pydantic/underdone_class_mapping_default.py new file mode 100644 index 00000000..2b6a6db5 --- /dev/null +++ b/docs/examples/why_not_pydantic/underdone_class_mapping_default.py @@ -0,0 +1,21 @@ +# mypy: disable-error-code="call-arg" +from dataclasses import dataclass +from typing import Optional + +from pydantic import BaseModel + + +@dataclass +class Book: + title: str + author: str + + +class BookDTO(BaseModel): + title: str + writer: Optional[str] = None # alias is forgotten! 
+ + +book = Book(title="Fahrenheit 451", author="Ray Bradbury") +book_dto = BookDTO.model_validate(book, from_attributes=True) +assert book_dto == BookDTO(title="Fahrenheit 451", author=None) diff --git a/docs/examples/why_not_pydantic/underdone_class_mapping_intro.py b/docs/examples/why_not_pydantic/underdone_class_mapping_intro.py new file mode 100644 index 00000000..03f01eab --- /dev/null +++ b/docs/examples/why_not_pydantic/underdone_class_mapping_intro.py @@ -0,0 +1,19 @@ +from dataclasses import dataclass + +from pydantic import BaseModel + + +@dataclass +class Person: + name: str + age: float + + +class PersonDTO(BaseModel): + name: str + age: float + + +person = Person(name="Anna", age=20) +person_dto = PersonDTO.model_validate(person, from_attributes=True) +assert person_dto == PersonDTO(name="Anna", age=20) diff --git a/docs/index.rst b/docs/index.rst index 087b372e..5f6255f5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,7 @@ Adaptix overview benchmarks + why-not-pydantic .. toctree:: :maxdepth: 2 diff --git a/docs/loading-and-dumping/tutorial.rst b/docs/loading-and-dumping/tutorial.rst index d70e2b33..83d84926 100644 --- a/docs/loading-and-dumping/tutorial.rst +++ b/docs/loading-and-dumping/tutorial.rst @@ -160,6 +160,8 @@ Some facts about ``P``: situated at field ``name``, placed at model ``Foo`` +.. _retort_extension_and_combination: + Retort extension and combination ------------------------------------- diff --git a/docs/why-not-pydantic.rst b/docs/why-not-pydantic.rst new file mode 100644 index 00000000..1f728f1f --- /dev/null +++ b/docs/why-not-pydantic.rst @@ -0,0 +1,308 @@ +===================== +Why not Pydantic? +===================== + + +Introduction +==================== + +.. Надо переписать вступление!!!!!!!!! + +Pydantic is one of the most popular libraries for data serialization and deserialization. +However, the principles it’s built on often hinder ease of use. + +In this article, we’ll explore how using Adaptix instead of Pydantic can help tackle common tasks more efficiently. + +.. note:: + + This article is updated for ``pydantic==2.9.2``, with code snippets run on ``CPython`` version ``3.12``. + Some things may have changed since then, but probably not much. + + +Coupling instance creation and data parsing +================================================ + +Creating any model instance in Pydantic triggers data parsing. +On one hand, this makes instance creation within a program significantly more resource-intensive, +while on the other, it can lead to unexpected and undesirable behavior during instance creation. +Let’s examine this in detail. + + +Instantiating penalty +------------------------- + +Let’s take a model from the Pydantic tutorial: + +.. literalinclude:: /examples/why_not_pydantic/instantiating_penalty_models.py + + +And run a simple benchmark for creating instances: + +.. literalinclude:: /examples/why_not_pydantic/instantiating_penalty_benchmark.py + :lines: 2- + +Here are the results: + +.. code-block:: + + pydantic 2.3817247649421915 + dataclass 0.9756000880151987 + +Creating a Pydantic model instance is nearly 2.4 times slower than creating a similar dataclass instance. +This is the cost you’ll pay each time you want to create an object in your business logic. + +But Pydantic has a method, ``.model_construct()``, for creating instances without validation! +And yet, it’s even slower: + +.. literalinclude:: /examples/why_not_pydantic/instantiating_penalty_benchmark_model_construct.py + :lines: 2- + +.. 
code-block:: + + pydantic (model_construct) 2.8749908979516476 + + +.. dropdown:: Some notes on the benchmarks + + In fact, a significant portion of the time in the benchmark above is spent creating a ``datetime`` object. + If we remove this object creation, the situation becomes even more dramatic: + + .. literalinclude:: /examples/why_not_pydantic/instantiating_penalty_benchmark_datetime.py + :lines: 2- + + .. code-block:: + + pydantic 1.8139039139496163 + pydantic (model_construct) 2.155639562988654 + dataclass 0.4947519419947639 + + Now Pydantic is 3.7 times slower than a standard dataclass, + and 4.3 times slower if you attempt to disable validation. + + Pydantic's slowdown factor will vary depending on the complexity of the validation and + the time required to create other classes. + + +Fused validation +--------------------- + +Validating invariants within the model is reasonable, +but validation should be separated into business logic and representation layers. + +For example, type checking prevents most type-related errors, +and having basic tests eliminates them altogether. +Do you really need type checks each time you create a model instance? +What if the model includes large lists? + +Let’s look at how ``attrs`` approaches this issue. +Models of ``attrs`` can’t transform themselves into JSON or load themselves from JSON. +External tools (such as ``adaptix`` or ``cattrs``) handle this functionality. + +Within the model, you can declare validators to enforce business invariants, +while Adaptix can perform additional checks when loading data from an untrusted source. + +You can also use the ``__post_init__`` method in dataclasses for necessary validation. + +As a result, with Pydantic you can either constantly run checks that you don't need at all, +or skip any validation at all using ``.model_construct()`` +(which will most likely be even slower, as shown above). + + +Implicit conversions +-------------------------- + +The next issue lies in the fact that implicit type conversion logic, suitable for parsing, +is often inappropriate for creating an object via a constructor. + +For a parser, it’s entirely reasonable to perform implicit conversions +(such as `TolerantReader `__). +However, this behavior can lead to errors when applied within a program. + +For example, if you pass a ``float`` value to a field with the ``Decimal`` type, +Pydantic will implicitly convert it instead of raising an error. +This leads to the fact that the error of using floats for monetary calculations can be hidden, +potentially causing inaccuracies. + +.. literalinclude:: /examples/why_not_pydantic/implicit_conversions.py + :caption: Possible loss of accuracy + :lines: 2- + +There is a way to work around this issue. +To do so, you must enable strict mode and disable it each time model parsing occurs. + +.. literalinclude:: /examples/why_not_pydantic/implicit_conversions_workaround.py + :caption: Necessary workaround to avoid loss of accuracy + :lines: 2- + + +Aliasing mess +------------------------ + +The essence of aliases is that you have an external and an internal field name, +where the external name is unsuitable for use within the program. +However, the Pydantic combines different representations into ball of mud. + +By default, the constructor only accepts fields by their aliases (i.e., using the external names). +You can change this with the ``populate_by_name`` configuration option. 
+This option allows you to use the internal field names in the constructor, +yet the constructor will still accept the external representation. +Additionally, this option affects JSON parsing, enabling it to use field names alongside aliases. + +.. literalinclude:: /examples/why_not_pydantic/aliasing_mess_extra.py + :caption: Extra field is parsed as usual field + + +Mistakes silencing +------------------------ + +One of the biggest issues with Pydantic’s approach is that extra fields passed into the constructor are ignored. +As a result, such typos do not show up immediately, +but live in the program until they are found by tests or users. + +Static analyzers can reduce the number of such errors, +but this does not always work due to the dynamic nature of Python. + +You can forbid additional fields by setting ``extra='forbid'``, +though this will also affect the parser. + +.. literalinclude:: /examples/why_not_pydantic/mistakes_silencing.py + :caption: Extra field is ignored + :lines: 2- + + +Locking ecosystem +============================= + +Pydantic’s primary purpose is data serialization and deserialization. +Instead of using standard library models (``@dataclass``, ``NamedTuple``, ``TypedDict``), +Pydantic introduces a new model type, even though these tasks don’t necessitate a new model type. + +Pydantic models come with unique semantics, +requiring special support from development tools like type checkers and IDEs. +Most importantly, external libraries that don’t care about the serialization method still must add +support for Pydantic models, creating dependencies on these integrations. + +Pydantic does support standard library models, but this support is very limited. +For example, you can’t alter parsing or serialization logic in an existing class. + +You can avoid these issues by restricting Pydantic to the layer responsible for communication with outer world. +However, this requires duplicating classes and manually writing converters. +Pydantic offers a ``from_attributes=True`` mode, +which allows you to create model instances from other objects, +though it has significant limitations. + + +Underdone class mapping +=============================== + +Pydantic offers very weak support for transforming one model into another. +It behaves like a regular validation mode for an unknown source, +except instead of referencing dictionary keys, it accesses object attributes. + +.. literalinclude:: /examples/why_not_pydantic/underdone_class_mapping_intro.py + :caption: Model mapping in Pydantic + +This results in several issues: + +First, the ``from_attributes=True`` mode uses the same aliases as parsing. +You cannot configure transformations without affecting the logic for external interactions (like JSON parsing). + +Second, mapping does not account for type hints from the source class, leading to unnecessary type checks. +For example, if both classes contain fields with values of type ``list[str]`` with hundreds of elements, +Pydantic will check the type of each value. + +Third, you can't customize class mapping so that the conversion logic differs from parsing from an unknown source. +You are forced to either find workarounds or change interactions with the outside world. + +Fourth, there are no checks to ensure the mapping between the target class and the source is correctly defined. +Many such errors are caught in tests when the code fails with an error, +but some are only noticeable upon careful result comparison, +such as if a field in the target model has a default value. + +.. 
literalinclude:: /examples/why_not_pydantic/underdone_class_mapping_default.py + :caption: Skipped error + :lines: 2- + + +.. hint:: + + You can use Adaptix’s class conversion with Pydantic models, + eliminating all the problems listed above (except for the second point). + See :ref:`conversion tutorial ` and :ref:`supported-model-kinds` for details. + + +One presentation ought to be enough for anybody +==================================================== + +Pydantic tightly binds parsing rules to the model itself. +This creates major issues if you want to load or export the model differently based on use cases. + +For example, you might load a config from various formats. +While the structure of the config is generally similar, +it may differ in how certain types are loaded and in field naming conventions. + +Or consider having a common user model, +but needing to return a different field set for different clients. + +The only way to get around this problem is to try to use the ``context`` parameter +and write dispatch logic inside the validators. + + +Pydantic written in Rust, so Pydantic is fast? +=================================================== + +As benchmarks show, this is far from true. + +To be cautious, Pydantic’s speed is approximately equal to libraries written in Python and using code generation. + +Speaking more boldly, in some cases, Adaptix outperforms Pydantic by a factor of two +without losing in any benchmark, and PyPy usage can significantly speed up Adaptix. + +For more detail, see :ref:`benchmarks`. + + +Unmentioned Adaptix advantages +=================================== + +All the issues mentioned above highlight problems that don’t arise when using Adaptix. +However, there are aspects that cannot be counted as issues with Pydantic, +but they could highlight Adaptix in comparison. + +Firstly, Adaptix has a predicate system that allows granular customization of behavior. +You can adjust behavior for groups of classes or for a type only if it is within a specific class. +You can also configure logic separately for dictionary keys and values, even if they share the same type. +See :ref:`predicate-system` for details. + + +Secondly, Adaptix is designed to provide the maximum number of opportunities to follow the DRY (Don't Repeat Yourself) principle. + +* You can override behavior for entire groups of fields and types using the predicate system mentioned earlier. +* You can inherit rule groups, reducing code duplication. +* You can separate rules into several isolated layers, simplifying complex transformation cascades. + +For more information on these capabilities, see :ref:`retort_extension_and_combination`. + + +Migrating from Pydantic +======================================== + +Adaptix provides several tools for a gradual migration from Pydantic. + +First, Adaptix supports Pydantic models. +You can load and dump Pydantic models just as you would with ``@dataclass``, ``NamedTuple``, ``TypedDict``, and others. +This method ignores alias settings within the model itself, with all transformation logic defined in the retort. +Adaptix parses the input data and passes it to the model’s constructor. +See :ref:`supported-model-kinds` for details. + +.. literalinclude:: /examples/why_not_pydantic/migration_pydantic_model.py + :caption: Loading and dumping Pydantic model + + +Second, you can delegate handling of specific types directly to Pydantic with +:func:`.integrations.pydantic.native_pydantic`. 
+Using the built-in predicate system, +you can control behavior more granularly than Pydantic itself allows +(see :ref:`predicate-system` for details). + +.. literalinclude:: /examples/reference/integrations/native_pydantic.py + :caption: Delegating to Pydantic diff --git a/tests/test_doc.py b/tests/test_doc.py index 68e6fbec..acb155bd 100644 --- a/tests/test_doc.py +++ b/tests/test_doc.py @@ -4,9 +4,11 @@ from typing import Optional import pytest +from tests_helpers.misc import AndRequirement from adaptix._internal.feature_requirement import ( HAS_PY_311, + HAS_PY_312, HAS_SUPPORTED_PYDANTIC_PKG, HAS_SUPPORTED_SQLALCHEMY_PKG, Requirement, @@ -48,6 +50,8 @@ def pytest_generate_tests(metafunc): "loading-and-dumping/extended_usage/private_fields_skipping_pydantic": HAS_SUPPORTED_PYDANTIC_PKG, "reference/integrations/sqlalchemy_json/*": HAS_SUPPORTED_SQLALCHEMY_PKG, "conversion/tutorial/tldr": HAS_SUPPORTED_SQLALCHEMY_PKG, + "why_not_pydantic/instantiating_penalty*": AndRequirement(HAS_PY_312, HAS_SUPPORTED_PYDANTIC_PKG), + "why_not_pydantic/*": HAS_SUPPORTED_PYDANTIC_PKG, } diff --git a/tests/tests_helpers/tests_helpers/misc.py b/tests/tests_helpers/tests_helpers/misc.py index a72cb75a..6df53b46 100644 --- a/tests/tests_helpers/tests_helpers/misc.py +++ b/tests/tests_helpers/tests_helpers/misc.py @@ -7,6 +7,7 @@ from collections.abc import Generator, Reversible, Sequence from contextlib import contextmanager from dataclasses import dataclass, is_dataclass +from functools import reduce from pathlib import Path from types import ModuleType, SimpleNamespace from typing import Any, Callable, Optional, TypeVar, Union @@ -254,6 +255,19 @@ def fail_reason(self) -> str: return self._fail_reason +class AndRequirement(Requirement): + def __init__(self, *requirements): + self._requirements = requirements + super().__init__() + + def _evaluate(self) -> bool: + return reduce(lambda a, b: bool(a) and bool(b), self._requirements) + + @property + def fail_reason(self) -> str: + return " AND ".join(requirement.fail_reason for requirement in self._requirements) + + class StubRequest(Request): pass From 1d438e795a4591446c3c1227012c0cb6575d2686 Mon Sep 17 00:00:00 2001 From: pavel Date: Thu, 21 Nov 2024 22:58:42 +0300 Subject: [PATCH 2/4] Refactor "Why not Pydantic?" article --- docs/why-not-pydantic.rst | 126 ++++++++++++++++++++++++++++++++------ 1 file changed, 106 insertions(+), 20 deletions(-) diff --git a/docs/why-not-pydantic.rst b/docs/why-not-pydantic.rst index 1f728f1f..9c860a02 100644 --- a/docs/why-not-pydantic.rst +++ b/docs/why-not-pydantic.rst @@ -6,12 +6,10 @@ Why not Pydantic? Introduction ==================== -.. Надо переписать вступление!!!!!!!!! - Pydantic is one of the most popular libraries for data serialization and deserialization. -However, the principles it’s built on often hinder ease of use. +However, the principles it’s built on often prevent ease of use. -In this article, we’ll explore how using Adaptix instead of Pydantic can help tackle common tasks more efficiently. +In this article, we’ll explore how using Adaptix instead of Pydantic can help manage common tasks more efficiently. .. note:: @@ -19,8 +17,43 @@ In this article, we’ll explore how using Adaptix instead of Pydantic can help Some things may have changed since then, but probably not much. +The Main Thesis +===================== + +Pydantic works smoothly only when you violate the Single Responsibility Principle (SRP). +It wants to know about your domain layer, it wants to penetrate it. 
+Pydantic performs best when it's simultaneously a (de)serializer for incoming requests and a domain model. +But as soon as you separate your code into layers, +issues arise when transferring data between them. + +Let's imagine that you have three nested domain dataclasses. +You work fine for a while, and then you add new field to the deepest model and +discover that Pydantic doesn't include the UTC offset in the datetime string +(or it has any other unwanted behavior). + +Your possible options: + +* Completely duplicate the dataclass structure, including all nested models, + into an equivalent Pydantic model with the necessary config. +* Perform manual serialization. +* Start using Adaptix. + +And what you get: + +* It does not invade your domain layer, the serialization logic lives entirely in the presentation layer. +* It decreases code coupling. +* You can use adaptix to convert models passing between layers + + +Why I can not use Pydantic as domain model? Well, let’s talk about that. + + +Pydantic's pitfalls +========================== + + Coupling instance creation and data parsing -================================================ +------------------------------------------------- Creating any model instance in Pydantic triggers data parsing. On one hand, this makes instance creation within a program significantly more resource-intensive, @@ -29,7 +62,7 @@ Let’s examine this in detail. Instantiating penalty -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^ Let’s take a model from the Pydantic tutorial: @@ -84,7 +117,7 @@ And yet, it’s even slower: Fused validation ---------------------- +^^^^^^^^^^^^^^^^^^^^^^^ Validating invariants within the model is reasonable, but validation should be separated into business logic and representation layers. @@ -109,7 +142,7 @@ or skip any validation at all using ``.model_construct()`` Implicit conversions --------------------------- +^^^^^^^^^^^^^^^^^^^^^^^ The next issue lies in the fact that implicit type conversion logic, suitable for parsing, is often inappropriate for creating an object via a constructor. @@ -120,7 +153,8 @@ However, this behavior can lead to errors when applied within a program. For example, if you pass a ``float`` value to a field with the ``Decimal`` type, Pydantic will implicitly convert it instead of raising an error. -This leads to the fact that the error of using floats for monetary calculations can be hidden, +This leads to the possibility +that the error of using floats for monetary calculations can be hidden, potentially causing inaccuracies. .. literalinclude:: /examples/why_not_pydantic/implicit_conversions.py @@ -136,7 +170,7 @@ To do so, you must enable strict mode and disable it each time model parsing occ Aliasing mess ------------------------- +^^^^^^^^^^^^^^^^^^^^^^^ The essence of aliases is that you have an external and an internal field name, where the external name is unsuitable for use within the program. @@ -153,7 +187,7 @@ Additionally, this option affects JSON parsing, enabling it to use field names a Mistakes silencing ------------------------- +^^^^^^^^^^^^^^^^^^^^^^^ One of the biggest issues with Pydantic’s approach is that extra fields passed into the constructor are ignored. As a result, such typos do not show up immediately, @@ -171,7 +205,7 @@ though this will also affect the parser. Locking ecosystem -============================= +------------------------------ Pydantic’s primary purpose is data serialization and deserialization. 
Instead of using standard library models (``@dataclass``, ``NamedTuple``, ``TypedDict``), @@ -185,7 +219,7 @@ support for Pydantic models, creating dependencies on these integrations. Pydantic does support standard library models, but this support is very limited. For example, you can’t alter parsing or serialization logic in an existing class. -You can avoid these issues by restricting Pydantic to the layer responsible for communication with outer world. +You can avoid these issues by restricting Pydantic to the layer responsible for communication with the outer world. However, this requires duplicating classes and manually writing converters. Pydantic offers a ``from_attributes=True`` mode, which allows you to create model instances from other objects, @@ -193,7 +227,7 @@ though it has significant limitations. Underdone class mapping -=============================== +------------------------------ Pydantic offers very weak support for transforming one model into another. It behaves like a regular validation mode for an unknown source, @@ -232,7 +266,7 @@ such as if a field in the target model has a default value. One presentation ought to be enough for anybody -==================================================== +---------------------------------------------------- Pydantic tightly binds parsing rules to the model itself. This creates major issues if you want to load or export the model differently based on use cases. @@ -249,7 +283,7 @@ and write dispatch logic inside the validators. Pydantic written in Rust, so Pydantic is fast? -=================================================== +-------------------------------------------------- As benchmarks show, this is far from true. @@ -261,8 +295,47 @@ without losing in any benchmark, and PyPy usage can significantly speed up Adapt For more detail, see :ref:`benchmarks`. -Unmentioned Adaptix advantages -=================================== + +About Adaptix +========================== + + +The Philosophy +--------------------------------- + + +Adaptix does not offer a new special model that requires IDE and type checker support. +It works with any model you like +(``@dataclass``, ``attrs``, and even Pydantic, see full list at :ref:`supported-model-kinds`). + +Adaptix does not affect the model definition. +You create a special object that implements the loading and dumping of models. +This object is called a `Retort` (the name of a chemical device used to distill substances). + +For each presentation format, you create a new retort instance. +You can extend and combine instances to eliminate code duplication. + +So, you have: + * Models defined inside your business logic layer (these classes know nothing about serialization mechanism), + * Retorts know how to transform these classes into outer formats. + +Such separation allows you to keep your code clean and simple. +Also, you can create one retort instance to handle dozens of classes +following similar principles of outer representation. + +See :ref:`loading and dumping tutorial ` for details. + +But that's not all. Adaptix can generate object mappers. +Such converters are vital for layered applications +but they are very boring to write and error-prone to maintain. +For the same or similar models, you can produce a converter using one line of code. +This converter knows about two models but does not affect them. + +See :ref:`conversion tutorial ` for details. 
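+
+To make the idea concrete, below is a minimal sketch of such a generated converter.
+The ``Book``/``BookDTO`` pair here is illustrative (it is not one of the tutorial models);
+the converter itself is produced with ``adaptix.conversion.get_converter``:
+
+.. code-block:: python
+
+    from dataclasses import dataclass
+
+    from adaptix.conversion import get_converter
+
+
+    @dataclass
+    class Book:  # illustrative domain model, knows nothing about other layers
+        title: str
+        price: int
+
+
+    @dataclass
+    class BookDTO:  # illustrative model of another layer with the same shape
+        title: str
+        price: int
+
+
+    # one line producing a plain Book -> BookDTO function;
+    # neither class is modified or even aware of the other
+    convert_book_to_dto = get_converter(Book, BookDTO)
+
+    assert (
+        convert_book_to_dto(Book(title="Fahrenheit 451", price=100))
+        == BookDTO(title="Fahrenheit 451", price=100)
+    )
+
+Fields here are matched by name, so two models with the same shape need no extra configuration.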
+ + +Unmentioned advantages +--------------------------------- All the issues mentioned above highlight problems that don’t arise when using Adaptix. However, there are aspects that cannot be counted as issues with Pydantic, @@ -273,7 +346,6 @@ You can adjust behavior for groups of classes or for a type only if it is within You can also configure logic separately for dictionary keys and values, even if they share the same type. See :ref:`predicate-system` for details. - Secondly, Adaptix is designed to provide the maximum number of opportunities to follow the DRY (Don't Repeat Yourself) principle. * You can override behavior for entire groups of fields and types using the predicate system mentioned earlier. @@ -284,7 +356,7 @@ For more information on these capabilities, see :ref:`retort_extension_and_combi Migrating from Pydantic -======================================== +--------------------------------- Adaptix provides several tools for a gradual migration from Pydantic. @@ -306,3 +378,17 @@ you can control behavior more granularly than Pydantic itself allows .. literalinclude:: /examples/reference/integrations/native_pydantic.py :caption: Delegating to Pydantic + + +Conclusion +===================== + +While Pydantic has been a popular choice for data serialization and validation in Python, +it comes with notable drawbacks that can complicate software development, particularly in layered architectures. +Its tight coupling of validation, serialization, and domain modeling often violates the Single Responsibility Principle, +leading to issues such as unnecessary complexity, inefficiency, and loss of flexibility. + +Adaptix, by contrast, offers a more modular and developer-friendly approach. +By decoupling serialization logic from domain models, it allows for cleaner code, easier maintenance, +and more efficient operations. Whether it’s class mapping, custom validation, or handling diverse data formats, +Adaptix delivers robust solutions that avoid the pitfalls commonly encountered with Pydantic. From 9647acc49ffd5a60c45614fdc6ef2f8259620ab9 Mon Sep 17 00:00:00 2001 From: pavel Date: Sat, 23 Nov 2024 19:29:01 +0300 Subject: [PATCH 3/4] some text fixes --- docs/why-not-pydantic.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/why-not-pydantic.rst b/docs/why-not-pydantic.rst index 9c860a02..c23ba46c 100644 --- a/docs/why-not-pydantic.rst +++ b/docs/why-not-pydantic.rst @@ -27,7 +27,7 @@ But as soon as you separate your code into layers, issues arise when transferring data between them. Let's imagine that you have three nested domain dataclasses. -You work fine for a while, and then you add new field to the deepest model and +It works fine for a while, and then you add new field to the deepest model and discover that Pydantic doesn't include the UTC offset in the datetime string (or it has any other unwanted behavior). @@ -38,7 +38,7 @@ Your possible options: * Perform manual serialization. * Start using Adaptix. -And what you get: +And what you get after switching: * It does not invade your domain layer, the serialization logic lives entirely in the presentation layer. * It decreases code coupling. @@ -61,7 +61,7 @@ while on the other, it can lead to unexpected and undesirable behavior during in Let’s examine this in detail. 
-Instantiating penalty +Instantiation penalty ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Let’s take a model from the Pydantic tutorial: From 43f341f0f3e4022f08987a428ddf7c046473c0a3 Mon Sep 17 00:00:00 2001 From: pavel Date: Sat, 23 Nov 2024 22:25:45 +0300 Subject: [PATCH 4/4] some other text fixes --- docs/why-not-pydantic.rst | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/docs/why-not-pydantic.rst b/docs/why-not-pydantic.rst index c23ba46c..3d65836d 100644 --- a/docs/why-not-pydantic.rst +++ b/docs/why-not-pydantic.rst @@ -23,11 +23,10 @@ The Main Thesis Pydantic works smoothly only when you violate the Single Responsibility Principle (SRP). It wants to know about your domain layer, it wants to penetrate it. Pydantic performs best when it's simultaneously a (de)serializer for incoming requests and a domain model. -But as soon as you separate your code into layers, -issues arise when transferring data between them. +However, separating your code into layers can lead to challenges when transferring data between them. Let's imagine that you have three nested domain dataclasses. -It works fine for a while, and then you add new field to the deepest model and +It works fine for a while, and then you add a new field to the deepest model and discover that Pydantic doesn't include the UTC offset in the datetime string (or it has any other unwanted behavior). @@ -40,12 +39,11 @@ Your possible options: And what you get after switching: -* It does not invade your domain layer, the serialization logic lives entirely in the presentation layer. -* It decreases code coupling. -* You can use adaptix to convert models passing between layers +* Nothing invades your domain layer, the serialization logic lives entirely in the presentation layer. +* Decreased code coupling. +* You can use adaptix to convert models passing between layers. - -Why I can not use Pydantic as domain model? Well, let’s talk about that. +Why can't I just use Pydantic as the domain model? Well, let’s talk about that. Pydantic's pitfalls @@ -82,7 +80,7 @@ Here are the results: dataclass 0.9756000880151987 Creating a Pydantic model instance is nearly 2.4 times slower than creating a similar dataclass instance. -This is the cost you’ll pay each time you want to create an object in your business logic. +This performance overhead is the cost you’ll pay each time you create an object in your business logic. But Pydantic has a method, ``.model_construct()``, for creating instances without validation! And yet, it’s even slower: @@ -174,12 +172,12 @@ Aliasing mess The essence of aliases is that you have an external and an internal field name, where the external name is unsuitable for use within the program. -However, the Pydantic combines different representations into ball of mud. +However, the Pydantic combines different representations into a ball of mud. By default, the constructor only accepts fields by their aliases (i.e., using the external names). You can change this with the ``populate_by_name`` configuration option. -This option allows you to use the internal field names in the constructor, -yet the constructor will still accept the external representation. +This option allows you to use the internal field names in the constructor +while still accepting the external representation. Additionally, this option affects JSON parsing, enabling it to use field names alongside aliases. .. 
literalinclude:: /examples/why_not_pydantic/aliasing_mess_extra.py @@ -191,7 +189,7 @@ Mistakes silencing One of the biggest issues with Pydantic’s approach is that extra fields passed into the constructor are ignored. As a result, such typos do not show up immediately, -but live in the program until they are found by tests or users. +but remain in the program until discovered by tests or users. Static analyzers can reduce the number of such errors, but this does not always work due to the dynamic nature of Python. @@ -229,7 +227,7 @@ though it has significant limitations. Underdone class mapping ------------------------------ -Pydantic offers very weak support for transforming one model into another. +Pydantic offers limited support for transforming one model into another. It behaves like a regular validation mode for an unknown source, except instead of referencing dictionary keys, it accesses object attributes. @@ -269,7 +267,7 @@ One presentation ought to be enough for anybody ---------------------------------------------------- Pydantic tightly binds parsing rules to the model itself. -This creates major issues if you want to load or export the model differently based on use cases. +This creates major issues when loading or exporting the model differently based on use cases. For example, you might load a config from various formats. While the structure of the config is generally similar,