Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Replace validation strict parameter with options object #14

Merged
merged 34 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
78b9750
feat: Validation options can be chosen by the user
Sarrabah Jul 1, 2024
519a79d
refactor: Use dataclass for more readability
Sarrabah Jul 2, 2024
f7d418f
refactor: Reorganisation
Sarrabah Jul 2, 2024
00ea089
refactor: Renaming a variable
Sarrabah Jul 2, 2024
9e4b768
refactor: Rename the function
Sarrabah Jul 2, 2024
ec949bd
refactor: Delete the lenient enum parameter
Sarrabah Jul 2, 2024
0fb6c0b
fix: Ensure that each instance gets its own separate extra valid values
Sarrabah Jul 2, 2024
3184963
chore: Update pyright dependency
Sarrabah Jul 2, 2024
e851273
fix: Change the order of comparison instructions to avoid the space …
Sarrabah Jul 3, 2024
ef3810c
refactor: Delete the lenient region set
Sarrabah Jul 3, 2024
2653cb6
fix: Add options parameter to is valid function
Sarrabah Jul 3, 2024
b1ad09e
refactor: Add options parameter in tests
Sarrabah Jul 3, 2024
303f325
docs: Add a detailed docstring to the Options class for better explan…
Sarrabah Jul 4, 2024
eb2bf70
fix: Import Options class
Sarrabah Jul 8, 2024
0178eb6
feat: Handle accented capital letters
Sarrabah Jul 9, 2024
90fec05
refactor: Reorganize options
Sarrabah Jul 9, 2024
3f7a793
refactor: Use a separated private function
Sarrabah Jul 9, 2024
6b1f643
refactor: Renaming
Sarrabah Jul 9, 2024
97137e6
refactor: Using regex to replace extra whitespaces by one space
Sarrabah Jul 9, 2024
64a189a
docs: Reformulate the docstring
Sarrabah Jul 9, 2024
f76ce50
Update src/frformat/options.py and remove "necessary" from the docstring
Sarrabah Jul 9, 2024
5d0ea60
refactor: Renaming
Sarrabah Jul 9, 2024
4275372
refactor: Renaming
Sarrabah Jul 9, 2024
3c0d944
refactor: Ignore is_valid method as a class method
Sarrabah Jul 11, 2024
e1dfb09
fix: Delete the property class method to is_valid method
Sarrabah Jul 11, 2024
137f55d
fix: Ignore the options parameter
Sarrabah Jul 11, 2024
0c704ee
refactor: Using private attributes
Sarrabah Jul 15, 2024
ac1eb51
refactor: Space symbol include also the non-breaking-space
Sarrabah Jul 16, 2024
d8c7728
refactor: Ignore stocking the attribute inside the constructor
Sarrabah Jul 16, 2024
4611590
refactor: Transform format classmethod to a simple method
Sarrabah Jul 16, 2024
b28f4da
refactor: Transform a class method to an instance method
Sarrabah Jul 16, 2024
ae8b569
refactor: Delete the constructor
Sarrabah Jul 16, 2024
6d97f3f
fix: Delete an unused import
Sarrabah Jul 16, 2024
82ed5f5
refactor: Renaming
Sarrabah Jul 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ shapely = "^2.0.2"
[tool.poetry.group.linting.dependencies]
black = "^23.12.1"
isort = "^5.13.2"
pyright = "^1.1.347"
pyright = "^1.1.369"
flake8 = "^7.0.0"


Expand Down
2 changes: 1 addition & 1 deletion src/frformat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# flake8: noqa
from .custom_format import * # isort:skip

from .code_rna import CodeRNA as CodeRNA
from .geo.canton import Canton as Canton
from .geo.code_commune_insee import CodeCommuneInsee as CodeCommuneInsee
Expand All @@ -20,5 +19,6 @@
from .geo.pays import Pays as Pays
from .geo.region import Region as Region
from .nomenclature_acte_format import NomenclatureActe as NomenclatureActe
from .options import Options
from .siren import Siren as Siren
from .siret import Siret as Siret
32 changes: 20 additions & 12 deletions src/frformat/common.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
import re

from frformat.options import Options

NBSP = "\u00A0" # Non-breaking space
NNBSP = "\u202F" # Narrow non-breaking space


def normalize_text(val: str) -> str:
val = val.lower()
val = val.replace("-", " ")
val = val.replace("_", " ")
val = val.replace("'", " ")
val = val.replace(",", " ")
val = val.replace(" ", " ")
val = re.sub(r"[èéêë]", "e", val)
val = re.sub(r"[àáâãäå]", "a", val)
val = re.sub(r"[ìíîï]", "i", val)
val = re.sub(r"[òóôõö]", "o", val)
val = re.sub(r"[ùúûü]", "u", val)
# Accented vowels handled by the ``ignore_accents`` option, keyed by the plain
# letter that replaces them.
_ACCENTED_VOWELS = {
    "e": "èéêë",
    "a": "àáâãäå",
    "i": "ìíîï",
    "o": "òóôõö",
    "u": "ùúûü",
}

# Single-pass translation table covering lowercase AND uppercase variants, so
# accents are also removed when ``ignore_case`` is disabled (e.g. "Île").
_ACCENT_TABLE = str.maketrans(
    {
        accented: plain
        for base, variants in _ACCENTED_VOWELS.items()
        for accented, plain in zip(
            variants + variants.upper(),
            base * len(variants) + base.upper() * len(variants),
        )
    }
)


def normalize_value(val: str, options: "Options") -> str:
    """Return ``val`` normalized according to the enabled ``options``.

    The steps are applied in this order, each only when its flag is truthy:

    1. ``ignore_case``: lowercase the value.
    2. ``ignore_accents``: replace accented vowels (both cases) with their
       unaccented counterpart.
    3. ``ignore_non_alphanumeric``: replace every character that is neither a
       letter nor a digit (underscore included) with a space.
    4. ``ignore_extra_white_space``: strip leading/trailing whitespace and
       collapse each run of whitespace into a single space.

    Args:
        val: The raw value to normalize.
        options: Object exposing the four boolean flags listed above.

    Returns:
        The normalized string; ``val`` unchanged when no flag is set.
    """
    if options.ignore_case:
        val = val.lower()

    if options.ignore_accents:
        val = val.translate(_ACCENT_TABLE)

    if options.ignore_non_alphanumeric:
        # ``\w`` is Unicode-aware for str patterns, so accented letters are
        # kept as letters instead of being blanked like punctuation.
        val = re.sub(r"[\W_]", " ", val)

    if options.ignore_extra_white_space:
        # ``\s`` also matches non-breaking and narrow non-breaking spaces.
        val = re.sub(r"\s+", " ", val.strip())

    return val
31 changes: 10 additions & 21 deletions src/frformat/enum_format.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,24 @@
from typing import Optional, Set, Type
from typing import Set, Type

from frformat import CustomStrFormat, Metadata
from frformat.common import normalize_text
from frformat.common import normalize_value
from frformat.options import Options


def new(
class_name: str,
name: str,
description: str,
strict_enum: Set[str],
lenient_enum: Optional[Set[str]] = None,
) -> Type:
if not lenient_enum:
lenient_enum = {normalize_text(e) for e in strict_enum}

def new(class_name: str, name: str, description: str, enum: Set[str]) -> Type:
class EnumFormat(CustomStrFormat):
    """Checks if a value belongs to a given set of valid values.

    Both the candidate value and the valid values are normalized with the
    validation "options" before being compared, so the comparison is as
    strict or as lenient as the options request.
    """

    metadata = Metadata(name, description)

    @classmethod
    def is_valid(cls, value: str, options: Options = Options()) -> bool:
        """Tell whether ``value`` is one of the accepted values.

        Args:
            value: The candidate string to validate.
            options: Normalization flags and extra accepted values. The
                default instance is only read here, never mutated.

        Returns:
            True if the normalized ``value`` matches a normalized entry of
            the enum or of ``options.extra_valid_values``.
        """
        # Extra values supplied by the caller are accepted on top of the
        # base enum, and go through the same normalization.
        accepted_values = set(enum) | set(options.extra_valid_values)
        normalized_enum = {normalize_value(e, options) for e in accepted_values}
        normalized_value = normalize_value(value, options)

        return normalized_value in normalized_enum

EnumFormat.__name__ = class_name
EnumFormat.__qualname__ = class_name
Expand Down
3 changes: 2 additions & 1 deletion src/frformat/geo/code_fantoir.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from frformat import CustomStrFormat, Metadata
from frformat.formatter import Formatter
from frformat.geo.code_fantoir_set import PARTIAL_CODE_FANTOIR_SET
from frformat.options import Options

name = "Code fantoir"
description = "Vérifie les codes fantoirs valides"
Expand All @@ -20,7 +21,7 @@ class CodeFantoir(CustomStrFormat):
formatter = CodeFantoirFormatter()

@classmethod
def is_valid(cls, value: str) -> bool:
def is_valid(cls, value: str, options: Options = Options()) -> bool:
Sarrabah marked this conversation as resolved.
Show resolved Hide resolved
if len(value) != 5:
return False

Expand Down
4 changes: 2 additions & 2 deletions src/frformat/geo/region.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from frformat import enum_format
from frformat.geo.region_set import LENIENT_REGION_SET, REGION_SET
from frformat.geo.region_set import REGION_SET

name = "Nom de région"
description = (
"Vérifie les régions françaises valides (code officiel géographique 2020) "
)

Region = enum_format.new("Region", name, description, REGION_SET, LENIENT_REGION_SET)
Region = enum_format.new("Region", name, description, REGION_SET)
49 changes: 0 additions & 49 deletions src/frformat/geo/region_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,52 +18,3 @@
"Provence-Alpes-Côte d'Azur",
"Corse",
}

LENIENT_REGION_SET = {
"alsace",
"aquitaine",
"ara",
"aura",
"auvergne",
"auvergne et rhone alpes",
"auvergne rhone alpes",
"basse normandie",
"bfc",
"bourgogne",
"bourgogne et franche comte",
"bourgogne franche comte",
"bretagne",
"centre",
"centre val de loire",
"champagne ardenne",
"corse",
"franche comte",
"ge",
"nouvelle aquitaine",
"grand est",
"guadeloupe",
"guyane",
"haute normandie",
"hauts de france",
"hdf",
"ile de france",
"languedoc roussillon",
"la reunion",
"la reunion",
"limousin",
"lorraine",
"martinique",
"mayotte",
"midi pyrenees",
"nord pas de calais",
"normandie",
"npdc",
"occitanie",
"paca",
"pays de la loire",
"picardie",
"poitou charentes",
"provence alpes cote d azur",
"reunion",
"rhone alpes",
}
37 changes: 37 additions & 0 deletions src/frformat/options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from dataclasses import dataclass, field
from typing import Set


@dataclass
class Options:
    """Options controlling how a value is validated against a French format.

    Every option is optional and disabled by default.

    Attributes:
        ignore_case:
            Indicates if case should be ignored.
            When set to True, all characters in the string are converted to
            lowercase before comparison.

        ignore_accents:
            Indicates if accents should be ignored.
            When set to True, accented characters are replaced with their
            non-accented counterparts. Example: 'à' is replaced by 'a'.

        ignore_non_alphanumeric:
            Indicates if non-alphanumeric characters should be ignored.
            When set to True, punctuation marks and symbols are replaced by
            a space.

        ignore_extra_white_space:
            Indicates if extra white space should be ignored.
            When set to True, multiple consecutive spaces are treated as a
            single space, and leading or trailing spaces are removed.

        extra_valid_values:
            Additional values that should be considered valid during
            validation, beyond the original set of valid values. This allows
            special cases or exceptions to be accommodated.
    """

    ignore_case: bool = False
    ignore_accents: bool = False
    ignore_non_alphanumeric: bool = False
    ignore_extra_white_space: bool = False
    # default_factory gives every instance its own set instead of a shared one
    extra_valid_values: Set[str] = field(default_factory=set)
14 changes: 4 additions & 10 deletions src/tests/test_geo_fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Region,
)
from frformat.common import NBSP, NNBSP
from frformat.options import Options
from tests.testing import (
strict_lenient_test_helper_factory,
validation_test_helper_factory,
Expand All @@ -27,8 +28,8 @@ def test_code_fantoir():
fantoir_valid = ["ZB03A"]
fantoir_invalid = ["1000"]

_test_fantoir(fantoir_valid, True)
_test_fantoir(fantoir_invalid, False)
_test_fantoir(fantoir_valid, True, Options())
_test_fantoir(fantoir_invalid, False, Options())


def test_code_commune_insee():
Expand Down Expand Up @@ -134,14 +135,7 @@ def test_region():
_test_region = strict_lenient_test_helper_factory(Region)

region_strict = ["Centre-Val de Loire", "La Réunion", "Corse"]
region_lenient = [
"Centre Val de Loire",
"La Reunion",
"corse",
"bfc",
"BFC",
"aura",
]
region_lenient = ["Centre Val de Loire", "La Reunion", "corse"]
region_invalid = ["Beleriand", "Canyon Cosmo"]

_test_region(region_strict, region_lenient, region_invalid)
Expand Down
34 changes: 19 additions & 15 deletions src/tests/testing.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
from typing import List

from frformat.options import Options

def validation_test_helper_factory(Class):
    """Build an assertion helper bound to ``Class.is_valid``.

    The returned helper checks that every test case is reported as valid
    (or invalid) by ``Class.is_valid`` when called with the given options.
    """

    def test_helper(test_cases: List[str], expectValid: bool, options: Options) -> None:
        # Options equal to the defaults are reported as a "strict" check
        adjective = "strictly" if options == Options() else "leniently"
        expectation = "valid" if expectValid else "invalid"

        for case in test_cases:
            assert (
                Class.is_valid(case, options) == expectValid
            ), f"Check that { Class.__name__ } { case } is { adjective } { expectation }"

    return test_helper
Expand All @@ -33,12 +31,18 @@ def test_helper(
lenient_test_cases: List[str],
Sarrabah marked this conversation as resolved.
Show resolved Hide resolved
invalid_test_cases: List[str],
) -> None:
_test_class(strict_test_cases, expectValid=True, strict=True)
_test_class(lenient_test_cases, expectValid=False, strict=True)
_test_class(invalid_test_cases, expectValid=False, strict=True)

_test_class(strict_test_cases, expectValid=True, strict=False)
_test_class(lenient_test_cases, expectValid=True, strict=False)
_test_class(invalid_test_cases, expectValid=False, strict=False)
optionsTrue = Options(
ignore_case=True,
ignore_non_alphanumeric=True,
ignore_extra_white_space=True,
ignore_accents=True,
)
_test_class(strict_test_cases, expectValid=True, options=Options())
_test_class(lenient_test_cases, expectValid=False, options=Options())
_test_class(invalid_test_cases, expectValid=False, options=Options())

_test_class(strict_test_cases, expectValid=True, options=optionsTrue)
_test_class(lenient_test_cases, expectValid=True, options=optionsTrue)
_test_class(invalid_test_cases, expectValid=False, options=optionsTrue)

return test_helper