diff --git a/price_parser/_currencies.py b/price_parser/_currencies.py index a6563aa..56cc904 100644 --- a/price_parser/_currencies.py +++ b/price_parser/_currencies.py @@ -1734,3 +1734,19 @@ set(chain.from_iterable(c['sn2'] for c in CURRENCIES.values() if 'sn2' in c)) ) +CURRENCY_ALL_SYMBOLS: List[str] = CURRENCY_CODES + CURRENCY_SYMBOLS + CURRENCY_NATIONAL_SYMBOLS +CURRENCIES_DEC_DIGITS: Dict[str, int] = {} +for symbol in CURRENCY_ALL_SYMBOLS: + for currency_key in CURRENCIES.keys(): + if currency_key == symbol: + CURRENCIES_DEC_DIGITS[symbol] = CURRENCIES[currency_key]["d"] + break + if CURRENCIES[currency_key]["s"] == symbol: + CURRENCIES_DEC_DIGITS[symbol] = CURRENCIES[currency_key]["d"] + break + if CURRENCIES[currency_key]["sn"] == symbol: + CURRENCIES_DEC_DIGITS[symbol] = CURRENCIES[currency_key]["d"] + break + if "sn2" in CURRENCIES[currency_key] and symbol in CURRENCIES[currency_key]["sn2"]: + CURRENCIES_DEC_DIGITS[symbol] = CURRENCIES[currency_key]["d"] + break diff --git a/price_parser/parser.py b/price_parser/parser.py index 3bba674..8bae2ca 100644 --- a/price_parser/parser.py +++ b/price_parser/parser.py @@ -6,7 +6,7 @@ import attr from ._currencies import (CURRENCY_CODES, CURRENCY_NATIONAL_SYMBOLS, - CURRENCY_SYMBOLS, CURRENCIES) + CURRENCY_SYMBOLS, CURRENCIES_DEC_DIGITS) @attr.s(auto_attribs=True) @@ -27,7 +27,7 @@ def amount_float(self) -> Optional[float]: def fromstring(cls, price: Optional[str], currency_hint: Optional[str] = None, decimal_separator: Optional[str] = None, - thousand_with_dot: bool = True) -> 'Price': + digit_group_separator: Optional[str] = None) -> 'Price': """ Given price and currency text extracted from HTML elements, return ``Price`` instance, which provides a clean currency symbol and @@ -38,31 +38,28 @@ def fromstring(cls, price: Optional[str], ``price`` string, it could be **preferred** over a value extracted from ``currency_hint`` string. 
- ``thousand_with_dot`` is optional; it is used to determine if - thousand separator is a dot or a comma. If ``thousand_with_dot`` - is ``True``, then ``1.000`` is parsed as ``1000``. If it is ``False``, - then ``1.000`` is parsed as ``1``. + ``decimal_separator`` is optional; it is used to determine the + decimal separator in price. If ``decimal_separator`` is ``None``, + then it is guessed from ``price`` string. If ``decimal_separator`` + is ``"."``, then ``1.000`` is parsed as ``1``. If it is ``","``, + then ``1.000`` is parsed as ``1000``. + + ``digit_group_separator`` is optional; it is used to determine the + digit group separator in price. If ``digit_group_separator`` is + ``None``, then it is guessed from ``price`` string. If + ``digit_group_separator`` is ``"."``, then ``1.000`` is parsed as + ``1000``. If it is ``","``, then ``1.000`` is parsed as ``1``. """ currency = extract_currency_symbol(price, currency_hint) if currency is not None: currency = currency.strip() - for currency_key in CURRENCIES.keys(): - if CURRENCIES[currency_key]["s"] == currency: - if CURRENCIES[currency_key]["d"] == 3: - thousand_with_dot = False - break - if CURRENCIES[currency_key]["sn"] == currency: - if CURRENCIES[currency_key]["d"] == 3: - thousand_with_dot = False - break - alternative_symbols = CURRENCIES[currency_key].get("sn2", []) - if currency in alternative_symbols: - if CURRENCIES[currency_key]["d"] == 3: - thousand_with_dot = False - break + if currency in CURRENCIES_DEC_DIGITS: + if CURRENCIES_DEC_DIGITS[currency] == 3: + if not digit_group_separator: + digit_group_separator = ',' + if not decimal_separator: + decimal_separator = '.' amount_text = extract_price_text(price) if price is not None else None - if not thousand_with_dot: - decimal_separator = '.' amount_num = ( parse_number(amount_text, decimal_separator) if amount_text is not None else None