diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml deleted file mode 100644 index 5142595..0000000 --- a/.github/workflows/sphinx.yml +++ /dev/null @@ -1,32 +0,0 @@ -# name: CI - -# on: -# push: -# branches: [main] -# tags: -# - "v*" -# pull_request: - -# jobs: -# sphinx-build: -# runs-on: ubuntu-latest -# steps: -# - name: Checkout source -# uses: actions/checkout@v2 -# - name: Set up Python 3.7 -# uses: actions/setup-python@v2 -# with: -# python-version: 3.7 -# - name: Install dependencies -# run: | -# pip install .[doc] -# - name: sphinx apidoc -# run: | -# sphinx-apidoc -f -o ./docs . -# - name: sphinx build -# run: | -# sphinx-build -nW --keep-going -b html docs/ docs/_build/html - -# ToDo: pushlish rtd -# publish: -# runs-on: ubuntu-latest diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..dd9ea4f --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,29 @@ +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + builder: html + configuration: docs/conf.py + fail_on_warning: true + +# Build documentation with MkDocs +#mkdocs: +# configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF +# formats: +# - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - method: pip + path: . + extra_requirements: + - doc diff --git a/docs/conf.py b/docs/conf.py index e4d3137..126424a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -35,7 +35,7 @@ "sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.napoleon", - "sphinx_rtd_theme", + "myst_parser", ] # Add any paths that contain templates here, relative to this directory. @@ -52,8 +52,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -# html_theme = "sphinx_book_theme" -html_theme = "sphinx_rtd_theme" +html_theme = "sphinx_book_theme" + +html_title = "linkify-it-py" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..a85db29 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,10 @@ +```{include} ../README.md +``` + +```{toctree} +--- +maxdepth: 2 +caption: Contents +--- +linkify_it +``` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index f0412d1..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. linkify-it-py documentation master file, created by - sphinx-quickstart on Sun Nov 1 21:26:57 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to linkify-it-py's documentation! -========================================= - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - linkify_it - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/linkify_it.rst b/docs/linkify_it.rst index 2e91d5b..9ec9a62 100644 --- a/docs/linkify_it.rst +++ b/docs/linkify_it.rst @@ -12,18 +12,18 @@ linkify\_it.main module :undoc-members: :show-inheritance: -linkify\_it.re module ---------------------- +linkify\_it.tlds module +----------------------- -.. automodule:: linkify_it.re +.. automodule:: linkify_it.tlds :members: :undoc-members: :show-inheritance: -Module contents ---------------- +linkify\_it.ucre module +----------------------- -.. automodule:: linkify_it +.. automodule:: linkify_it.ucre :members: :undoc-members: :show-inheritance: diff --git a/docs/modules.rst b/docs/modules.rst deleted file mode 100644 index b4c2f2a..0000000 --- a/docs/modules.rst +++ /dev/null @@ -1,9 +0,0 @@ -linkify-it-py -============= - -.. 
toctree:: - :maxdepth: 4 - - linkify_it - setup - test diff --git a/docs/setup.rst b/docs/setup.rst deleted file mode 100644 index 552eb49..0000000 --- a/docs/setup.rst +++ /dev/null @@ -1,7 +0,0 @@ -setup module -============ - -.. automodule:: setup - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/test.rst b/docs/test.rst deleted file mode 100644 index 412f2b1..0000000 --- a/docs/test.rst +++ /dev/null @@ -1,21 +0,0 @@ -test package -============ - -Submodules ----------- - -test.test\_links module ------------------------ - -.. automodule:: test.test_links - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: test - :members: - :undoc-members: - :show-inheritance: diff --git a/linkify_it/main.py b/linkify_it/main.py index 966bf60..dd70356 100644 --- a/linkify_it/main.py +++ b/linkify_it/main.py @@ -8,17 +8,11 @@ RE_TYPE = type(re.compile(r"")) -class SchemaError(Exception): - def __init__(self, name, val): - message = "(LinkifyIt) Invalid schema '{}': '{}'".format(name, val) - super().__init__(message) - - -def escape_re(string): +def _escape_re(string): return re.sub(r"[.?*+^$[\]\\(){}|-]", "\\$&", string) -def index_of(text, search_value): +def _index_of(text, search_value): try: result = text.index(search_value) except ValueError: @@ -27,6 +21,14 @@ def index_of(text, search_value): return result +class SchemaError(Exception): + """Linkify schema error""" + + def __init__(self, name, val): + message = "(LinkifyIt) Invalid schema '{}': '{}'".format(name, val) + super().__init__(message) + + class Match: """Match result. @@ -37,6 +39,10 @@ class Match: raw (str): Matched string. text (str): Notmalized text of matched string. url (str): Normalized url of matched string. 
+ + Args: + linkifyit (:class:`linkify_it.main.LinkifyIt`): LinkifyIt object + shift (int): text search position """ def __repr__(self): @@ -65,29 +71,37 @@ class LinkifyIt: - ``http(s)://...`` , ``ftp://...``, ``mailto:...`` & ``//...`` links - "fuzzy" links and emails (example.com, foo@bar.com). - ``schemas`` is an object, where each key/value describes protocol/rule: + ``schemas`` is a dict where each key/value describes protocol/rule: - **key** - link prefix (usually, protocol name with ``:`` at the end, ``skype:`` for example). `linkify-it` makes shure that prefix is not preceeded with - alphanumeric char and symbols. Only whitespaces and punctuation allowed. + alphanumeric char. Only whitespaces and punctuation allowed. + - **value** - rule to check tail after link prefix - - *String* - just alias to existing rule - - *Object* - - *validate* - validator function (should return matched length on - success), or ``RegExp``. + + - *str* - just alias to existing rule + - *dict* + + - *validate* - either a ``re.Pattern``, ``re str`` (start with ``^``, and don't + include the link prefix itself), or a validator ``function`` which, given + arguments *self*, *text* and *pos* returns the length of a match in *text* + starting at index *pos*. *pos* is the index right after the link prefix. - *normalize* - optional function to normalize text & url of matched result (for example, for @twitter mentions). - ``options``: + ``options`` is a dict: - - **fuzzyLink** - recognige URL-s without ``http(s):`` prefix. Default ``true``. + - **fuzzyLink** - recognize URL-s without ``http(s):`` prefix. Default ``True``. - **fuzzyIP** - allow IPs in fuzzy links above. Can conflict with some texts - like version numbers. Default ``false``. + like version numbers. Default ``False``. - **fuzzyEmail** - recognize emails without ``mailto:`` prefix. + - **---** - set ``True`` to terminate link with ``---`` (if it's considered as long + dash). - Attributes: + Args: schemas (dict): Optional. 
Additional schemas to validate (prefix/validator) - options (dict): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false } + options (dict): { fuzzy_link | fuzzy_email | fuzzy_ip: True | False }. + Default: {"fuzzy_link": True, "fuzzy_email": True, "fuzzy_ip": False}. """ def _validate_http(self, text, pos): @@ -336,7 +350,7 @@ def untpl(tpl): # slist = "|".join( [ - escape_re(name) + _escape_re(name) for name, val in self._compiled.items() if len(name) > 0 and val ] @@ -359,36 +373,49 @@ def untpl(tpl): self._reset_scan_cache() def add(self, schema, definition): - """Add new rule definition. + """Add new rule definition. (chainable) - See constructor description for details. + See :class:`linkify_it.main.LinkifyIt` init description for details. + ``schema`` is a link prefix (``skype:``, for example), and ``definition`` + is a ``str`` to alias to another schema, or a ``dict`` with ``validate`` and + optionally ``normalize`` definitions. To disable an existing rule, use + ``.add(<schema>, None)``. Args: schema (str): rule name (fixed pattern prefix) - definition (str or regex or object): schema definition + definition (`str`, `dict` or `None`): schema definition + + Return: + :class:`linkify_it.main.LinkifyIt` """ self._schemas[schema] = definition self._compile() return self def set(self, options): - """Set recognition options for links without schema. + """Override default options. (chainable) + + Missing properties will not be changed. Args: - options (object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false } + options (dict): ``keys``: [``fuzzy_link`` | ``fuzzy_email`` | ``fuzzy_ip``]. + ``values``: [``True`` | ``False``] + + Return: + :class:`linkify_it.main.LinkifyIt` """ self._opts.update(options) return self def test(self, text): - """Searches linkifiable pattern and returns `true` on success or `false` + """Searches linkifiable pattern and returns ``True`` on success or ``False`` on fail. 
Args: - text (str): xxxxxx + text (str): text to search Returns: - bool: xxxxxx + bool: ``True`` if a linkable pattern was found, otherwise it is ``False``. """ self._text_cache = text self._index = -1 @@ -438,7 +465,7 @@ def test(self, text): if self._opts.get("fuzzy_email") and self._compiled.get("mailto:"): # guess schemaless emails - at_pos = index_of(text, "@") + at_pos = _index_of(text, "@") if at_pos >= 0: # We can't skip this check, because this cases are possible: # 192.168.1.1@gmail.com, my.in@example.com @@ -465,10 +492,10 @@ def pretest(self, text): when you need to check that link NOT exists. Args: - text (str): xxxxxx + text (str): text to search Returns: - bool: xxxxxx + bool: ``True`` if a linkable pattern was found, otherwise it is ``False``. """ if re.search(self.re["pretest"], text, flags=re.IGNORECASE): return True @@ -476,13 +503,16 @@ def pretest(self, text): return False def test_schema_at(self, text, name, position): - """Similar to `~linkify_it.LinkifyIt.test` but checks only specific protocol - tail exactly at given position. Returns length of found pattern (0 on fail). + """Similar to :meth:`linkify_it.main.LinkifyIt.test` but checks only + specific protocol tail exactly at given position. Args: text (str): text to scan name (str): rule (schema) name - position (int): text offset to check from + position (int): text offset to check from + + Returns: + int: length of found pattern (0 on fail). """ # If not supported schema check requested - terminate if not self._compiled.get(name.lower()): @@ -490,20 +520,20 @@ def test_schema_at(self, text, name, position): return self._compiled.get(name.lower()).get("validate")(text, position) def match(self, text): - """Returns array of found link descriptions or `null` on fail. + """Returns ``list`` of found link descriptions or ``None`` on fail. - We strongly recommend to use `~linkify_it.LinkifyIt.test` first, for best - speed. 
+ We strongly recommend to use :meth:`linkify_it.main.LinkifyIt.test` + first, for best speed. Args: - text (str): + text (str): text to search Returns: - list or None: Result match description - * **schema** - link schema, can be empty for fuzzy links, or `//` - for protocol-neutral links. + ``list`` or ``None``: Result match description: + * **schema** - link schema, can be empty for fuzzy links, or ``//`` + for protocol-neutral links. * **index** - offset of matched text - * **lastIndex** - offset of matched text + * **last_index** - index of next char after match end * **raw** - offset of matched text * **text** - normalized text * **url** - link, generated from matched text @@ -532,21 +562,21 @@ def match(self, text): return None def tlds(self, list_tlds, keep_old=False): - """Load (or merge) new tlds list. + """Load (or merge) new tlds list. (chainable) Those are user for fuzzy links (without prefix) to avoid false positives. By default this algorythm used: * hostname with any 2-letter root zones are ok. * biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф - are ok. + are ok. * encoded (`xn--...`) root zones are ok. If list is replaced, then exact match for 2-chars root zones will be checked. 
Args: - list_tlds (list): list of tlds - keep_old (bool): merge with current list if `true` (`false` by default) + list_tlds (list or str): ``list of tlds`` or ``tlds string`` + keep_old (bool): merge with current list if ``True`` (``False`` by default) """ _list = list_tlds if isinstance(list_tlds, list) else [list_tlds] diff --git a/linkify_it/tlds.py b/linkify_it/tlds.py index 2f9e86f..7f8053d 100644 --- a/linkify_it/tlds.py +++ b/linkify_it/tlds.py @@ -1,4 +1,10 @@ -# Version 2020110600, Last Updated Fri Nov 6 07:07:02 2020 UTC +"""TLDS + +Version 2020110600, Last Updated Fri Nov 6 07:07:02 2020 UTC + +References: + http://data.iana.org/TLD/tlds-alpha-by-domain.txt +""" TLDS = [ "AAA", "AARP", diff --git a/linkify_it/ucre.py b/linkify_it/ucre.py index 3cbd776..d5c51ed 100644 --- a/linkify_it/ucre.py +++ b/linkify_it/ucre.py @@ -130,7 +130,7 @@ ) -def re_src_path(opts): +def _re_src_path(opts): try: _ = opts["---"] # KeyError: Not found key:"---" @@ -203,6 +203,14 @@ def re_src_path(opts): def build_re(opts): + """Build regex + + Args: + opts (dict): options + + Return: + dict: dict of regex string + """ regex = { "src_Any": SRC_ANY, "src_Cc": SRC_CC, @@ -216,7 +224,7 @@ def build_re(opts): "src_auth": SRC_AUTH, "src_port": SRC_PORT, "src_host_terminator": SRC_HOST_TERMINATOR, - "src_path": re_src_path(opts), + "src_path": _re_src_path(opts), "src_email_name": SRC_EMAIL_NAME, "src_xn": SRC_XN, "src_domain_root": SRC_DOMAIN_ROOT, @@ -240,7 +248,7 @@ def build_re(opts): + "))" + "((?![$+<=>^`|\uff5c])" + TPL_HOST_PORT_FUZZY_STRICT - + re_src_path(opts) + + _re_src_path(opts) + ")" ), # Fuzzy link can't be prepended with .:/\- and non punctuation. 
@@ -251,7 +259,7 @@ def build_re(opts): + "))" + "((?![$+<=>^`|\uff5c])" + TPL_HOST_PORT_NO_IP_FUZZY_STRICT - + re_src_path(opts) + + _re_src_path(opts) + ")" ), } diff --git a/setup.py b/setup.py index ac71733..1222206 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,6 @@ "dev": ["pre-commit", "isort", "flake8", "black"], "benchmark": ["pytest", "pytest-benchmark"], "test": ["coverage", "pytest", "pytest-cov"], - "doc": ["sphinx", "sphinx_book_theme", "sphinx-rtd-theme"], + "doc": ["sphinx", "sphinx_book_theme", "myst-parser"], }, )