diff --git a/.github/workflows/python-package.yaml b/.github/workflows/python-package.yaml new file mode 100644 index 0000000..59910d0 --- /dev/null +++ b/.github/workflows/python-package.yaml @@ -0,0 +1,30 @@ +name: Python package + +on: + push: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + pull_request: + +jobs: + cqa: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: pip + cache-dependency-path: '**/pyproject.yaml' + + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install --use-deprecated=legacy-resolver -e .[dev] + + - name: Format check with Ruff + run: | + ruff format --check . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..f1a8b65 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.4 + hooks: + - id: ruff-format + args: [ --check ] diff --git a/Makefile b/Makefile index 703d326..e750c63 100644 --- a/Makefile +++ b/Makefile @@ -74,12 +74,12 @@ tox: #= UTILITY TARGETS # N.B. Although code is stored in github, I use hg and hg-git on the command line -#=> reformat: reformat code with yapf and commit +#=> reformat: reformat code with Ruff and commit .PHONY: reformat reformat: @if ! git diff --cached --exit-code; then echo "Repository not clean" 1>&2; exit 1; fi - yapf -i -r "${PKGD}" tests - git commit -a -m "reformatted with yapf" + ruff format "${PKGD}" + git commit -a -m "reformatted with ruff" #=> docs -- make sphinx docs .PHONY: docs diff --git a/docs/conf.py b/docs/conf.py index 950bcf2..1484494 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,40 +22,40 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode', + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.viewcode", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'eutils' -copyright = u'2018, eutils Committers' +project = "eutils" +copyright = "2018, eutils Committers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -68,182 +68,176 @@ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. html_show_sourcelink = False # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'eutilsdoc' +htmlhelp_basename = "eutilsdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - ('index', 'eutils.tex', u'eutils Documentation', - u'Reece Hart', 'manual'), + ("index", "eutils.tex", "eutils Documentation", "Reece Hart", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'eutils', u'eutils Documentation', - [u'Reece Hart'], 1) -] +man_pages = [("index", "eutils", "eutils Documentation", ["Reece Hart"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -252,28 +246,34 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'eutils', u'eutils Documentation', - u'Reece Hart', 'eutils', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "eutils", + "eutils Documentation", + "Reece Hart", + "eutils", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'http://docs.python.org/': None} +intersphinx_mapping = {"http://docs.python.org/": None} # -- Custom stuff --- -autodoc_default_flags = ['members', 'undoc-members', 'show-inheritance'] #, 'inherited-members'] -autoclass_content = 'both' +autodoc_default_flags = ["members", "undoc-members", "show-inheritance"] # , 'inherited-members'] +autoclass_content = "both" diff --git a/pyproject.toml b/pyproject.toml index 57d3081..6f82fe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,15 +27,16 @@ dev = [ "flake8", "ipython", "mock", + "pre-commit~=3.4", "pytest", "pytest-cov", "restview", + "ruff==0.4.4", "setuptools", "sphinx", "sphinx_rtd_theme", "tox", "vcrpy", - "yapf", ] [project.urls] @@ -93,3 +94,7 @@ markers = [ "network: tests that require network connectivity", "slow: slow tests that should be run infrequently", ] + +[tool.ruff] +src = ["src", "tests"] +line-length = 100 diff --git a/src/biocommons/eutils/__init__.py b/src/biocommons/eutils/__init__.py index ed8e6a5..c137f0a 100644 --- a/src/biocommons/eutils/__init__.py +++ b/src/biocommons/eutils/__init__.py @@ -2,7 +2,12 @@ from importlib.metadata import PackageNotFoundError, version from ._internal.client import Client -from ._internal.exceptions import EutilsError, EutilsNCBIError, EutilsNotFoundError, EutilsRequestError +from ._internal.exceptions import ( + EutilsError, + EutilsNCBIError, + EutilsNotFoundError, + EutilsRequestError, +) from ._internal.queryservice import QueryService __all__ = [ diff --git a/src/biocommons/eutils/_internal/client.py b/src/biocommons/eutils/_internal/client.py index 0a58853..8860403 100644 --- a/src/biocommons/eutils/_internal/client.py +++ b/src/biocommons/eutils/_internal/client.py @@ -35,7 +35,6 @@ def __init__(self, cache=False, api_key=None): self._qs = QueryService(cache=cache, api_key=api_key) - @property def databases(self): """ @@ -66,17 +65,17 @@ def einfo(self, db=None): return EInfoResult(self._qs.einfo({"db": db, "version": "2.0"})).dbinfo def esearch(self, db, term): - """query the esearch endpoint - """ + """query the esearch endpoint""" esr = ESearchResult(self._qs.esearch({"db": db, "term": term})) if esr.count > esr.retmax: - logger.warning("NCBI found {esr.count} results, but we truncated the reply at {esr.retmax}" - " results; see https://github.com/biocommons/eutils/issues/124/".format(esr=esr)) + logger.warning( + "NCBI found {esr.count} results, but we truncated the reply at {esr.retmax}" + " results; see https://github.com/biocommons/eutils/issues/124/".format(esr=esr) + ) return esr def efetch(self, db, id): - """query the efetch endpoint - """ + """query the efetch endpoint""" db = db.lower() xml = self._qs.efetch({"db": db, "id": str(id)}) doc = le.XML(xml) diff --git a/src/biocommons/eutils/_internal/exceptions.py b/src/biocommons/eutils/_internal/exceptions.py index f883cf8..f0f4209 100644 --- a/src/biocommons/eutils/_internal/exceptions.py +++ b/src/biocommons/eutils/_internal/exceptions.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- + class EutilsError(Exception): """Base class for all Eutils exceptions, and also used to raise general exception. """ + pass @@ -13,6 +15,7 @@ class EutilsNCBIError(EutilsError): invalid. """ + pass @@ -21,6 +24,7 @@ class EutilsNotFoundError(EutilsError): :mod:`eutils.sketchy.clientx` interface currently.) """ + pass @@ -29,6 +33,7 @@ class EutilsRequestError(EutilsError): database is specified. """ + pass diff --git a/src/biocommons/eutils/_internal/queryservice.py b/src/biocommons/eutils/_internal/queryservice.py index a8a1b5c..33b323d 100644 --- a/src/biocommons/eutils/_internal/queryservice.py +++ b/src/biocommons/eutils/_internal/queryservice.py @@ -45,7 +45,6 @@ class QueryService(object): - """*provides throttled and cached querying of NCBI E-utilities services* QueryService has three functions: @@ -75,14 +74,15 @@ class QueryService(object): """ - def __init__(self, - email=default_email, - cache=False, - default_args=default_default_args, - request_interval=None, - tool=default_tool, - api_key=None - ): + def __init__( + self, + email=default_email, + cache=False, + default_args=default_default_args, + request_interval=None, + tool=default_tool, + api_key=None, + ): """ :param str email: email of user (for abuse reports) :param str cache: if True, cache at ~/.cache/eutils-db.sqlite; if string, cache there; if False, don't cache @@ -102,16 +102,23 @@ def __init__(self, self.api_key = api_key if request_interval is not None: - _logger.warning("eutils QueryService: request_interval no longer supported; ignoring passed parameter") + _logger.warning( + "eutils QueryService: request_interval no longer supported; ignoring passed parameter" + ) if self.api_key is None: requests_per_second = 3 - _logger.warning("No NCBI API key provided; throttling to {} requests/second; see " - "https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/".format( - requests_per_second)) + _logger.warning( + "No NCBI API key provided; throttling to {} requests/second; see " + "https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/".format( + requests_per_second + ) + ) else: requests_per_second = 10 - _logger.info("Using NCBI API key; throttling to {} requests/second".format(requests_per_second)) + _logger.info( + "Using NCBI API key; throttling to {} requests/second".format(requests_per_second) + ) self.request_interval = 1.0 / requests_per_second @@ -127,7 +134,6 @@ def __init__(self, cache_path = False self._cache = SQLiteCache(cache_path) if cache_path else None - def efetch(self, args): """ execute a cached, throttled efetch query @@ -228,7 +234,6 @@ def esummary(self, args): """ return self._query("/esummary.fcgi", args) - ############################################################################ ## Internals def _query(self, path, args=None, skip_cache=False, skip_sleep=False): @@ -246,11 +251,14 @@ def _query(self, path, args=None, skip_cache=False, skip_sleep=False): """ if args is None: args = {} + def _cacheable(r): """return False if r shouldn't be cached (contains a no-cache meta line); True otherwise""" - return not ("no-cache" in r # obviate parsing, maybe - and lxml.etree.XML(r).xpath("//meta/@content='no-cache'")) + return not ( + "no-cache" in r # obviate parsing, maybe + and lxml.etree.XML(r).xpath("//meta/@content='no-cache'") + ) # cache key: the key associated with this endpoint and args The # key intentionally excludes the identifying args (tool and email) @@ -271,16 +279,18 @@ def _cacheable(r): if not skip_cache and self._cache: try: v = self._cache[cache_key] - _logger.debug("cache hit for key {cache_key} ({url}, {sqas}) ".format( - cache_key=cache_key, - url=url, - sqas=sqas)) + _logger.debug( + "cache hit for key {cache_key} ({url}, {sqas}) ".format( + cache_key=cache_key, url=url, sqas=sqas + ) + ) return v except KeyError: - _logger.debug("cache miss for key {cache_key} ({url}, {sqas}) ".format( - cache_key=cache_key, - url=url, - sqas=sqas)) + _logger.debug( + "cache miss for key {cache_key} ({url}, {sqas}) ".format( + cache_key=cache_key, url=url, sqas=sqas + ) + ) pass if self.api_key: @@ -297,22 +307,26 @@ def _cacheable(r): r = requests.post(url, full_args) self._last_request_clock = time.monotonic() - _logger.debug("post({url}, {fas}): {r.status_code} {r.reason}, {len})".format( - url=url, - fas=full_args_str, - r=r, - len=len(r.text))) + _logger.debug( + "post({url}, {fas}): {r.status_code} {r.reason}, {len})".format( + url=url, fas=full_args_str, r=r, len=len(r.text) + ) + ) if not r.ok: # TODO: discriminate between types of errors if r.headers["Content-Type"] == "application/json": json = r.json() - raise EutilsRequestError('{r.reason} ({r.status_code}): {error}'.format(r=r, error=json["error"])) + raise EutilsRequestError( + "{r.reason} ({r.status_code}): {error}".format(r=r, error=json["error"]) + ) try: xml = lxml.etree.fromstring(r.text.encode("utf-8")) errornode = xml.find("ERROR") errormsg = errornode.text if errornode else "Unknown Error" - raise EutilsRequestError("{r.reason} ({r.status_code}): {error}".format(r=r, error=errormsg)) + raise EutilsRequestError( + "{r.reason} ({r.status_code}): {error}".format(r=r, error=errormsg) + ) except Exception as ex: raise EutilsNCBIError("Error parsing response object from NCBI: {}".format(ex)) @@ -320,7 +334,11 @@ def _cacheable(r): if r.text is not None: try: xml = lxml.etree.fromstring(r.text.encode("utf-8")) - raise EutilsRequestError("{r.reason} ({r.status_code}): {error}".format(r=r, error=xml.find("ERROR").text)) + raise EutilsRequestError( + "{r.reason} ({r.status_code}): {error}".format( + r=r, error=xml.find("ERROR").text + ) + ) except Exception as ex: raise EutilsNCBIError("Error parsing response object from NCBI: {}".format(ex)) @@ -330,10 +348,11 @@ def _cacheable(r): if self._cache and _cacheable(r.text): # N.B. we cache results even when skip_cache (read) is true self._cache[cache_key] = r.content - _logger.info("cached results for key {cache_key} ({url}, {sqas}) ".format( - cache_key=cache_key, - url=url, - sqas=sqas)) + _logger.info( + "cached results for key {cache_key} ({url}, {sqas}) ".format( + cache_key=cache_key, url=url, sqas=sqas + ) + ) return r.content diff --git a/src/biocommons/eutils/_internal/sqlitecache.py b/src/biocommons/eutils/_internal/sqlitecache.py index 55f32db..02bbf8f 100644 --- a/src/biocommons/eutils/_internal/sqlitecache.py +++ b/src/biocommons/eutils/_internal/sqlitecache.py @@ -30,7 +30,6 @@ def val_from(pobj, compress): class SQLiteCache(object): - ############################################################################ ## Exposed methods def __init__(self, db_path, compress_values=True): @@ -45,8 +44,11 @@ def expire(self, age): ############################################################################ ## Special Python methods + def __str__(self): - return "SQLiteCache(db_path={self._db_path},compress_values={self.compress_values})".format(self=self) + return "SQLiteCache(db_path={self._db_path},compress_values={self.compress_values})".format( + self=self + ) def __dir__(self): self._logger.debug("__dir__()") @@ -63,8 +65,10 @@ def __getitem__(self, key): def __setitem__(self, key, value): db_val = val_to(value, self.compress_values) self._logger.debug("__setitem__({key},({vlen} bytes))".format(key=key, vlen=len(db_val))) - self._execute("INSERT OR REPLACE INTO cache (key,value_compressed,value) VALUES (?,?,?)", - [key_to(key), self.compress_values, db_val]) + self._execute( + "INSERT OR REPLACE INTO cache (key,value_compressed,value) VALUES (?,?,?)", + [key_to(key), self.compress_values, db_val], + ) def __delitem__(self, key): self._logger.debug("__delitem__({key})".format(key=key)) @@ -74,7 +78,9 @@ def __delitem__(self, key): def __contains__(self, key): self._logger.debug("__contains__({key})".format(key=key)) - return self._fetch1v("SELECT EXISTS(SELECT 1 FROM cache WHERE key=? LIMIT 1)", [key_to(key)]) + return self._fetch1v( + "SELECT EXISTS(SELECT 1 FROM cache WHERE key=? LIMIT 1)", [key_to(key)] + ) ############################################################################ ## Internal functions @@ -89,21 +95,26 @@ def _connect(self, db_path): self._logger.debug("schema version is " + str(sver)) if sver is None: self._execute( - "CREATE TABLE cache (key BLOB PRIMARY KEY, created INTEGER DEFAULT (strftime('%s','now')), value_compressed BOOL, value BLOB)") + "CREATE TABLE cache (key BLOB PRIMARY KEY, created INTEGER DEFAULT (strftime('%s','now')), value_compressed BOOL, value BLOB)" + ) self._execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)") - self._execute("INSERT INTO meta (key, value) VALUES (?,?)", ['schema version', 1]) + self._execute("INSERT INTO meta (key, value) VALUES (?,?)", ["schema version", 1]) self._logger.debug("created tables") def _get_schema_version(self): - if ('meta', ) not in self._execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall(): + if ("meta",) not in self._execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall(): return None return self._fetch1v("SELECT value FROM meta WHERE key = ?", ["schema version"]) def _execute(self, query, params=[]): cur = self._con.cursor() - self._logger.debug("executing query <{query}> with params <{nvars} vars>".format( - query=query, - nvars=len(params))) + self._logger.debug( + "executing query <{query}> with params <{nvars} vars>".format( + query=query, nvars=len(params) + ) + ) cur.execute(query, params) return cur diff --git a/src/biocommons/eutils/_internal/utils.py b/src/biocommons/eutils/_internal/utils.py index 937c3b1..86cb4af 100644 --- a/src/biocommons/eutils/_internal/utils.py +++ b/src/biocommons/eutils/_internal/utils.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- + def xml_get1(node, xpath): return node.xpath(xpath)[0] @@ -18,7 +19,7 @@ def xml_get_text(node, xpath): def xml_get_text_or_none(node, xpath): try: return xml_get_text(node, xpath) - except IndexError: # xpath search found 0 matches + except IndexError: # xpath search found 0 matches return None diff --git a/src/biocommons/eutils/_internal/xmlfacades/base.py b/src/biocommons/eutils/_internal/xmlfacades/base.py index 54a774c..ee91373 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/base.py +++ b/src/biocommons/eutils/_internal/xmlfacades/base.py @@ -35,8 +35,11 @@ def __init__(self, xml): if self._root_tag is None: raise EutilsError("_root_tag not defined for class {}".format(type(self).__name__)) elif self._root_tag != self._xml_root.tag: - raise EutilsError("XML for {} object must be a {} element (got {})".format( - type(self).__name__, self._root_tag, self._xml_root.tag)) + raise EutilsError( + "XML for {} object must be a {} element (got {})".format( + type(self).__name__, self._root_tag, self._xml_root.tag + ) + ) # diff --git a/src/biocommons/eutils/_internal/xmlfacades/dbinfo.py b/src/biocommons/eutils/_internal/xmlfacades/dbinfo.py index 460153c..026e519 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/dbinfo.py +++ b/src/biocommons/eutils/_internal/xmlfacades/dbinfo.py @@ -19,7 +19,6 @@ class DbInfo(Base): - _root_tag = "DbInfo" @property @@ -46,6 +45,7 @@ def count(self): def lastupdate(self): return self._xml_root.findtext("LastUpdate") + # # Copyright 2015 eutils Committers # diff --git a/src/biocommons/eutils/_internal/xmlfacades/dblist.py b/src/biocommons/eutils/_internal/xmlfacades/dblist.py index 34f726b..89636a8 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/dblist.py +++ b/src/biocommons/eutils/_internal/xmlfacades/dblist.py @@ -19,13 +19,13 @@ class DbList(Base): - _root_tag = "DbList" @property def databases(self): return sorted(self._xml_root.xpath("DbName/text()")) + # # Copyright 2015 eutils Committers # diff --git a/src/biocommons/eutils/_internal/xmlfacades/dbsnp.py b/src/biocommons/eutils/_internal/xmlfacades/dbsnp.py index 8d0fa4a..2d67b82 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/dbsnp.py +++ b/src/biocommons/eutils/_internal/xmlfacades/dbsnp.py @@ -10,25 +10,30 @@ class ExchangeSet(Base): - _root_tag = "{https://www.ncbi.nlm.nih.gov/SNP/docsum}ExchangeSet" def __iter__(self): - return (Rs(n) for n in self._xml_root.iterfind("docsum:Rs", namespaces={"docsum": self._xml_root.nsmap[None]})) + return ( + Rs(n) + for n in self._xml_root.iterfind( + "docsum:Rs", namespaces={"docsum": self._xml_root.nsmap[None]} + ) + ) def __len__(self): - return len(self._xml_root.findall("docsum:Rs", namespaces={"docsum": self._xml_root.nsmap[None]})) + return len( + self._xml_root.findall("docsum:Rs", namespaces={"docsum": self._xml_root.nsmap[None]}) + ) class Rs(object): - _root_tag = "Rs" def __init__(self, rs_node): assert rs_node.tag == "{https://www.ncbi.nlm.nih.gov/SNP/docsum}Rs" self._n = rs_node - #def __str__(self): + # def __str__(self): # return "Rs({self.id})".format(self=self) @property @@ -63,6 +68,7 @@ def hgvs_transcript_tags(self): def hgvs_protein_tags(self): return [t for t in self.hgvs_tags if protein_ac_re.match(t)] + # # Copyright 2015 eutils Committers # diff --git a/src/biocommons/eutils/_internal/xmlfacades/einforesult.py b/src/biocommons/eutils/_internal/xmlfacades/einforesult.py index af8aa7b..80072f5 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/einforesult.py +++ b/src/biocommons/eutils/_internal/xmlfacades/einforesult.py @@ -23,7 +23,6 @@ class EInfoResult(Base): - _root_tag = "eInfoResult" @property @@ -53,9 +52,9 @@ def _child(self, tag): if __name__ == "__main__": - import os import lxml.etree as le + data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") dir_path = os.path.join(data_dir, "einfo.fcgi?db=protein&retmode=xml.xml.gz") diff --git a/src/biocommons/eutils/_internal/xmlfacades/entrezgene.py b/src/biocommons/eutils/_internal/xmlfacades/entrezgene.py index c154970..19d2937 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/entrezgene.py +++ b/src/biocommons/eutils/_internal/xmlfacades/entrezgene.py @@ -7,15 +7,18 @@ class Entrezgene(Base): - _root_tag = "Entrezgene" def __str__(self): - return "Entrezgene(id={self.gene_id};hgnc={self.hgnc};description={self.description};type={self.type})".format(self=self) + return "Entrezgene(id={self.gene_id};hgnc={self.hgnc};description={self.description};type={self.type})".format( + self=self + ) @property def common_tax(self): - return self._xml_root.findtext("Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_common") + return self._xml_root.findtext( + "Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_common" + ) @property def description(self): @@ -30,12 +33,17 @@ def gene_commentaries(self): try: return self._gene_commentaries except AttributeError: - self._gene_commentaries = [GeneCommentary(n) for n in self._xml_root.iterfind("Entrezgene_locus/Gene-commentary")] + self._gene_commentaries = [ + GeneCommentary(n) + for n in self._xml_root.iterfind("Entrezgene_locus/Gene-commentary") + ] return self._gene_commentaries @property def genus_species(self): - return self._xml_root.xpath("Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_taxname/text()")[0] + return self._xml_root.xpath( + "Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_taxname/text()" + )[0] @property def hgnc(self): @@ -55,9 +63,12 @@ def maploc(self): @property def tax_id(self): - return int(self._xml_root.xpath( - "Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_db/" - 'Dbtag[Dbtag_db/text()="taxon"]/Dbtag_tag/Object-id/Object-id_id/text()')[0]) + return int( + self._xml_root.xpath( + "Entrezgene_source/BioSource/BioSource_org/Org-ref/Org-ref_db/" + 'Dbtag[Dbtag_db/text()="taxon"]/Dbtag_tag/Object-id/Object-id_id/text()' + )[0] + ) @property def summary(self): @@ -69,12 +80,13 @@ def synonyms(self): @property def type(self): - return self._xml_root.find('Entrezgene_type').get("value") + return self._xml_root.find("Entrezgene_type").get("value") if __name__ == "__main__": import os from .xmlfacades.entrezgeneset import EntrezgeneSet + data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") data_file = os.path.join(data_dir, "entrezgeneset.xml.gz") egs = EntrezgeneSet(le.parse(data_file).getroot()) diff --git a/src/biocommons/eutils/_internal/xmlfacades/entrezgeneset.py b/src/biocommons/eutils/_internal/xmlfacades/entrezgeneset.py index 8818ecf..16a1fe7 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/entrezgeneset.py +++ b/src/biocommons/eutils/_internal/xmlfacades/entrezgeneset.py @@ -33,6 +33,7 @@ def _entrezgene_nodes(self): def __iter__(self): return (eg for eg in self.entrezgenes) + # # Copyright 2015 eutils Committers # diff --git a/src/biocommons/eutils/_internal/xmlfacades/esearchresult.py b/src/biocommons/eutils/_internal/xmlfacades/esearchresult.py index fee5517..56a4ba6 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/esearchresult.py +++ b/src/biocommons/eutils/_internal/xmlfacades/esearchresult.py @@ -4,7 +4,6 @@ class ESearchResult(Base): - _root_tag = "eSearchResult" @property diff --git a/src/biocommons/eutils/_internal/xmlfacades/gbseq.py b/src/biocommons/eutils/_internal/xmlfacades/gbseq.py index 89728f2..e58a865 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/gbseq.py +++ b/src/biocommons/eutils/_internal/xmlfacades/gbseq.py @@ -8,7 +8,6 @@ class GBSeq(Base): - _root_tag = "GBSeq" def __str__(self): @@ -57,8 +56,7 @@ def genes(self): @property def gi(self): seqids = self._xml_root.xpath("GBSeq_other-seqids/GBSeqid/text()") - d = {t: l.rstrip("|").split("|") - for t, _, l in [si.partition("|") for si in seqids]} + d = {t: l.rstrip("|").split("|") for t, _, l in [si.partition("|") for si in seqids]} gis = d["gi"] assert len(gis) == 1, "expected exactly one gi in XML" return int(gis[0]) @@ -83,8 +81,7 @@ def organism(self): def other_seqids(self): """returns a dictionary of sequence ids, like {'gi': ['319655736'], 'ref': ['NM_000551.3']}""" seqids = self._xml_root.xpath("GBSeq_other-seqids/GBSeqid/text()") - return {t: l.rstrip("|").split("|") - for t, _, l in [si.partition("|") for si in seqids]} + return {t: l.rstrip("|").split("|") for t, _, l in [si.partition("|") for si in seqids]} @property def sequence(self): @@ -95,9 +92,7 @@ def updated(self): return self._xml_root.findtext("GBSeq_update-date") - class GBFeatureTable(Base): - """Represents a collection of features associated with a genbank sequence @@ -132,7 +127,9 @@ def __iter__(self): def cds(self): key = "CDS" nodes = self._get_nodes_with_key(key) - assert len(nodes) <= 1, "Node has {n=n} {key} features! (expected <= 1)".format(n=len(nodes), key=key) + assert len(nodes) <= 1, "Node has {n=n} {key} features! (expected <= 1)".format( + n=len(nodes), key=key + ) return None if not nodes else GBFeatureCDS(nodes[0]) @property @@ -145,14 +142,18 @@ def exons(self): def gene(self): key = "gene" nodes = self._get_nodes_with_key(key) - assert len(nodes) <= 1, "Node has {n=n} {key} features! (expected <= 1)".format(n=len(nodes), key=key) + assert len(nodes) <= 1, "Node has {n=n} {key} features! (expected <= 1)".format( + n=len(nodes), key=key + ) return None if not nodes else GBFeature(nodes[0]) @property def source(self): key = "source" nodes = self._get_nodes_with_key(key) - assert len(nodes) == 1, "Got {n=n} {key} features! (expected exactly 1)".format(n=len(nodes), key=key) + assert len(nodes) == 1, "Got {n=n} {key} features! (expected exactly 1)".format( + n=len(nodes), key=key + ) return GBFeature(nodes[0]) def _get_nodes_with_key(self, key): @@ -161,7 +162,6 @@ def _get_nodes_with_key(self, key): class GBFeature(Base): - _root_tag = "GBFeature" def __init__(self, xml): @@ -180,23 +180,31 @@ def key(self): @property def qualifiers(self): - return {q.findtext("GBQualifier_name"): q.findtext("GBQualifier_value") - for q in self._xml_root.findall("GBFeature_quals/GBQualifier")} + return { + q.findtext("GBQualifier_name"): q.findtext("GBQualifier_value") + for q in self._xml_root.findall("GBFeature_quals/GBQualifier") + } def get_qualifiers(self, name): return self._xml_root.xpath( - 'GBFeature_quals/GBQualifier[GBQualifier_name/text()="'+name+'"]/GBQualifier_value/text()') + 'GBFeature_quals/GBQualifier[GBQualifier_name/text()="' + + name + + '"]/GBQualifier_value/text()' + ) def get_qualifier(self, name): nodes = self.get_qualifiers(name) - assert len(nodes) <= 1, "Node has {n=n} {key} features! (expected <= 1 when using get_qualifier)".format(n=len(nodes), key=name) + assert ( + len(nodes) <= 1 + ), "Node has {n=n} {key} features! (expected <= 1 when using get_qualifier)".format( + n=len(nodes), key=name + ) if not nodes: return None return self.get_qualifiers(name)[0] class GBFeatureCDS(GBFeature): - @property def translation(self): return self.get_qualifier("translation") @@ -211,11 +219,10 @@ def gene(self): @property def gene_synonyms(self): - return (self.get_qualifier('gene_synonym') or "").split("; ") + return (self.get_qualifier("gene_synonym") or "").split("; ") class GBFeatureExon(GBFeature): - @property def inference(self): return self.get_qualifier("inference") diff --git a/src/biocommons/eutils/_internal/xmlfacades/gbset.py b/src/biocommons/eutils/_internal/xmlfacades/gbset.py index c403e90..d5337b0 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/gbset.py +++ b/src/biocommons/eutils/_internal/xmlfacades/gbset.py @@ -5,7 +5,6 @@ class GBSet(Base): - _root_tag = "GBSet" def __str__(self): @@ -23,7 +22,6 @@ def __iter__(self): import os import lxml.etree as le - data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") relpath = "efetch.fcgi?db=nuccore&id=148536845&retmode=xml.xml" path = os.path.join(data_dir, relpath) diff --git a/src/biocommons/eutils/_internal/xmlfacades/genecommentary.py b/src/biocommons/eutils/_internal/xmlfacades/genecommentary.py index 19ab7d7..b424093 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/genecommentary.py +++ b/src/biocommons/eutils/_internal/xmlfacades/genecommentary.py @@ -20,7 +20,9 @@ class GeneCommentary(Base): _root_tag = "Gene-commentary" def __str__(self): - return "GeneCommentary(acv={self.acv},type={self.type},heading={self.heading},label={self.label})".format(self=self) + return "GeneCommentary(acv={self.acv},type={self.type},heading={self.heading},label={self.label})".format( + self=self + ) @property def accession(self): @@ -36,7 +38,11 @@ def acv(self): def genomic_coords(self): n = self._xml_root.find("Gene-commentary_genomic-coords") if n is None: - raise EutilsError("this object (type={self.type}) does not have genomic coordinates defined (mRNA and peptide typically do)".format(self=self)) + raise EutilsError( + "this object (type={self.type}) does not have genomic coordinates defined (mRNA and peptide typically do)".format( + self=self + ) + ) return GeneCommentaryGenomicCoords(n) @property @@ -49,11 +55,14 @@ def label(self): @property def products(self): - return [GeneCommentary(gc) for gc in self._xml_root.findall("Gene-commentary_products/Gene-commentary")] + return [ + GeneCommentary(gc) + for gc in self._xml_root.findall("Gene-commentary_products/Gene-commentary") + ] @property def type(self): - return self._xml_root.find('Gene-commentary_type').get("value") + return self._xml_root.find("Gene-commentary_type").get("value") @property def version(self): @@ -81,8 +90,10 @@ def gi(self): @property def intervals(self): - return [(i.interval_from, i.interval_to) - for i in (SeqInterval(n) for n in self._xml_root.findall(".//Seq-interval"))] + return [ + (i.interval_from, i.interval_to) + for i in (SeqInterval(n) for n in self._xml_root.findall(".//Seq-interval")) + ] @property def _interval_str(self): @@ -90,7 +101,6 @@ def _interval_str(self): class SeqInterval(Base): - _root_tag = "Seq-interval" def __str__(self): diff --git a/src/biocommons/eutils/_internal/xmlfacades/medlinecitation.py b/src/biocommons/eutils/_internal/xmlfacades/medlinecitation.py index 4145c5a..07063d1 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/medlinecitation.py +++ b/src/biocommons/eutils/_internal/xmlfacades/medlinecitation.py @@ -4,13 +4,17 @@ class MedlineCitation(Base): - _root_tag = "MedlineCitation" @property def abstract(self): # return self._xml_root.findtext("Article/Abstract/AbstractText") - return " ".join(["".join(at.itertext()) for at in self._xml_root.findall('Article/Abstract/AbstractText')]) + return " ".join( + [ + "".join(at.itertext()) + for at in self._xml_root.findall("Article/Abstract/AbstractText") + ] + ) @property def authors(self): @@ -36,8 +40,9 @@ def issue(self): @property def jrnl(self): - return self._xml_root.findtext("Article/Journal/ISOAbbreviation") or self._xml_root.findtext( - "Article/Journal/Title") + return self._xml_root.findtext( + "Article/Journal/ISOAbbreviation" + ) or self._xml_root.findtext("Article/Journal/Title") @property def mesh_headings(self): @@ -69,9 +74,11 @@ def volume(self): @property def year(self): - return self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/Year") \ - or self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/Year") \ - or self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/MedlineDate") + return ( + self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/Year") + or self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/Year") + or self._xml_root.findtext("Article/Journal/JournalIssue/PubDate/MedlineDate") + ) if __name__ == "__main__": diff --git a/src/biocommons/eutils/_internal/xmlfacades/pubmedarticle.py b/src/biocommons/eutils/_internal/xmlfacades/pubmedarticle.py index a770238..0499f25 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/pubmedarticle.py +++ b/src/biocommons/eutils/_internal/xmlfacades/pubmedarticle.py @@ -6,11 +6,14 @@ class PubmedArticle(Base): - _root_tag = "PubmedArticle" def __str__(self): - return ("{pma.__class__.__name__}({pma.pmid}; {pma.jrnl}; {pma.title}; {pma.authors})".format(pma=self)) + return ( + "{pma.__class__.__name__}({pma.pmid}; {pma.jrnl}; {pma.title}; {pma.authors})".format( + pma=self + ) + ) @property def abstract(self): @@ -66,15 +69,21 @@ def year(self): @property def doi(self): - return xml_get_text_or_none(self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="doi"]') + return xml_get_text_or_none( + self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="doi"]' + ) @property def pii(self): - return xml_get_text_or_none(self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="pii"]') + return xml_get_text_or_none( + self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="pii"]' + ) @property def pmc(self): - pmc = xml_get_text_or_none(self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="pmc"]') + pmc = xml_get_text_or_none( + self._xml_root, 'PubmedData/ArticleIdList/ArticleId[@IdType="pmc"]' + ) return None if pmc is None else pmc[3:] @property @@ -86,10 +95,12 @@ def _medline_citation(self): from .xmlfacades.pubmedarticleset import PubmedArticleSet import lxml.etree as le import os + data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") relpaths = [ - "efetch.fcgi?db=pubmed&id=20412080&rettype=xml.xml", "efetch.fcgi?db=pubmed&id=22351513&retmode=xml.xml", - "efetch.fcgi?db=pubmed&id=23121403&retmode=xml.xml" + "efetch.fcgi?db=pubmed&id=20412080&rettype=xml.xml", + "efetch.fcgi?db=pubmed&id=22351513&retmode=xml.xml", + "efetch.fcgi?db=pubmed&id=23121403&retmode=xml.xml", ] path = os.path.join(data_dir, relpaths[0]) pas = PubmedArticleSet(le.parse(path).getroot()) diff --git a/src/biocommons/eutils/_internal/xmlfacades/pubmedarticleset.py b/src/biocommons/eutils/_internal/xmlfacades/pubmedarticleset.py index f6ed12f..efdb557 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/pubmedarticleset.py +++ b/src/biocommons/eutils/_internal/xmlfacades/pubmedarticleset.py @@ -5,7 +5,6 @@ class PubmedArticleSet(Base): - _root_tag = "PubmedArticleSet" def __iter__(self): @@ -18,11 +17,15 @@ def __iter__(self): data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") relpaths = [ - "efetch.fcgi?db=pubmed&id=20412080&rettype=xml.xml", "efetch.fcgi?db=pubmed&id=22351513&retmode=xml.xml", - "efetch.fcgi?db=pubmed&id=23121403&retmode=xml.xml" + "efetch.fcgi?db=pubmed&id=20412080&rettype=xml.xml", + "efetch.fcgi?db=pubmed&id=22351513&retmode=xml.xml", + "efetch.fcgi?db=pubmed&id=23121403&retmode=xml.xml", ] - pmasets = [PubmedArticleSet(le.parse(os.path.join(data_dir, relpath)).getroot()) for relpath in relpaths] + pmasets = [ + PubmedArticleSet(le.parse(os.path.join(data_dir, relpath)).getroot()) + for relpath in relpaths + ] # # Copyright 2015 eutils Committers diff --git a/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticle.py b/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticle.py index 9d3247b..22e01c6 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticle.py +++ b/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticle.py @@ -5,50 +5,68 @@ class PubmedCentralArticle(Base): - _root_tag = "article" def __str__(self): - return ("{pmca.__class__.__name__}(pmc={pmca.pmc};pmid={pmca.pmid};doi={pmca.doi};{pmca.title})".format(pmca=self)) + return "{pmca.__class__.__name__}(pmc={pmca.pmc};pmid={pmca.pmid};doi={pmca.doi};{pmca.title})".format( + pmca=self + ) @property def title(self): - return ''.join([x for x in xml_get1(self._xml_root, "front/article-meta/title-group/article-title").itertext()]) + return "".join( + [ + x + for x in xml_get1( + self._xml_root, "front/article-meta/title-group/article-title" + ).itertext() + ] + ) @property def abstract_text(self): - return ''.join([x for x in xml_get1(self._xml_root, "front/article-meta/abstract").itertext()]) + return "".join( + [x for x in xml_get1(self._xml_root, "front/article-meta/abstract").itertext()] + ) @property def body_text(self): body = self._xml_root.xpath("body") if body: parts = [x for x in body[0].itertext()] - return ''.join(parts) + return "".join(parts) else: return None @property def doi(self): - return xml_get_text_or_none(self._xml_root, 'front/article-meta/article-id[@pub-id-type="doi"]') + return xml_get_text_or_none( + self._xml_root, 'front/article-meta/article-id[@pub-id-type="doi"]' + ) @property def pmid(self): - return xml_get_text_or_none(self._xml_root, 'front/article-meta/article-id[@pub-id-type="pmid"]') + return xml_get_text_or_none( + self._xml_root, 'front/article-meta/article-id[@pub-id-type="pmid"]' + ) @property def pmc(self): - return xml_get_text_or_none(self._xml_root, 'front/article-meta/article-id[@pub-id-type="pmc"]') + return xml_get_text_or_none( + self._xml_root, 'front/article-meta/article-id[@pub-id-type="pmc"]' + ) if __name__ == "__main__": from .xmlfacades.pubmedcentralarticleset import PubmedCentralArticleSet import lxml.etree as le import os + data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") relpaths = [ - "efetch.fcgi?db=pmc&id=3299399&rettype=xml.xml", "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", - "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml" + "efetch.fcgi?db=pmc&id=3299399&rettype=xml.xml", + "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", + "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", ] path = os.path.join(data_dir, relpaths[0]) pmcas = PubmedCentralArticleSet(le.parse(path).getroot()) diff --git a/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticleset.py b/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticleset.py index f9e1104..00c8bb0 100644 --- a/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticleset.py +++ b/src/biocommons/eutils/_internal/xmlfacades/pubmedcentralarticleset.py @@ -5,7 +5,6 @@ class PubmedCentralArticleSet(Base): - _root_tag = "pmc-articleset" def __iter__(self): @@ -18,11 +17,15 @@ def __iter__(self): data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests", "data") relpaths = [ - "efetch.fcgi?db=pmc&id=3299399&rettype=xml.xml", "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", - "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml" + "efetch.fcgi?db=pmc&id=3299399&rettype=xml.xml", + "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", + "efetch.fcgi?db=pmc&id=3299399&retmode=xml.xml", ] - pmcasets = [PubmedCentralArticleSet(le.parse(os.path.join(data_dir, relpath)).getroot()) for relpath in relpaths] + pmcasets = [ + PubmedCentralArticleSet(le.parse(os.path.join(data_dir, relpath)).getroot()) + for relpath in relpaths + ] # # Copyright 2015 eutils Committers diff --git a/src/biocommons/eutils/sketchy/clientx.py b/src/biocommons/eutils/sketchy/clientx.py index e345c9e..1840c78 100644 --- a/src/biocommons/eutils/sketchy/clientx.py +++ b/src/biocommons/eutils/sketchy/clientx.py @@ -13,16 +13,23 @@ class ClientX(Client): """ def fetch_gene_by_hgnc(self, hgnc): - query = 'human[orgn] AND {hgnc}[preferred symbol] AND "current only"[Filter]'.format(hgnc=hgnc) + query = 'human[orgn] AND {hgnc}[preferred symbol] AND "current only"[Filter]'.format( + hgnc=hgnc + ) esr = self.esearch(db="gene", term=query) if esr.count != 1: - raise EutilsError("Received {n} search replies for gene {hgnc} (query: '{query}')".format( - n=esr.count, - hgnc=hgnc, - query=query)) + raise EutilsError( + "Received {n} search replies for gene {hgnc} (query: '{query}')".format( + n=esr.count, hgnc=hgnc, query=query + ) + ) gene = next(iter(self.efetch(db="gene", id=esr.ids[0]))) if hgnc != gene.hgnc: - raise EutilsError("Queried for {q_hgnc}, got reply for gene {r_hgnc}".format(q_hgnc=hgnc, r_hgnc=gene.hgnc)) + raise EutilsError( + "Queried for {q_hgnc}, got reply for gene {r_hgnc}".format( + q_hgnc=hgnc, r_hgnc=gene.hgnc + ) + ) return gene def fetch_nuccore_by_ac(self, acv): @@ -30,12 +37,20 @@ def fetch_nuccore_by_ac(self, acv): db = "nuccore" esr = self.esearch(db=db, term=query) if esr.count > 1: - raise EutilsError("Received {n} replies for {acv} in database {db}".format(n=esr.count, acv=acv, db=db)) + raise EutilsError( + "Received {n} replies for {acv} in database {db}".format( + n=esr.count, acv=acv, db=db + ) + ) if esr.count == 0: - raise EutilsNotFoundError("No results for {query} in database {db}".format(query=query, db=db)) + raise EutilsNotFoundError( + "No results for {query} in database {db}".format(query=query, db=db) + ) gbseq = next(iter(self.efetch(db=db, id=esr.ids[0]))) if acv != gbseq.acv: - raise EutilsNCBIError("Queried for {q_acv}, got reply for {r_acv}".format(q_acv=acv, r_acv=gbseq.acv)) + raise EutilsNCBIError( + "Queried for {q_acv}, got reply for {r_acv}".format(q_acv=acv, r_acv=gbseq.acv) + ) return gbseq fetch_gbseq_by_ac = fetch_nuccore_by_ac @@ -44,10 +59,11 @@ def fetch_snps_for_gene(self, hgnc, organism="human"): db = "snp" esr = self.esearch(db=db, term="%s[gene name] AND %s[organism]" % (hgnc, organism)) if esr.count == 0: - raise EutilsNotFoundError("No results for gene {hgnc} and organism {o} in database {db}".format( - hgnc=hgnc, - o=organism, - db=db)) + raise EutilsNotFoundError( + "No results for gene {hgnc} and organism {o} in database {db}".format( + hgnc=hgnc, o=organism, db=db + ) + ) return next(iter(self.efetch(db=db, id=",".join(map(str, esr.ids))))) diff --git a/tests/conftest.py b/tests/conftest.py index 41160d1..a04e57e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ import biocommons.eutils import logging + logging.basicConfig() logger = logging.getLogger("vcr") logger.setLevel(logging.DEBUG) @@ -16,7 +17,7 @@ vcr.default_vcr = vcr.VCR( cassette_library_dir=test_data_dir, filter_headers=["Authorization"], - filter_post_data_parameters=['Authorization'], + filter_post_data_parameters=["Authorization"], record_mode=os.environ.get("VCR_RECORD_MODE", "once"), ) vcr.use_cassette = vcr.default_vcr.use_cassette diff --git a/tests/test_eutils_queryservice.py b/tests/test_eutils_queryservice.py index 9e2ae79..7fbecc5 100644 --- a/tests/test_eutils_queryservice.py +++ b/tests/test_eutils_queryservice.py @@ -22,15 +22,13 @@ def assert_in_xml(xml, item): def parse_related_pmids_result(xmlstr): - """helper function for elink test. - - """ + """helper function for elink test.""" outd = {} dom = etree.fromstring(xmlstr) - for linkset in dom.findall('LinkSet/LinkSetDb'): - heading = linkset.find('LinkName').text.split('_')[-1] + for linkset in dom.findall("LinkSet/LinkSetDb"): + heading = linkset.find("LinkName").text.split("_")[-1] outd[heading] = [] - for Id in linkset.findall('Link/Id'): + for Id in linkset.findall("Link/Id"): outd[heading].append(Id.text) return outd @@ -54,25 +52,25 @@ def tearDown(self): @vcr.use_cassette def test_efetch(self): - '''Testing efetch.fcgi by looking up a known pubmed article.''' + """Testing efetch.fcgi by looking up a known pubmed article.""" pmid = 1234567 - result = self.qs.efetch(args={'db': 'pubmed', 'id': pmid}) + result = self.qs.efetch(args={"db": "pubmed", "id": pmid}) assert_in_xml(result, str(pmid)) @vcr.use_cassette def test_esearch(self): - '''Testing esearch.fcgi by searching medgen db for concepts related to OCRL gene.''' + """Testing esearch.fcgi by searching medgen db for concepts related to OCRL gene.""" # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=medgen&term=OCRL - result = self.qs.esearch({'db': 'medgen', 'term': 'OCRL'}) + result = self.qs.esearch({"db": "medgen", "term": "OCRL"}) # eSearchResult should contain something like this: - #76375433686733632216805618145 - assert_in_xml(result, 'IdList') + # 76375433686733632216805618145 + assert_in_xml(result, "IdList") @vcr.use_cassette def test_elink(self): - '''Testing elink.fcgi by looking up related pmids in pubmed.''' + """Testing elink.fcgi by looking up related pmids in pubmed.""" # Expected response should contain the following information: # * pubmed (all related links) @@ -81,28 +79,28 @@ def test_elink(self): # * reviews (review papers that cite this paper) # * combined (?) - xmlstr = self.qs.elink({'dbfrom': 'pubmed', 'id': 1234567, 'cmd': 'neighbor'}) + xmlstr = self.qs.elink({"dbfrom": "pubmed", "id": 1234567, "cmd": "neighbor"}) resd = parse_related_pmids_result(xmlstr) - assert 'pubmed' in resd.keys() + assert "pubmed" in resd.keys() @vcr.use_cassette def test_esummary(self): - '''Testing esummary.fcgi by looking up a known medgen concept''' + """Testing esummary.fcgi by looking up a known medgen concept""" # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=medgen&id=336867 - result = self.qs.esummary({'db': 'medgen', 'id': 336867}) - assert_in_xml(result, 'ConceptId') + result = self.qs.esummary({"db": "medgen", "id": 336867}) + assert_in_xml(result, "ConceptId") @vcr.use_cassette - @patch('biocommons.eutils._internal.queryservice.requests') + @patch("biocommons.eutils._internal.queryservice.requests") def test_handles_malformed_xml_errors(self, mock_requests): post_return_value = MagicMock() post_return_value.status_code = 404 - post_return_value.reason = 'dunno' - post_return_value.test = 'Bad XML' + post_return_value.reason = "dunno" + post_return_value.test = "Bad XML" post_return_value.ok = False mock_requests.post.return_value = post_return_value with self.assertRaises(EutilsNCBIError): pmid = 1234569 - self.qs.efetch(args={'db': 'pubmed', 'id': pmid}) + self.qs.efetch(args={"db": "pubmed", "id": pmid}) diff --git a/tests/test_eutils_xmlfacades_einforesult.py b/tests/test_eutils_xmlfacades_einforesult.py index 8757388..e152360 100644 --- a/tests/test_eutils_xmlfacades_einforesult.py +++ b/tests/test_eutils_xmlfacades_einforesult.py @@ -5,7 +5,7 @@ def test_einfo_dblist(client): dblist_result = client.einfo() - assert 'protein' in dblist_result.databases + assert "protein" in dblist_result.databases assert 40 == len(dblist_result.databases) diff --git a/tests/test_eutils_xmlfacades_gi.py b/tests/test_eutils_xmlfacades_gi.py index 5fc9c88..69214f0 100644 --- a/tests/test_eutils_xmlfacades_gi.py +++ b/tests/test_eutils_xmlfacades_gi.py @@ -3,7 +3,6 @@ @vcr.use_cassette def test_esearchresult(client): - r = next(iter(client.efetch(db="nuccore", id="NM_152783.3"))) # in #150 fails with AttributeError: 'GBSeq' object has no attribute 'seqids' @@ -12,19 +11,21 @@ def test_esearchresult(client): # test for some additional bugs that were fixed together with #150 assert 4 == len(r.features.cds.db_xrefs) - for s in ['CCDS:CCDS33426.1', 'GeneID:728294', 'HGNC:HGNC:28358', 'MIM:609186']: + for s in ["CCDS:CCDS33426.1", "GeneID:728294", "HGNC:HGNC:28358", "MIM:609186"]: assert s in r.features.cds.db_xrefs - prot = "MLPRRPLAWPAWLLRGAPGAAGSWGRPVGPLARRGCCSAPGTPE" \ - "VPLTRERYPVRRLPFSTVSKQDLAAFERIVPGGVVTDPEALQAPNVDWLRTLRGCSKV" \ - "LLRPRTSEEVSHILRHCHERNLAVNPQGGNTGMVGGSVPVFDEIILSTARMNRVLSFH" \ - "SVSGILVCQAGCVLEELSRYVEERDFIMPLDLGAKGSCHIGGNVATNAGGLRFLRYGS" \ - "LHGTVLGLEVVLADGTVLDCLTSLRKDNTGYDLKQLFIGSEGTLGIITTVSILCPPKP" \ - "RAVNVAFLGCPGFAEVLQTFSTCKGMLGEILSAFEFMDAVCMQLVGRHLHLASPVQES" \ - "PFYVLIETSGSNAGHDAEKLGHFLEHALGSGLVTDGTMATDQRKVKMLWALRERITEA" \ - "LSRDGYVYKYDLSLPVERLYDIVTDLRARLGPHAKHVVGYGHLGDGNLHLNVTAEAFS" \ - "PSLLAALEPHVYEWTAGQQGSVSAEHGVGFRKRDVLGYSKPPGALQLMQQLKALLDPK" \ - "GILNPYKTLPSQA" + prot = ( + "MLPRRPLAWPAWLLRGAPGAAGSWGRPVGPLARRGCCSAPGTPE" + "VPLTRERYPVRRLPFSTVSKQDLAAFERIVPGGVVTDPEALQAPNVDWLRTLRGCSKV" + "LLRPRTSEEVSHILRHCHERNLAVNPQGGNTGMVGGSVPVFDEIILSTARMNRVLSFH" + "SVSGILVCQAGCVLEELSRYVEERDFIMPLDLGAKGSCHIGGNVATNAGGLRFLRYGS" + "LHGTVLGLEVVLADGTVLDCLTSLRKDNTGYDLKQLFIGSEGTLGIITTVSILCPPKP" + "RAVNVAFLGCPGFAEVLQTFSTCKGMLGEILSAFEFMDAVCMQLVGRHLHLASPVQES" + "PFYVLIETSGSNAGHDAEKLGHFLEHALGSGLVTDGTMATDQRKVKMLWALRERITEA" + "LSRDGYVYKYDLSLPVERLYDIVTDLRARLGPHAKHVVGYGHLGDGNLHLNVTAEAFS" + "PSLLAALEPHVYEWTAGQQGSVSAEHGVGFRKRDVLGYSKPPGALQLMQQLKALLDPK" + "GILNPYKTLPSQA" + ) assert prot == r.features.cds.translation # this returns the ranges diff --git a/tests/test_eutils_xmlfacades_pubmedarticle.py b/tests/test_eutils_xmlfacades_pubmedarticle.py index 7280da7..940d79c 100644 --- a/tests/test_eutils_xmlfacades_pubmedarticle.py +++ b/tests/test_eutils_xmlfacades_pubmedarticle.py @@ -7,23 +7,57 @@ def test_eutils_xmlfacades_pubmedarticle_22528466(client): pa = next(iter(pas)) assert pa.abstract.startswith( - u'Methods necessary for the successful transformation and regeneration of Aloe vera were developed and used to express the human protein, interferon alpha 2 (IFN\u03b12).' + "Methods necessary for the successful transformation and regeneration of Aloe vera were developed and used to express the human protein, interferon alpha 2 (IFN\u03b12)." ) assert set(pa.authors) == set(["Lowther W", "Lorick K", "Lawrence SD", "Yeow WS"]) assert set(pa.chemicals) == set( - ["Antiviral Agents", "IFNA2 protein, human", "Interferon-alpha", "Plant Extracts", "Glucuronidase"]) + [ + "Antiviral Agents", + "IFNA2 protein, human", + "Interferon-alpha", + "Plant Extracts", + "Glucuronidase", + ] + ) assert pa.doi == "10.1007/s11248-012-9616-0" assert pa.issue == "6" assert pa.jrnl == "Transgenic Res" - assert set(pa.mesh_headings) == set([ - "Aloe", "Antiviral Agents", "Encephalomyocarditis virus", "Genome, Plant", "Glucuronidase", "Humans", - "Immunoblotting", "Interferon-alpha", "Plant Extracts", "Plant Leaves", "Plants, Genetically Modified", "Seeds", - "Signal Transduction", "Transgenes" - ]) - assert set(pa.mesh_qualifiers) == set([ - "chemistry", "genetics", "pharmacology", "drug effects", "genetics", "metabolism", "genetics", "metabolism", - "pharmacology", "drug effects", "genetics", "chemistry", "drug effects", "physiology" - ]) + assert set(pa.mesh_headings) == set( + [ + "Aloe", + "Antiviral Agents", + "Encephalomyocarditis virus", + "Genome, Plant", + "Glucuronidase", + "Humans", + "Immunoblotting", + "Interferon-alpha", + "Plant Extracts", + "Plant Leaves", + "Plants, Genetically Modified", + "Seeds", + "Signal Transduction", + "Transgenes", + ] + ) + assert set(pa.mesh_qualifiers) == set( + [ + "chemistry", + "genetics", + "pharmacology", + "drug effects", + "genetics", + "metabolism", + "genetics", + "metabolism", + "pharmacology", + "drug effects", + "genetics", + "chemistry", + "drug effects", + "physiology", + ] + ) assert pa.pages == "1349-57" assert pa.pii is None assert pa.pmc is None @@ -40,40 +74,47 @@ def test_eutils_xmlfacades_pubmedarticle_20412080(client): pas = client.efetch(db="pubmed", id=20412080) pa = next(iter(pas)) - assert pa.abstract.startswith('A standardized, controlled vocabulary allows phenotypic') - assert set(pa.authors) == set(['Robinson PN', 'Mundlos S']) + assert pa.abstract.startswith("A standardized, controlled vocabulary allows phenotypic") + assert set(pa.authors) == set(["Robinson PN", "Mundlos S"]) assert set(pa.chemicals) == set([]) - assert pa.doi == '10.1111/j.1399-0004.2010.01436.x' - assert pa.issue == '6' - assert pa.jrnl == 'Clin Genet' - assert set(pa.mesh_headings) == set([ - 'Algorithms', 'Computational Biology', 'Databases, Genetic', 'Gene Expression', 'Humans', 'Phenotype', - 'Vocabulary, Controlled' - ]) + assert pa.doi == "10.1111/j.1399-0004.2010.01436.x" + assert pa.issue == "6" + assert pa.jrnl == "Clin Genet" + assert set(pa.mesh_headings) == set( + [ + "Algorithms", + "Computational Biology", + "Databases, Genetic", + "Gene Expression", + "Humans", + "Phenotype", + "Vocabulary, Controlled", + ] + ) assert set(pa.mesh_qualifiers) == set(["methods"]) - assert pa.pages == '525-34' - assert pa.pii == 'CGE1436' + assert pa.pages == "525-34" + assert pa.pii == "CGE1436" assert pa.pmc is None - assert pa.pmid == '20412080' + assert pa.pmid == "20412080" assert set(pa.pub_types) == set(["Journal Article", "Review"]) - assert pa.title == 'The human phenotype ontology.' - assert pa.volume == '77' - assert pa.year == '2010' - assert 'PubmedArticle(20412080' in str(pa) + assert pa.title == "The human phenotype ontology." + assert pa.volume == "77" + assert pa.year == "2010" + assert "PubmedArticle(20412080" in str(pa) @vcr.use_cassette def test_eutils_xmlfacades_pubmedarticle_23121403(client): pas = client.efetch(db="pubmed", id=23121403) pa = next(iter(pas)) - assert 'ASPIRE Investigators' in pa.authors + assert "ASPIRE Investigators" in pa.authors @vcr.use_cassette def test_eutils_xmlfacades_pubmedarticle_22351513(client): pas = client.efetch(db="pubmed", id=22351513) pa = next(iter(pas)) - assert 'Mahmooduzzafar' in pa.authors + assert "Mahmooduzzafar" in pa.authors assert pa.abstract.startswith("The oil content and fatty acid composition") assert pa.abstract.endswith("edible vegetable oil after toxicological studies.") @@ -82,7 +123,9 @@ def test_eutils_xmlfacades_pubmedarticle_22351513(client): def test_eutils_xmlfacades_pubmedarticle_29915538(client): pas = client.efetch(db="pubmed", id=29915538) pa = next(iter(pas)) - assert pa.abstract.startswith("Background: Semaglutide, a newly once-weekly glucagon like peptide-1 (GLP-1)") + assert pa.abstract.startswith( + "Background: Semaglutide, a newly once-weekly glucagon like peptide-1 (GLP-1)" + ) assert pa.abstract.endswith("GLP-1 receptor agonists of exenatide release and dulaglutide.") assert "Semaglutide" in pa.abstract assert "Results" in pa.abstract diff --git a/tests/test_eutils_xmlfacades_pubmedcentralarticle.py b/tests/test_eutils_xmlfacades_pubmedcentralarticle.py index 6992a69..3126e99 100644 --- a/tests/test_eutils_xmlfacades_pubmedcentralarticle.py +++ b/tests/test_eutils_xmlfacades_pubmedcentralarticle.py @@ -6,10 +6,15 @@ def test_eutils_xmlfacades_pubmedcentralarticle_PMC3299399(client): pas = client.efetch(db="pmc", id=3299399) pa = next(iter(pas)) - assert pa.pmc == '3299399' - assert pa.pmid == '22416237' - assert pa.doi == '10.3389/fpsyt.2012.00018' - assert pa.title == 'The Effects of Psychosis Risk Variants on Brain Connectivity: A Review' - assert 'It is characterized by hallucinations and delusions, reduced emotion and cognitive impairment' in pa.body_text - assert 'PubmedCentralArticle(pmc=3299399;pmid=22416237;doi=10.3389/fpsyt.2012.00018;The Effects of Psychosis Risk Variants on Brain Connectivity: A Review)' in str( - pa) + assert pa.pmc == "3299399" + assert pa.pmid == "22416237" + assert pa.doi == "10.3389/fpsyt.2012.00018" + assert pa.title == "The Effects of Psychosis Risk Variants on Brain Connectivity: A Review" + assert ( + "It is characterized by hallucinations and delusions, reduced emotion and cognitive impairment" + in pa.body_text + ) + assert ( + "PubmedCentralArticle(pmc=3299399;pmid=22416237;doi=10.3389/fpsyt.2012.00018;The Effects of Psychosis Risk Variants on Brain Connectivity: A Review)" + in str(pa) + ) diff --git a/tests/test_rcore_sqllitecache.py b/tests/test_rcore_sqllitecache.py index 89837f1..04a1d44 100644 --- a/tests/test_rcore_sqllitecache.py +++ b/tests/test_rcore_sqllitecache.py @@ -10,7 +10,7 @@ class Test_SQLiteCacheBase(unittest.TestCase): def setUp(self): - _, self._fn = tempfile.mkstemp(suffix='.db') + _, self._fn = tempfile.mkstemp(suffix=".db") atexit.register(lambda: os.remove(self._fn)) @@ -19,17 +19,17 @@ def setUp(self): class Test_SQLiteCache_AttrLookup(Test_SQLiteCacheBase): def test_str_str(self): - k, v = 'key1', 'text' + k, v = "key1", "text" self.cache[k] = v assert v == self.cache[k] def test_str_int(self): - k, v = 'key2', 2 + k, v = "key2", 2 self.cache[k] = v assert v == self.cache[k] def test_int_str(self): - k, v = 3, 'val4' + k, v = 3, "val4" self.cache[k] = v assert v == self.cache[k] @@ -52,33 +52,33 @@ def test_None_int(self): class Test_SQLiteCache_Dir(Test_SQLiteCacheBase): def setUp(self): super(Test_SQLiteCache_Dir, self).setUp() - self.cache['a'] = 'a' - self.cache['b'] = 'b' - self.cache['b'] = 'b2' - self.cache['c'] = 'c' + self.cache["a"] = "a" + self.cache["b"] = "b" + self.cache["b"] = "b2" + self.cache["c"] = "c" def test_dir(self): - assert set(['a', 'b', 'c']) == set(dir(self.cache)) + assert set(["a", "b", "c"]) == set(dir(self.cache)) def test_in(self): - assert 'a' in self.cache - assert 'b' in self.cache - assert 'c' in self.cache + assert "a" in self.cache + assert "b" in self.cache + assert "c" in self.cache class Test_SQLiteCache_Expire(Test_SQLiteCacheBase): def test_expire(self): - self.cache['a'] = 'a' - self.cache['b'] = 'b' + self.cache["a"] = "a" + self.cache["b"] = "b" time.sleep(5) - self.cache['b'] = 'b2' - self.cache['c'] = 'c' + self.cache["b"] = "b2" + self.cache["c"] = "c" - assert set(['a', 'b', 'c']) == set(dir(self.cache)) + assert set(["a", "b", "c"]) == set(dir(self.cache)) self.cache.expire(3) # b was updated and should be younger than 3 seconds old - assert set(['b', 'c']) == set(dir(self.cache)) + assert set(["b", "c"]) == set(dir(self.cache)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main()