diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9e3ce99..e52cff2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,17 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + exclude: "^.*drawio|.*.svg$" + - id: check-yaml + - id: check-added-large-files - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.263 + rev: v0.1.0 + hooks: + - id: ruff +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.10.0 hooks: - - id: ruff + - id: black diff --git a/pyalex/api.py b/pyalex/api.py index 9427be0..5b2649d 100644 --- a/pyalex/api.py +++ b/pyalex/api.py @@ -21,7 +21,7 @@ def __setattr__(self, key, value): config = AlexConfig( email=None, - api_key=None, + api_key=None, openalex_url="https://api.openalex.org", max_retries=0, retry_backoff_factor=0.1, @@ -30,9 +30,7 @@ def __setattr__(self, key, value): def _flatten_kv(d, prefix=""): - if isinstance(d, dict): - t = [] for k, v in d.items(): if isinstance(v, list): @@ -44,7 +42,6 @@ def _flatten_kv(d, prefix=""): return ",".join(t) else: - # workaround for bug https://groups.google.com/u/1/g/openalex-users/c/t46RWnzZaXc d = str(d).lower() if isinstance(d, bool) else d @@ -52,7 +49,6 @@ def _flatten_kv(d, prefix=""): def _params_merge(params, add_params): - for k, _v in add_params.items(): if ( k in params @@ -81,7 +77,6 @@ def _params_merge(params, add_params): def invert_abstract(inv_index): - if inv_index is not None: l_inv = [(w, p) for w, pos in inv_index.items() for p in pos] return " ".join(map(lambda x: x[0], sorted(l_inv, key=lambda x: x[1]))) @@ -94,13 +89,12 @@ def get_requests_session(): total=config.max_retries, backoff_factor=config.retry_backoff_factor, status_forcelist=config.retry_http_codes, - allowed_methods={'GET'}, + allowed_methods={"GET"}, ) requests_session.mount( - 'https://', - requests.adapters.HTTPAdapter(max_retries=retries) + "https://", requests.adapters.HTTPAdapter(max_retries=retries) ) - + return requests_session @@ -109,7 +103,6 @@ class QueryError(ValueError): class OpenAlexEntity(dict): - pass @@ -117,14 +110,12 @@ class Work(OpenAlexEntity): """OpenAlex work object.""" def __getitem__(self, key): - if key == "abstract": return invert_abstract(self["abstract_inverted_index"]) return super().__getitem__(key) def ngrams(self, return_meta=False): - openalex_id = self["id"].split("/")[-1] res = get_requests_session().get( @@ -164,11 +155,11 @@ class Publisher(OpenAlexEntity): class Funder(OpenAlexEntity): pass + # deprecated def Venue(*args, **kwargs): - # warn about deprecation warnings.warn( "Venue is deprecated. Use Sources instead.", @@ -181,20 +172,17 @@ def Venue(*args, **kwargs): class CursorPaginator: def __init__(self, alex_class=None, per_page=None, cursor="*", n_max=None): - self.alex_class = alex_class self.per_page = per_page self.cursor = cursor self.n_max = n_max def __iter__(self): - self.n = 0 return self def __next__(self): - if self.n_max and self.n >= self.n_max: raise StopIteration @@ -216,19 +204,15 @@ class BaseOpenAlex: """Base class for OpenAlex objects.""" def __init__(self, params=None): - self.params = params def _get_multi_items(self, record_list): - return self.filter(openalex_id="|".join(record_list)).get() def _full_collection_name(self): - return config.openalex_url + "/" + self.__class__.__name__.lower() def __getattr__(self, key): - if key == "groupby": raise AttributeError( "Object has no attribute 'groupby'. " "Did you mean 'group_by'?" @@ -243,7 +227,6 @@ def __getattr__(self, key): return getattr(self, key) def __getitem__(self, record_id): - if isinstance(record_id, list): return self._get_multi_items(record_id) @@ -261,13 +244,11 @@ def __getitem__(self, record_id): @property def url(self): - if not self.params: return self._full_collection_name() l_params = [] for k, v in self.params.items(): - if v is None: pass elif isinstance(v, list): @@ -289,7 +270,6 @@ def count(self): return m["count"] def get(self, return_meta=False, page=None, per_page=None, cursor=None): - if per_page is not None and (per_page < 1 or per_page > 200): raise ValueError("per_page should be a number between 1 and 200.") @@ -348,11 +328,9 @@ def paginate(self, per_page=None, cursor="*", n_max=10000): return CursorPaginator(self, per_page=per_page, cursor=cursor, n_max=n_max) def random(self): - return self.__getitem__("random") def _add_params(self, argument, new_params): - if self.params is None: self.params = {argument: new_params} elif argument in self.params and isinstance(self.params[argument], dict): @@ -363,38 +341,31 @@ def _add_params(self, argument, new_params): logging.debug("Params updated:", self.params) def filter(self, **kwargs): - self._add_params("filter", kwargs) return self def search_filter(self, **kwargs): - self._add_params("filter", {f"{k}.search": v for k, v in kwargs.items()}) return self def sort(self, **kwargs): - self._add_params("sort", kwargs) return self def group_by(self, group_key): - self._add_params("group-by", group_key) return self def search(self, s): - self._add_params("search", s) return self def sample(self, n, seed=None): - self._add_params("sample", n) self._add_params("seed", seed) return self def select(self, s): - self._add_params("select", s) return self @@ -426,11 +397,11 @@ class Publishers(BaseOpenAlex): class Funders(BaseOpenAlex): resource_class = Funder + # deprecated def Venues(*args, **kwargs): - # warn about deprecation warnings.warn( "Venues is deprecated. Use Sources instead.", diff --git a/tests/test_pyalex.py b/tests/test_pyalex.py index 05575fe..1002ba6 100644 --- a/tests/test_pyalex.py +++ b/tests/test_pyalex.py @@ -28,7 +28,6 @@ def test_config(): def test_meta_entities(): - _, m = Authors().get(return_meta=True) assert "count" in m _, m = Concepts().get(return_meta=True) @@ -42,44 +41,37 @@ def test_meta_entities(): _, m = Funders().get(return_meta=True) assert "count" in m -def test_works_params(): +def test_works_params(): assert len(Works(params={"filter": {"publication_year": "2020"}}).get()) == 25 def test_works(): - assert len(Works().filter(publication_year=2020).get()) == 25 def test_works_count(): - assert Works().filter(publication_year=2020).count() > 10_000_000 def test_per_page(): - assert len(Works().filter(publication_year=2020).get(per_page=200)) == 200 def test_W4238809453_works(): - assert isinstance(Works()["W4238809453"], Work) assert Works()["W4238809453"]["doi"] == "https://doi.org/10.1001/jama.264.8.944b" def test_W4238809453_works_abstract(): - assert Works()["W4238809453"]["abstract"] is None def test_W4238809453_works_no_abstract(): - assert "abstract" not in Works()["W4238809453"] def test_W3128349626_works_abstract(): - w = Works()["W3128349626"] assert w["abstract"] is not None @@ -87,7 +79,6 @@ def test_W3128349626_works_abstract(): def test_W3128349626_works_no_abstract(): - w = Works()["W3128349626"] assert w["abstract_inverted_index"] is not None @@ -95,18 +86,15 @@ def test_W3128349626_works_no_abstract(): def test_work_error(): - with pytest.raises(HTTPError): Works()["NotAWorkID"] def test_random_works(): - assert isinstance(Works().random(), dict) def test_multi_works(): - # the work to extract the referenced works of w = Works()["W2741809807"] @@ -114,7 +102,6 @@ def test_multi_works(): def test_works_multifilter(): - r = requests.get( "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true" ).json() @@ -137,7 +124,6 @@ def test_works_multifilter(): def test_works_url(): - url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true" assert url == Works().filter(publication_year=2020, is_oa=True).url @@ -147,7 +133,6 @@ def test_works_url(): def test_works_multifilter_meta(): - _, m1 = Works().filter(publication_year=2020, is_oa=True).get(return_meta=True) _, m2 = ( Works().filter(publication_year=2020).filter(is_oa=True).get(return_meta=True) @@ -157,13 +142,11 @@ def test_works_multifilter_meta(): def test_query_error(): - with pytest.raises(QueryError): Works().filter(publication_year_error=2020).get() def test_data_publications(): - w, _ = ( Works() .filter(authorships={"institutions": {"ror": "04pp8hn57"}}) @@ -176,7 +159,6 @@ def test_data_publications(): def test_search(): - w = ( Works() .search( @@ -190,7 +172,6 @@ def test_search(): def test_search_filter(): - r = requests.get( "https://api.openalex.org/authors?filter=display_name.search:einstein" ).json() @@ -201,7 +182,6 @@ def test_search_filter(): def test_cursor_by_hand(): - # example query query = Authors().search_filter(display_name="einstein") @@ -212,7 +192,6 @@ def test_cursor_by_hand(): # loop till next_cursor is None while next_cursor is not None: - # get the results r, m = query.get(return_meta=True, per_page=200, cursor=next_cursor) @@ -226,7 +205,6 @@ def test_cursor_by_hand(): def test_basic_paging(): - # example query query = Authors().search_filter(display_name="einstein") @@ -238,7 +216,6 @@ def test_basic_paging(): # loop till page is None while page is not None: - # get the results r, m = query.get(return_meta=True, per_page=200, page=page) @@ -250,17 +227,14 @@ def test_basic_paging(): def test_cursor_paging(): - # example query pager = Authors().search_filter(display_name="einstein").paginate(per_page=200) for page in pager: - assert len(page) >= 1 and len(page) <= 200 def test_cursor_paging_n_max(): - # example query pager = ( Authors() @@ -270,14 +244,12 @@ def test_cursor_paging_n_max(): n = 0 for page in pager: - n = n + len(page) assert n == 400 def test_cursor_paging_n_max_none(): - # example query pager = ( Authors() @@ -287,12 +259,10 @@ def test_cursor_paging_n_max_none(): n = 0 for page in pager: - n = n + len(page) def test_referenced_works(): - # the work to extract the referenced works of w = Works()["W2741809807"] @@ -307,7 +277,6 @@ def test_referenced_works(): @pytest.mark.xfail() def test_code_examples(): - # /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code # noqa # /works?filter=institutions.is_global_south:true,type:dataset&group-by=institutions.country_code&sort=count:desc # noqa @@ -330,7 +299,6 @@ def test_code_examples(): def test_serializable(tmpdir): - with open(Path(tmpdir, "test.json"), "w") as f: json.dump(Works()["W4238809453"], f) @@ -339,26 +307,22 @@ def test_serializable(tmpdir): def test_ngrams_without_metadata(): - r = Works()["W2023271753"].ngrams(return_meta=False) assert len(r) == 1068 def test_ngrams_with_metadata(): - r, meta = Works()["W2023271753"].ngrams(return_meta=True) assert meta["count"] == 1068 def test_random_publishers(): - assert isinstance(Publishers().random(), dict) def test_and_operator(): - # https://github.com/J535D165/pyalex/issues/11 url = "https://api.openalex.org/works?filter=institutions.country_code:tw,institutions.country_code:hk,institutions.country_code:us,publication_year:2022" # noqa @@ -390,13 +354,11 @@ def test_and_operator(): def test_sample(): - url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50" assert url == Works().filter(publication_year=2020, is_oa=True).sample(50).url def test_sample_seed(): - url = "https://api.openalex.org/works?filter=publication_year:2020,is_oa:true&sample=50&seed=535" # noqa assert ( url @@ -405,6 +367,5 @@ def test_sample_seed(): def test_subset(): - url = "https://api.openalex.org/works?select=id,doi,display_name" assert url == Works().select(["id", "doi", "display_name"]).url