Merge pull request #52 from USEPA/develop

v0.3.3
USEPA · Apr 23, 2024 · e6f27d4 · e6f27d4
2 parents 6249841 + 8b9c005
commit e6f27d4
Show file tree

Hide file tree

Showing 6 changed files with 136 additions and 6 deletions.
diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
@@ -48,10 +48,13 @@ jobs:
         python -m pip install --upgrade pip setuptools wheel
         pip install pytest pytest-cov flake8
 
-    # install package & dependencies
     - name: Install package and dependencies
-      run: |
-        pip install .
+      if: ${{ matrix.py-version == '3.7' || matrix.py-version == '3.8' }}
+      run: pip install .
+
+    - name: Install package and dependencies
+      if: ${{ !(matrix.py-version == '3.7' || matrix.py-version == '3.8') }}
+      run: pip install .[bib]
 
     # linting & pytest
     - name: Lint with flake8

diff --git a/esupy/bibtex.py b/esupy/bibtex.py
@@ -0,0 +1,102 @@
+# bibtex.py (esupy)
+# !/usr/bin/env python3
+# coding=utf-8
+
+"""
+Module to support generating sources within the olca_schema.
+"""
+
+from pathlib import Path
+import logging as log
+
+from esupy.util import make_uuid
+
+def generate_sources(bib_path: Path,
+                     bibids: dict
+                     ) -> list:
+    """
+    Generates a list of olca_schema.Source based on requested bib_ids.
+
+    Returns an empty list (after logging) when no bibids are passed or when
+    the optional bibtexparser / olca_schema packages cannot be imported.
+    Requested ids that are missing from the .bib file are logged and skipped.
+    :param bib_path: Path object to a .bib file containing source information
+    :param bibids: dictionary in the format of {bib_id: name}, where bib_id is
+        the citation key in the .bib file and name is the name displayed in
+        the openLCA dashboard.
+    :return: list of olca_schema.Source
+    """
+    if not bibids:
+        log.debug("No bibids passed. No sources generated.")
+        return []
+    try:
+        import bibtexparser
+        from bibtexparser.bparser import BibTexParser
+    except ImportError:
+        log.warning("Writing sources requires bibtexparser package")
+        return []
+    try:
+        import olca_schema as o
+    except ImportError:
+        log.warning("Writing sources requires olca_schema package")
+        return []
+
+    def customizations(record):
+        """Apply bibtexparser's add_plaintext_fields and doi customizations.
+
+        :param record: a record
+        :returns: -- customized record
+        """
+        #record = bibtexparser.customization.author(record)
+        record = bibtexparser.customization.add_plaintext_fields(record)
+        record = bibtexparser.customization.doi(record)
+        return record
+
+    parser = BibTexParser(common_strings=True)
+    parser.ignore_nonstandard_types = False
+    parser.homogenize_fields = True
+    parser.customization = customizations
+
+    def read_bib_file(path: Path) -> dict:
+        """Parse the .bib file at path and return the parser's entries_dict."""
+        # read as UTF-8 explicitly instead of the platform default encoding
+        with open(path, encoding='utf-8') as bibtex_file:
+            bib_database = parser.parse_file(bibtex_file)
+        return bib_database.entries_dict
+
+    def parse_for_olca(bibids, d):
+        """Build one olca_schema.Source per requested bib_id found in d."""
+        # Source field -> bib field(s); a list of fields is joined with ', '
+        key_dict = {'description': ['plain_author',
+                                    'plain_publisher',
+                                    'plain_title',
+                                    'plain_journal',
+                                    'year'],
+                    'year': 'plain_year',
+                    'url': 'url',
+                    }
+        s = []
+        for bibid, name in bibids.items():
+            record = d.get(bibid)
+            if record is None:
+                log.warning("%s not found in %s", bibid, bib_path)
+                continue
+            source = {'name': name, 'textReference': ''}
+            for key, value in key_dict.items():
+                if isinstance(value, list):
+                    source[key] = ', '.join(
+                        [record[v] for v in value if v in record])
+                else:
+                    # fields absent from the record are written as ''
+                    source[key] = record.get(value, '')
+            # '@id' is make_uuid() applied to the description string
+            source['@id'] = make_uuid(source['description'])
+            s.append(o.Source.from_dict(source))
+        return s
+
+    return parse_for_olca(bibids, read_bib_file(bib_path))
+
+if __name__ == "__main__":
+    # Ad hoc run against the bibliography file kept under tests/
+    test_bib = Path(__file__).parents[1] / 'tests' / 'test.bib'
+    source_list = generate_sources(test_bib, {'bare_traci_2011': 'TRACI 2.1'})
diff --git a/esupy/processed_data_mgmt.py b/esupy/processed_data_mgmt.py
@@ -336,7 +336,7 @@ def parse_data_commons_index(df):
     df['ext'] = df['file_name'].str.rsplit('.', n=1, expand=True)[1]
     df['file'] = df['file_name'].str.rsplit('.', n=1, expand=True)[0]
     df['git_hash'] = df['file'].str.rsplit('_', n=1, expand=True)[1]
-    df['git_hash'].fillna('', inplace=True)
+    df['git_hash'] = df['git_hash'].fillna('')
     df.loc[df['git_hash'].map(len) != 7, 'git_hash'] = ''
     try:
         df['version'] = (df['file']

diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='esupy',
-    version='0.3.2',
+    version='0.3.3',
     packages=['esupy'],
     include_package_data=True,
     python_requires=">=3.7",
@@ -16,7 +16,9 @@
                       'boto3>=1.23.0',
                       ],
     extras_require={"urban_rural": ['geopandas>=0.13.2',
-                                    'shapely>=2.0.1']},
+                                    'shapely>=2.0.1'],
+                    "bib": ['olca_schema>=0.0.11',
+                            'bibtexparser>=1.2']},
     url='http://github.com/usepa/esupy',
     license='CC0',
     author='Wesley Ingwersen',

diff --git a/tests/test.bib b/tests/test.bib
@@ -0,0 +1,10 @@
+@article{bare_traci_2011,
+	title = {{TRACI} 2.0: the tool for the reduction and assessment of chemical and other environmental impacts 2.0},
+	volume = {13},
+	pages = {687--696},
+	number = {5},
+	journal = {Clean Technologies and Environmental Policy},
+	author = {Bare, Jane},
+	doi = {10.1007/s10098-010-0338-9},
+	year = {2011}
+}
diff --git a/tests/test_esupy.py b/tests/test_esupy.py
@@ -1,6 +1,11 @@
 """Test functions"""
 
+import pytest
+import sys
+from pathlib import Path
+
 import esupy.processed_data_mgmt as es_dt
+import esupy.bibtex as bibtex
 
 
 def test_data_commons_access():
@@ -21,3 +26,11 @@ def test_data_commons_access():
     df2 = es_dt.load_preprocessed_output(meta, path)
 
     assert(df1 is not None and df2 is None)
+
+
+@pytest.mark.skipif(sys.version_info < (3, 9), reason="bibliographies require python3.9 or higher")
+def test_source_generation():
+    """Requesting the single id in tests/test.bib yields exactly one source."""
+    test_bib = Path(__file__).parents[1] / 'tests' / 'test.bib'
+    sources = bibtex.generate_sources(test_bib, {'bare_traci_2011': 'TRACI 2.1'})
+    assert len(sources) == 1