diff --git a/docs/usage.rst b/docs/usage.rst index 69fce4e4..410f0002 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -123,13 +123,21 @@ Additional resources, hosted either externally or locally, can be linked with th sub.add_additional_resource("Web page with auxiliary material", "https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/STDM-2012-02/") sub.add_additional_resource("Some file", "root_file.root", copy_file=True) + sub.add_additional_resource("Some file", "root_file.root", copy_file=True, resource_license={"name": "CC BY 4.0", "url": "https://creativecommons.org/licenses/by/4.0/", "description": "This license enables reusers to distribute, remix, adapt, and build upon the material in any medium or format, so long as attribution is given to the creator."}) sub.add_additional_resource("Archive of full likelihoods in the HistFactory JSON format", "Likelihoods.tar.gz", copy_file=True, file_type="HistFactory") The first argument is a ``description`` and the second is the ``location`` of the external link or local resource file. The optional argument ``copy_file=True`` (default value of ``False``) will copy a local file into the output directory. +The optional argument ``resource_license`` can be used to define a data license for an additional resource. +The ``resource_license`` is in the form of a dictionary with mandatory string ``name`` and ``url`` values, and an optional ``description``. The optional argument ``file_type="HistFactory"`` (default value of ``None``) can be used to identify statistical models provided in the HistFactory JSON format rather than relying on certain trigger words in the ``description`` (see `pyhf section of submission documentation`_). +**Please note:** The default license applied to all data uploaded to HEPData is `CC0`_. You do not +need to specify a license for a resource file unless it differs from `CC0`_. + +.. 
_`CC0`: https://creativecommons.org/public-domain/cc0/ + The ``add_link`` function can alternatively be used to add a link to an external resource: :: @@ -320,6 +328,20 @@ The documentation for this feature can be found here: `Linking tables`_. .. _`Linking tables`: https://hepdata-submission.readthedocs.io/en/latest/bidirectional.html#linking-tables +Adding a data license +^^^^^^^^^^^^^^^^^^^^^ + +You can add data license information to a table using the ``add_data_license`` function of the Table class. +This function takes mandatory ``name`` and ``url`` string arguments, as well as an optional ``description``. + +**Please note:** The default license applied to all data uploaded to HEPData is `CC0`_. You do not +need to specify a license for a data table unless it differs from `CC0`_. + +:: + + table.add_data_license("CC BY 4.0", "https://creativecommons.org/licenses/by/4.0/") + table.add_data_license("CC BY 4.0", "https://creativecommons.org/licenses/by/4.0/", "This license enables reusers to distribute, remix, adapt, and build upon the material in any medium or format, so long as attribution is given to the creator.") + Uncertainties +++++++++++++ diff --git a/examples/Getting_started.ipynb b/examples/Getting_started.ipynb index 7e8a7a56..767d57f1 100644 --- a/examples/Getting_started.ipynb +++ b/examples/Getting_started.ipynb @@ -27,8 +27,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Welcome to JupyROOT 6.26/06\n", - "hepdata_lib version 0.14.1\n" + "Welcome to JupyROOT 6.30/04\n", + "hepdata_lib version 0.15.0\n" ] } ], @@ -278,7 +278,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is all that's needed for the table/figure. We still need it to the submission:" + "This is all that's needed for the table/figure. 
We still need to add it to the submission:" ] }, { @@ -372,6 +372,152 @@ "source": [ "!ls example_output" ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "---\n", + "additional_resources:\n", + "- description: Created with hepdata_lib 0.15.0\n", + " location: https://doi.org/10.5281/zenodo.1217998\n", + "- description: Webpage with all figures and tables\n", + " location: https://cms-results.web.cern.ch/cms-results/public-results/publications/B2G-16-029/\n", + "- description: arXiv\n", + " location: http://arxiv.org/abs/arXiv:1802.09407\n", + "- description: Original abstract file\n", + " location: abstract.txt\n", + "comment: A search for a new heavy particle decaying to a pair of vector bosons (WW\n", + " or WZ) is presented using data from the CMS detector corresponding to an integrated\n", + " luminosity of $35.9~\\mathrm{fb}^{-1}$ collected in proton-proton collisions at a\n", + " centre-of-mass energy of 13~TeV in 2016. One of the bosons is required to be a W\n", + " boson decaying to $e\\nu$ or $mu\\nu$, while the other boson is required to be reconstructed\n", + " as a single massive jet with substructure compatible with that of a highly-energetic\n", + " quark pair from a W or Z boson decay. The search is performed in the resonance mass\n", + " range between 1.0 and 4.5~TeV. The largest deviation from the background-only hypothesis\n", + " is observed for a mass near 1.4~TeV and corresponds to a local significance of 2.5\n", + " standard deviations. The result is interpreted as an upper bound on the resonance\n", + " production cross section. 
Comparing the excluded cross section values and the expectations\n", + " from theoretical calculations in the bulk graviton and heavy vector triplet models,\n", + " spin-2 WW resonances with mass smaller than 1.07~TeV and spin-1 WZ resonances lighter\n", + " than 3.05~TeV, respectively, are excluded at 95\\% confidence level.\n", + "data_license:\n", + " description: CC0 enables reusers to distribute, remix, adapt, and build upon the\n", + " material in any medium or format, with no conditions.\n", + " name: CC0\n", + " url: https://creativecommons.org/publicdomain/zero/1.0/\n", + "record_ids:\n", + "- id: 1657397\n", + " type: inspire\n", + "---\n", + "additional_resources:\n", + "- description: Original data file\n", + " location: effacc_signal.txt\n", + "- description: Image file\n", + " location: signalEffVsMass.png\n", + "- description: Thumbnail image file\n", + " location: thumb_signalEffVsMass.png\n", + "data_file: additional_figure_1.yaml\n", + "description: Signal selection efficiency times acceptance as a function of resonance\n", + " mass for a spin-2 bulk graviton decaying to WW and a spin-1 W' decaying to WZ.\n", + "keywords:\n", + "- name: observables\n", + " values:\n", + " - ACC\n", + " - EFF\n", + "- name: reactions\n", + " values:\n", + " - P P --> GRAVITON --> W+ W-\n", + " - P P --> WPRIME --> W+/W- Z0\n", + "- name: cmenergies\n", + " values:\n", + " - 13000\n", + "location: Data from additional Figure 1\n", + "name: Additional Figure 1\n" + ] + } + ], + "source": [ + "!cat example_output/submission.yaml" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dependent_variables:\n", + "- header:\n", + " name: Efficiency times acceptance\n", + " qualifiers:\n", + " - name: Efficiency times acceptance\n", + " value: Bulk graviton --> WW\n", + " - name: SQRT(S)\n", + " units: TeV\n", + " value: 13\n", + " values:\n", + " - value: 0.4651\n", + 
" - value: 0.50336\n", + " - value: 0.5126\n", + " - value: 0.52474\n", + " - value: 0.531\n", + " - value: 0.5391\n", + " - value: 0.54943\n", + " - value: 0.55378\n", + " - value: 0.56216\n", + " - value: 0.56454\n", + " - value: 0.56682\n", + "- header:\n", + " name: Efficiency times acceptance\n", + " qualifiers:\n", + " - name: Efficiency times acceptance\n", + " value: Wprime --> WZ\n", + " - name: SQRT(S)\n", + " units: TeV\n", + " value: 13\n", + " values:\n", + " - value: 0.45136\n", + " - value: 0.5109\n", + " - value: 0.54016\n", + " - value: 0.5513\n", + " - value: 0.56724\n", + " - value: 0.5728\n", + " - value: 0.5856\n", + " - value: 0.58952\n", + " - value: 0.60324\n", + " - value: .nan\n", + " - value: 0.59978\n", + "independent_variables:\n", + "- header:\n", + " name: Resonance mass\n", + " units: GeV\n", + " values:\n", + " - value: 1000.0\n", + " - value: 1200.0\n", + " - value: 1400.0\n", + " - value: 1600.0\n", + " - value: 1800.0\n", + " - value: 2000.0\n", + " - value: 2500.0\n", + " - value: 3000.0\n", + " - value: 3500.0\n", + " - value: 4000.0\n", + " - value: 4500.0\n" + ] + } + ], + "source": [ + "!cat example_output/additional_figure_1.yaml" + ] } ], "metadata": { @@ -390,7 +536,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/hepdata_lib/__init__.py b/hepdata_lib/__init__.py index 84d50eda..e919b5f7 100644 --- a/hepdata_lib/__init__.py +++ b/hepdata_lib/__init__.py @@ -55,7 +55,8 @@ def __init__(self): self.files_to_copy = [] self.additional_resources = [] - def add_additional_resource(self, description, location, copy_file=False, file_type=None): + def add_additional_resource(self, description, location, copy_file=False, file_type=None, + resource_license=None): """ Add any kind of additional resource. If copy_file is set to False, the location and description will be added as-is. 
@@ -80,8 +81,13 @@ def add_additional_resource(self, description, location, copy_file=False, file_t :param file_type: Type of the resource file. Currently, only "HistFactory" has any effect. :type file_type: string + + :param resource_license: License information comprising name, url and optional description. + :type resource_license: dict """ + #pylint: disable=too-many-arguments + resource = {} resource["description"] = description if copy_file: @@ -95,6 +101,28 @@ def add_additional_resource(self, description, location, copy_file=False, file_t if file_type: resource["type"] = file_type + # Confirm that license does not contain extra keys, + # and has the mandatory name and url values + if resource_license: + + if not isinstance(resource_license, dict): + raise ValueError("resource_license must be a dictionary.") + + # Get the license dict keys as a set + license_keys = set(resource_license.keys()) + + # Create sets for both possibilities + mandatory_keys = {"name", "url"} + all_keys = mandatory_keys.union(["description"]) + + # If license matches either of the correct values + if license_keys in (mandatory_keys, all_keys): + resource["license"] = resource_license + else: + raise ValueError("Incorrect resource_license format: " + "resource_license must be a dictionary containing a " + "name, url and optional description.") + self.additional_resources.append(resource) def copy_files(self, outdir): @@ -308,6 +336,7 @@ def __init__(self, name): self.location = "Example location" self.keywords = {} self.image_files = set() + self.data_license = {} @property def name(self): @@ -365,6 +394,34 @@ def add_related_doi(self, doi): else: raise ValueError(f"DOI does not match the correct pattern: {pattern}.") + def add_data_license(self, name, url, description=None): + """ + Verify and store the given license data. 
+ + :param name: The license name + :type name: string + :param url: The license URL + :type url: string + :param description: The (optional) license description + :type description: string + """ + license_data = {} + + if name: + license_data["name"] = name + else: + raise ValueError("You must insert a value for the license's name.") + + if url: + license_data["url"] = url + else: + raise ValueError("You must insert a value for the license's url.") + + if description: + license_data["description"] = description + + self.data_license = license_data + def write_output(self, outdir): """ Write the table files into the output directory. @@ -471,11 +528,14 @@ def write_yaml(self, outdir="."): submission["name"] = self.name submission["description"] = self.description submission["location"] = self.location - submission["related_to_table_dois"] = self.related_tables + if self.related_tables: + submission["related_to_table_dois"] = self.related_tables submission["data_file"] = f'{shortname}.yaml' submission["keywords"] = [] if self.additional_resources: submission["additional_resources"] = self.additional_resources + if self.data_license: + submission["data_license"] = self.data_license for name, values in list(self.keywords.items()): submission["keywords"].append({"name": name, "values": values}) @@ -509,11 +569,11 @@ def __init__(self): def get_license(): """Return the default license.""" data_license = {} - data_license["name"] = "cc-by-4.0" - data_license["url"] = "https://creativecommons.org/licenses/by/4.0/" - data_license[ - "description"] = "The content can be shared and adapted but you must\ - give appropriate credit and cannot restrict access to others." 
+ data_license["name"] = "CC0" + data_license["url"] = "https://creativecommons.org/publicdomain/zero/1.0/" + data_license["description"] = ( + "CC0 enables reusers to distribute, remix, adapt, and build upon the material " + "in any medium or format, with no conditions.") return data_license def add_table(self, table): @@ -612,7 +672,8 @@ def create_files(self, outdir=".", validate=True, remove_old=False): submission = {} submission["data_license"] = self.get_license() submission["comment"] = self.comment - submission["related_to_hepdata_records"] = self.related_records + if self.related_records: + submission["related_to_hepdata_records"] = self.related_records if self.additional_resources: submission["additional_resources"] = self.additional_resources diff --git a/tests/test_submission.py b/tests/test_submission.py index d750e931..6bee2c01 100644 --- a/tests/test_submission.py +++ b/tests/test_submission.py @@ -71,11 +71,26 @@ def test_additional_resource_size(self): def test_create_files(self): """Test create_files() for Submission.""" + # Set test directory/file path testdir = tmp_directory_name() + testpath = "./testfile.txt" + + with open(testpath, "a", encoding="utf-8") as f: + f.close() + + self.addCleanup(os.remove, testpath) + + # Create submission and set values for testing test_submission = Submission() test_submission.add_record_id(1657397, "inspire") - tab = Table("test") - test_submission.add_table(tab) + test_submission.add_related_recid(111) + test_submission.add_additional_resource("Some description", testpath, + copy_file=True, file_type="HistFactory") + # Create table and set test values + test_table = Table("test") + test_table.add_related_doi("10.17182/hepdata.1.v1/t1") + test_submission.add_table(test_table) + test_submission.create_files(testdir) self.doCleanups() @@ -157,7 +172,7 @@ def test_nested_files_to_copy(self): def test_add_related_doi(self): """Test insertion and retrieval of recid values in the Table object""" - # Possibly 
unneccessary boundary testing + # Possibly unnecessary boundary testing test_data = [ {"doi": "10.17182/hepdata.1.v1/t1", "error": False}, {"doi": "10.17182/hepdata.1", "error": ValueError}, @@ -191,3 +206,22 @@ def test_add_related_recid(self): sub.add_related_recid(test["recid"]) assert int(test["recid"]) == sub.related_records[-1] assert len(sub.related_records) == 2 + + def test_add_data_license(self): + """Test addition of data license entries to the Table class""" + test_data = [ + {"expected_err": None, "data_license": ["name", "url", "desc"]}, # Valid, full + {"expected_err": None, "data_license": ["name", "url", None]}, # Valid, no desc + {"expected_err": ValueError, "data_license": ["name", None, "desc"]}, # Error, no url + {"expected_err": ValueError, "data_license": [None, "url", "desc"]} # Error, no name + ] + tab = Table("Table") # Test table class + for test in test_data: + # Check if an error is expected here or not + if test["expected_err"]: + self.assertRaises(test["expected_err"], tab.add_data_license, *test["data_license"]) + else: + # Check data exists and is correct + tab.add_data_license(*test["data_license"]) + assert tab.data_license["name"] == test["data_license"][0] + assert tab.data_license["url"] == test["data_license"][1] diff --git a/tests/test_table.py b/tests/test_table.py index 3fb95d41..1fa434f9 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -4,7 +4,7 @@ import shutil from unittest import TestCase -from hepdata_lib import Table, Variable, Uncertainty +from hepdata_lib import Table, Variable, Uncertainty, helpers from .test_utilities import tmp_directory_name class TestTable(TestCase): @@ -61,6 +61,12 @@ def test_write_yaml(self): test_table = Table("Some Table") test_variable = Variable("Some Variable") test_table.add_variable(test_variable) + test_table.add_related_doi("10.17182/hepdata.1.v1/t1") + test_table.add_data_license( + name="data_license", + url="test_url" + ) + test_table.keywords = {"name": "keywords", 
"values": "1"} testdir = tmp_directory_name() self.addCleanup(shutil.rmtree, testdir) try: @@ -173,14 +179,113 @@ def test_write_images_multiple_executions(self): self.assertTrue(modified_time_main < os.path.getmtime(expected_main_file)) self.assertTrue(modified_time_thumbnail < os.path.getmtime(expected_thumbnail_file)) - - - - def test_add_additional_resource(self): """Test the add_additional_resource function.""" test_table = Table("Some Table") - test_table.add_additional_resource("some link","www.cern.ch") + test_data = [ + { + "description": "SomeLink", + "location": "www.cern.ch", + "type": None, + "license": None + }, + { + "description": "SomeLink", + "location": "www.cern.ch", + "type": "HistFactory", + "license": {"name": "licenseName", "url": "www.cern.ch", + "description": "licenseDesc"} + } + ] + + for test in test_data: + test_table.add_additional_resource( + test["description"], + test["location"], + file_type=test["type"], + resource_license=test["license"] + ) + resource = test_table.additional_resources[-1] + + # Check resource and mandatory arguments + assert resource + assert resource["description"] == test["description"] + assert resource["location"] == test["location"] + + # Check optional arguments type and license + if test["type"]: + assert resource["type"] == test["type"] + + if test["license"]: + assert resource["license"] == test["license"] + + def test_add_additional_resource_license_check(self): + """ Test the license value check in Table.add_additional_resource """ + # First two pass, last two fail + license_data = [ + { + "error": None, + "license_data": { + "name": "Name", + "url": "URL" + } + }, + { + "error": None, + "license_data": { + "name": "Name", + "url": "URL", + "description": "Desc" + } + }, + { + "error": ValueError, + "license_data": { + "name": "Name", + "url": "URL", + "shouldnotbehere": "shouldnotbehere" + } + }, + { + "error": ValueError, + "license_data": { + "name": "Name", + "url": "URL", + "description": 
"Desc", + "toomany": "toomany" + } + }, + { + "error": ValueError, + "license_data": "a string not a dictionary" + }] + + # Create test table and get the test pdf + test_table = Table("Some Table") + some_pdf = f"{os.path.dirname(__file__)}/minimal.pdf" + + # Set default description, location, copy_file and file_type arguments for a resource file + resource_args = ["Description", some_pdf, True, "Type"] + + for data in license_data: + # If error is expected, we check for the error + # Otherwise, just add and check length later + if data["error"]: + with self.assertRaises(ValueError): + test_table.add_additional_resource( + *resource_args, + resource_license=data["license_data"] + ) + else: + # Check for lack of failure + try: + test_table.add_additional_resource( + *resource_args, + resource_license=data["license_data"] + ) + except ValueError: + self.fail("Table.add_additional_resource raised an unexpected ValueError.") + def test_copy_files(self): """Test the copy_files function.""" @@ -189,6 +294,23 @@ def test_copy_files(self): testdir = tmp_directory_name() self.addCleanup(shutil.rmtree, testdir) os.makedirs(testdir) + test_table.add_additional_resource("a plot", some_pdf, copy_file=True) + + # Check that the file has been created + assert helpers.check_file_existence(some_pdf) + + # Explicitly test for lack of an error + try: + # No boundaries + helpers.check_file_size(some_pdf) + # Between boundaries + helpers.check_file_size(some_pdf, upper_limit=1, lower_limit=0.001) + except RuntimeError: + self.fail("Table.check_file_size raised an unexpected RuntimeError.") + + # Check that both boundaries function correctly + with self.assertRaises(RuntimeError): + helpers.check_file_size(some_pdf, upper_limit=0.001) - test_table.add_additional_resource("a plot",some_pdf, copy_file=True) - test_table.copy_files(testdir) + with self.assertRaises(RuntimeError): + helpers.check_file_size(some_pdf, lower_limit=1)