From eca8a6f39741179b257b1aefb192939e295b57f3 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 05:43:05 +0200 Subject: [PATCH 1/9] test: add tests for default and custom keys --- .github/workflows/python-publish.yml | 4 ++ ...ta.jsonld => test_data_custom_keys.jsonld} | 0 tests/data/test_data_default_keys.jsonld | 33 +++++++++ tests/deserialization_test.py | 69 +++++++++++++++++++ 4 files changed, 106 insertions(+) rename tests/data/{test_data.jsonld => test_data_custom_keys.jsonld} (100%) create mode 100644 tests/data/test_data_default_keys.jsonld create mode 100644 tests/deserialization_test.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index bdaab28..fb10236 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -30,6 +30,10 @@ jobs: run: | python -m pip install --upgrade pip pip install build + - name: Test with pytest + run: | + pip install pytest + pytest - name: Build package run: python -m build - name: Publish package diff --git a/tests/data/test_data.jsonld b/tests/data/test_data_custom_keys.jsonld similarity index 100% rename from tests/data/test_data.jsonld rename to tests/data/test_data_custom_keys.jsonld diff --git a/tests/data/test_data_default_keys.jsonld b/tests/data/test_data_default_keys.jsonld new file mode 100644 index 0000000..3a57ae3 --- /dev/null +++ b/tests/data/test_data_default_keys.jsonld @@ -0,0 +1,33 @@ +{ + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "quantity": { + "@id": "qudt:hasQuantityKind", + "@type": "@id" + }, + "value": "qudt:value", + "band_gap": "http://example.org/BandGap", + "mass": "http://example.org/Mass", + "type": "@type", + "sample_id": "@id" + }, + "sample_id": "HUB_HaNa_20240424_Sample-1", + "band_gap" : { + "type": "qudt:Quantity", + "quantity": 
"qkind:GapEnergy", + "value": 1.9, + "unit": "qunit:EV" + }, + "mass": { + "type": "qudt:Quantity", + "quantity": "qkind:Mass", + "value": 3201.3, + "unit": "qunit:MilliGM" + } + } \ No newline at end of file diff --git a/tests/deserialization_test.py b/tests/deserialization_test.py new file mode 100644 index 0000000..9f92d69 --- /dev/null +++ b/tests/deserialization_test.py @@ -0,0 +1,69 @@ +import os +import ontopint +import json +import pint + +def _load_test_data(file_name: str) -> dict: + """loads a json file from the test data folder + + Parameters + ---------- + file_name + the name of the json file + + Returns + ------- + the content of the file as dict + """ + data = {} + with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: + data = json.load(f) + return data + +def _recursive_items(dictionary: dict): + """Returns a generator of tuples for every key-value pair in the dict + + Parameters + ---------- + dictionary + any (nested) dict + + Yields + ------ + iterator for key-value tuples of the dict + """ + for key, value in dictionary.items(): + if type(value) is dict: + yield (key, value) + yield from _recursive_items(value) + else: + yield (key, value) + +def test_default_keys(): + """test input data with default keys 'value' and 'unit' + """ + input_jsonld = _load_test_data("test_data_default_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + del parsed_jsonld["@context"] + parse_values_count = 0 + for key, value in _recursive_items(parsed_jsonld): + if key == "value": + assert(isinstance(value, pint.Quantity)) + parse_values_count += 1 + if key == "unit": assert False, "unit key should not be present" + assert parse_values_count == 2, "result should contain 2 parsed values" + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + input_jsonld = _load_test_data("test_data_custom_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + del parsed_jsonld["@context"] 
+ parse_values_count = 0 + for key, value in _recursive_items(parsed_jsonld): + if key == "my_value": + assert(isinstance(value, pint.Quantity)) + parse_values_count += 1 + if key == "my_unit": assert False, "my_unit key should not be present" + assert parse_values_count == 2, "result should contain 2 parsed values" + \ No newline at end of file From 0398bd1cb215ef62ce4470eca0e858780b51a291 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 05:43:05 +0200 Subject: [PATCH 2/9] test: add tests for default and custom keys --- .github/workflows/python-publish.yml | 4 ++ .github/workflows/python-test.yml | 32 +++++++++ ...ta.jsonld => test_data_custom_keys.jsonld} | 0 tests/data/test_data_default_keys.jsonld | 33 +++++++++ tests/deserialization_test.py | 69 +++++++++++++++++++ 5 files changed, 138 insertions(+) create mode 100644 .github/workflows/python-test.yml rename tests/data/{test_data.jsonld => test_data_custom_keys.jsonld} (100%) create mode 100644 tests/data/test_data_default_keys.jsonld create mode 100644 tests/deserialization_test.py diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index bdaab28..fb10236 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -30,6 +30,10 @@ jobs: run: | python -m pip install --upgrade pip pip install build + - name: Test with pytest + run: | + pip install pytest + pytest - name: Build package run: python -m build - name: Publish package diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 0000000..e4a082a --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,32 @@ +# This workflow will run pytest only + +name: Run pytest + +on: + push: + pull_request: + workflow_dispatch: + + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + 
python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Test with pytest + run: | + pip install pytest + pytest diff --git a/tests/data/test_data.jsonld b/tests/data/test_data_custom_keys.jsonld similarity index 100% rename from tests/data/test_data.jsonld rename to tests/data/test_data_custom_keys.jsonld diff --git a/tests/data/test_data_default_keys.jsonld b/tests/data/test_data_default_keys.jsonld new file mode 100644 index 0000000..3a57ae3 --- /dev/null +++ b/tests/data/test_data_default_keys.jsonld @@ -0,0 +1,33 @@ +{ + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "quantity": { + "@id": "qudt:hasQuantityKind", + "@type": "@id" + }, + "value": "qudt:value", + "band_gap": "http://example.org/BandGap", + "mass": "http://example.org/Mass", + "type": "@type", + "sample_id": "@id" + }, + "sample_id": "HUB_HaNa_20240424_Sample-1", + "band_gap" : { + "type": "qudt:Quantity", + "quantity": "qkind:GapEnergy", + "value": 1.9, + "unit": "qunit:EV" + }, + "mass": { + "type": "qudt:Quantity", + "quantity": "qkind:Mass", + "value": 3201.3, + "unit": "qunit:MilliGM" + } + } \ No newline at end of file diff --git a/tests/deserialization_test.py b/tests/deserialization_test.py new file mode 100644 index 0000000..9f92d69 --- /dev/null +++ b/tests/deserialization_test.py @@ -0,0 +1,69 @@ +import os +import ontopint +import json +import pint + +def _load_test_data(file_name: str) -> dict: + """loads a json file from the test data folder + + Parameters + ---------- + file_name + the name of the json file + + Returns + ------- + the content of the file as dict + """ + data = {} + with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: + data = json.load(f) + return data + +def _recursive_items(dictionary: dict): + """Returns a 
generator of tuples for every key-value pair in the dict + + Parameters + ---------- + dictionary + any (nested) dict + + Yields + ------ + iterator for key-value tuples of the dict + """ + for key, value in dictionary.items(): + if type(value) is dict: + yield (key, value) + yield from _recursive_items(value) + else: + yield (key, value) + +def test_default_keys(): + """test input data with default keys 'value' and 'unit' + """ + input_jsonld = _load_test_data("test_data_default_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + del parsed_jsonld["@context"] + parse_values_count = 0 + for key, value in _recursive_items(parsed_jsonld): + if key == "value": + assert(isinstance(value, pint.Quantity)) + parse_values_count += 1 + if key == "unit": assert False, "unit key should not be present" + assert parse_values_count == 2, "result should contain 2 parsed values" + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + input_jsonld = _load_test_data("test_data_custom_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + del parsed_jsonld["@context"] + parse_values_count = 0 + for key, value in _recursive_items(parsed_jsonld): + if key == "my_value": + assert(isinstance(value, pint.Quantity)) + parse_values_count += 1 + if key == "my_unit": assert False, "my_unit key should not be present" + assert parse_values_count == 2, "result should contain 2 parsed values" + \ No newline at end of file From cb547a9db1b7f2a395e2898f774d705f58cebb29 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 06:51:12 +0200 Subject: [PATCH 3/9] feat: use processing context --- src/ontopint/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index a842e1a..b6b56e3 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -18,10 +18,6 @@ 'value': 'qudt:value', } -HAS_UNIT = 
'http://qudt.org/schema/qudt/hasUnit' -VALUE = 'http://qudt.org/schema/qudt/value' - - class UnitDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) @@ -51,9 +47,13 @@ def object_hook(self, obj): def _replace_units(obj, context, original_key_lookup_dict): if isinstance(obj, dict): - expanded_obj = jsonld.expand({**obj, '@context': context}, context) - if HAS_UNIT in expanded_obj[0] and VALUE in expanded_obj[0]: - unit_iri = expanded_obj[0][HAS_UNIT][0]['@id'] + expanded_obj = jsonld.expand({**obj, "@context": context}, context) + compacted_obj = jsonld.compact(expanded_obj, processing_context) + if 'unit' in compacted_obj and 'value' in compacted_obj: + # note: "urn:ontopint:iri" is just any iri not existing in the input data + unit_iri = jsonld.expand( + {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": compacted_obj["unit"]}, {} + )[0]["urn:ontopint:iri"][0]["@id"] obj.pop(original_key_lookup_dict['unit']) graph = rdflib.Graph() graph.parse(unit_iri) From 2b9afd2819d15d3ec991e663345b0c3d7d2aa5c9 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 12:15:42 +0200 Subject: [PATCH 4/9] refactor: move ucum lookup to function --- src/ontopint/__init__.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index b6b56e3..d36c0db 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -18,6 +18,14 @@ 'value': 'qudt:value', } +def get_ucum_code_from_unit_iri(unit_iri): + graph = rdflib.Graph() + graph.parse(unit_iri) + result = graph.query( + f'SELECT * WHERE {{<{unit_iri}> ?ucumCode}}' + ) + ucum_code = str(result.bindings[0]['ucumCode']) + return ucum_code class UnitDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) @@ -55,14 +63,9 @@ def 
_replace_units(obj, context, original_key_lookup_dict): {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": compacted_obj["unit"]}, {} )[0]["urn:ontopint:iri"][0]["@id"] obj.pop(original_key_lookup_dict['unit']) - graph = rdflib.Graph() - graph.parse(unit_iri) - result = graph.query( - f'SELECT * WHERE {{<{unit_iri}> ?ucumCode}}' - ) - unit = result.bindings[0]['ucumCode'] + ucum_code = get_ucum_code_from_unit_iri(unit_iri) obj[original_key_lookup_dict['value']] = ureg.Quantity( - obj[original_key_lookup_dict['value']], ureg.from_ucum(unit) + obj[original_key_lookup_dict['value']], ureg.from_ucum(ucum_code) ) for key, value in obj.items(): obj[key] = _replace_units(value, context, original_key_lookup_dict) From de99abfa18b3524a759dd90f786b856b357930f9 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 12:17:18 +0200 Subject: [PATCH 5/9] feat: add iri lookup and tests --- pyproject.toml | 1 + src/ontopint/__init__.py | 24 ++++++++++++++++++++++++ tests/010_api_test.py | 15 +++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 tests/010_api_test.py diff --git a/pyproject.toml b/pyproject.toml index ce10a88..23cd397 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ classifiers = [ ] dependencies = [ "rdflib", + "sparqlwrapper", "pint", "pyld", "ucumvert", diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index d36c0db..3b7889d 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -1,5 +1,6 @@ import json +import SPARQLWrapper import rdflib from pyld import jsonld @@ -26,6 +27,29 @@ def get_ucum_code_from_unit_iri(unit_iri): ) ucum_code = str(result.bindings[0]['ucumCode']) return ucum_code + +def get_qunit_iri_from_unit_code(code, is_ucum_code = False): + sparql = SPARQLWrapper.SPARQLWrapper("https://www.qudt.org/fuseki/qudt/sparql") + + sparql.setMethod(SPARQLWrapper.POST) + code = "'" + code + "'" + query = """ + SELECT ?subject + WHERE { + ?subject 
<{{{predicate}}}> {{{code}}} . + } + LIMIT 1 + """.replace( + "{{{predicate}}}", "http://qudt.org/schema/qudt/ucumCode" if is_ucum_code else "http://qudt.org/schema/qudt/symbol" + ).replace( + "{{{code}}}", code + "^^" if is_ucum_code else code + ) + sparql.setQuery(query) + sparql.setReturnFormat(SPARQLWrapper.JSON) + result = sparql.query().convert() + result = result['results']['bindings'][0]['subject']['value'] + return result + class UnitDecoder(json.JSONDecoder): def __init__(self, *args, **kwargs): json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) diff --git a/tests/010_api_test.py b/tests/010_api_test.py new file mode 100644 index 0000000..97d85df --- /dev/null +++ b/tests/010_api_test.py @@ -0,0 +1,15 @@ + +import pint +from ontopint import get_qunit_iri_from_unit_code, get_ucum_code_from_unit_iri +import ontopint + +def test_pint_print_formats(): + # see https://pint.readthedocs.io/en/stable/user/formatting.html + q : pint.Quantity = pint.Quantity(1.0, ontopint.ureg.from_ucum("kg")) + assert( float(format(q, 'f#~').split(' ')[0]) == 1.0) + assert( format(q.u, '~') == "kg") + +def test_qudt_sparql_api(): + assert (get_qunit_iri_from_unit_code("kg") == "http://qudt.org/vocab/unit/KiloGM") + assert (get_qunit_iri_from_unit_code("kg", True) == "http://qudt.org/vocab/unit/KiloGM") + assert (get_ucum_code_from_unit_iri("http://qudt.org/vocab/unit/KiloGM") == "kg") \ No newline at end of file From 04ae420c2400f6fbd4b5da720e0e72378f4be9d3 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sat, 27 Apr 2024 12:20:48 +0200 Subject: [PATCH 6/9] feat: implement serialization Refs: #2 --- pyproject.toml | 1 + src/ontopint/__init__.py | 46 +++++++++++++++++- ...on_test.py => 020_deserialization_test.py} | 38 +-------------- tests/030_serialization_test.py | 47 +++++++++++++++++++ tests/040_roundtrip_test.py | 24 ++++++++++ tests/common.py | 39 +++++++++++++++ 6 files changed, 157 insertions(+), 38 deletions(-) rename 
tests/{deserialization_test.py => 020_deserialization_test.py} (58%) create mode 100644 tests/030_serialization_test.py create mode 100644 tests/040_roundtrip_test.py create mode 100644 tests/common.py diff --git a/pyproject.toml b/pyproject.toml index 23cd397..0434cd8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "pytest", + "deepdiff", ] [project.license] diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index 3b7889d..0712e95 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -6,6 +6,7 @@ # from pint import UnitRegistry from ucumvert import PintUcumRegistry +import pint # ureg = UnitRegistry() ureg = PintUcumRegistry() @@ -100,6 +101,33 @@ def _replace_units(obj, context, original_key_lookup_dict): ] else: return obj + +def _serialize_units(obj, context, original_key_lookup_dict): + if isinstance(obj, dict): + for key in list(obj.keys()): # make a list copy in order to delete keys while iterating + value = obj[key] + if (isinstance(value, pint.Quantity)): + # see https://pint.readthedocs.io/en/stable/user/formatting.html + quantity_value = float(format(value, 'f#~').split(' ')[0]) + unit_code = format(value.u, '~') + # ToDo: use ucum code + unit_iri = get_qunit_iri_from_unit_code(unit_code) + # note: "urn:ontopint:iri" is just any iri not existing in the input data + unit_compact_iri = jsonld.compact( + {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": unit_iri}, + {**context, "urn:ontopint:iri": {"@type": "@id"}} + )["urn:ontopint:iri"] + obj[original_key_lookup_dict['value']] = quantity_value + obj[original_key_lookup_dict['unit']] = unit_compact_iri + + else: obj[key] = _serialize_units(value, context, original_key_lookup_dict) + return obj + elif isinstance(obj, list): + return [ + _serialize_units(value, context, original_key_lookup_dict) for value in obj + ] + else: + return obj def parse_units(json_ld: dict) 
-> dict: @@ -113,5 +141,21 @@ def parse_units(json_ld: dict) -> dict: # reverse the dict original_key_lookup_dict = {v: k for k, v in compacted.items()} parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict) - parsed_json['@context'] = original_context + parsed_json = {'@context': original_context, **parsed_json} + json_ld['@context'] = original_context # restore context + return parsed_json + +def export_units(json_ld: dict, context = processing_context) -> dict: + original_context = json_ld.pop('@context', context) + key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'} + # inverse expand-reverse cycle + expanded = jsonld.expand(key_dict, processing_context) + compacted = jsonld.compact(expanded, original_context) + # remove the context + del compacted['@context'] + # reverse the dict + original_key_lookup_dict = {v: k for k, v in compacted.items()} + parsed_json = _serialize_units(json_ld, original_context, original_key_lookup_dict) + parsed_json = {'@context': original_context, **parsed_json} + json_ld['@context'] = original_context # restore context return parsed_json diff --git a/tests/deserialization_test.py b/tests/020_deserialization_test.py similarity index 58% rename from tests/deserialization_test.py rename to tests/020_deserialization_test.py index 9f92d69..a401932 100644 --- a/tests/deserialization_test.py +++ b/tests/020_deserialization_test.py @@ -1,43 +1,7 @@ -import os import ontopint -import json import pint -def _load_test_data(file_name: str) -> dict: - """loads a json file from the test data folder - - Parameters - ---------- - file_name - the name of the json file - - Returns - ------- - the content of the file as dict - """ - data = {} - with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: - data = json.load(f) - return data - -def _recursive_items(dictionary: dict): - """Returns a generator of tuples for every key-value pair in the dict - - Parameters - ---------- - 
dictionary - any (nested) dict - - Yields - ------ - iterator for key-value tuples of the dict - """ - for key, value in dictionary.items(): - if type(value) is dict: - yield (key, value) - yield from _recursive_items(value) - else: - yield (key, value) +from common import _load_test_data, _recursive_items def test_default_keys(): """test input data with default keys 'value' and 'unit' diff --git a/tests/030_serialization_test.py b/tests/030_serialization_test.py new file mode 100644 index 0000000..e5d9bca --- /dev/null +++ b/tests/030_serialization_test.py @@ -0,0 +1,47 @@ +import ontopint +import deepdiff + +def test_default_keys(): + """test input data with default keys 'value' and 'unit' + """ + + test = { + "value": ontopint.ureg.Quantity( + 1.123, ontopint.ureg.from_ucum("eV") + ) + } + expected = { + "value": 1.123, + "unit": "qunit:EV" + } + result = ontopint.export_units(test) + del result["@context"] + assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + test = { + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "my_unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "my_value": "qudt:value", + }, + "my_value": ontopint.ureg.Quantity( + 1.123, ontopint.ureg.from_ucum("eV") + ) + } + expected = { + "my_value": 1.123, + "my_unit": "qunit:EV" + } + result = ontopint.export_units(test) + del result["@context"] + assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff + + \ No newline at end of file diff --git a/tests/040_roundtrip_test.py b/tests/040_roundtrip_test.py new file mode 100644 index 0000000..9dbbde0 --- /dev/null +++ b/tests/040_roundtrip_test.py @@ -0,0 +1,24 @@ +import ontopint +import deepdiff + +from common import _load_test_data, _recursive_items + +def test_default_keys(): + """test input data with default 
keys 'value' and 'unit' + """ + input_jsonld = _load_test_data("test_data_default_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + result = ontopint.export_units(parsed_jsonld) + assert (len(deepdiff.DeepDiff(input_jsonld, result).keys()) == 0) # no diff + +def test_custom_keys(): + """test input data with custom keys 'my_value' and 'my_unit' + """ + input_jsonld = _load_test_data("test_data_custom_keys.jsonld") + parsed_jsonld = ontopint.parse_units(input_jsonld) + result = ontopint.export_units(parsed_jsonld) + diff = deepdiff.DeepDiff(input_jsonld, result) + print(deepdiff.DeepDiff(input_jsonld, result)) + assert (len(deepdiff.DeepDiff(input_jsonld, result).keys()) == 0) # no diff + +test_custom_keys() \ No newline at end of file diff --git a/tests/common.py b/tests/common.py new file mode 100644 index 0000000..874bf6d --- /dev/null +++ b/tests/common.py @@ -0,0 +1,39 @@ +import json +import os + + +def _load_test_data(file_name: str) -> dict: + """loads a json file from the test data folder + + Parameters + ---------- + file_name + the name of the json file + + Returns + ------- + the content of the file as dict + """ + data = {} + with open(os.path.join(os.path.dirname(__file__), "data", file_name)) as f: + data = json.load(f) + return data + +def _recursive_items(dictionary: dict): + """Returns a generator of tuples for every key-value pair in the dict + + Parameters + ---------- + dictionary + any (nested) dict + + Yields + ------ + iterator for key-value tuples of the dict + """ + for key, value in dictionary.items(): + if type(value) is dict: + yield (key, value) + yield from _recursive_items(value) + else: + yield (key, value) \ No newline at end of file From cb9715892bfaf3804f8e701b55c610c2069ddb44 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sun, 28 Apr 2024 17:48:13 +0200 Subject: [PATCH 7/9] fix: restrict type for symbol query reason: e. g. 
http://qudt.org/vocab/unit/M and http://qudt.org/vocab/quantitykind/Mass share the symbol 'm' --- src/ontopint/__init__.py | 2 ++ tests/010_api_test.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index 0712e95..29fc8a5 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -30,6 +30,7 @@ def get_ucum_code_from_unit_iri(unit_iri): return ucum_code def get_qunit_iri_from_unit_code(code, is_ucum_code = False): + # testing: https://www.qudt.org/fuseki/#/dataset/qudt/query sparql = SPARQLWrapper.SPARQLWrapper("https://www.qudt.org/fuseki/qudt/sparql") sparql.setMethod(SPARQLWrapper.POST) @@ -37,6 +38,7 @@ def get_qunit_iri_from_unit_code(code, is_ucum_code = False): query = """ SELECT ?subject WHERE { + ?subject . ?subject <{{{predicate}}}> {{{code}}} . } LIMIT 1 diff --git a/tests/010_api_test.py b/tests/010_api_test.py index 97d85df..f554ecf 100644 --- a/tests/010_api_test.py +++ b/tests/010_api_test.py @@ -12,4 +12,7 @@ def test_pint_print_formats(): def test_qudt_sparql_api(): assert (get_qunit_iri_from_unit_code("kg") == "http://qudt.org/vocab/unit/KiloGM") assert (get_qunit_iri_from_unit_code("kg", True) == "http://qudt.org/vocab/unit/KiloGM") - assert (get_ucum_code_from_unit_iri("http://qudt.org/vocab/unit/KiloGM") == "kg") \ No newline at end of file + assert (get_ucum_code_from_unit_iri("http://qudt.org/vocab/unit/KiloGM") == "kg") + + assert (get_qunit_iri_from_unit_code("m") == "http://qudt.org/vocab/unit/M") + assert (get_qunit_iri_from_unit_code("m", True) == "http://qudt.org/vocab/unit/M") \ No newline at end of file From c44f6e34ea45d7be18cdfe80743c83b3ff9f9667 Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sun, 28 Apr 2024 18:39:06 +0200 Subject: [PATCH 8/9] fix: unit code extraction '#' format code may convert to base unit, remove it --- src/ontopint/__init__.py | 3 ++- tests/010_api_test.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 4 
deletions(-) diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index 29fc8a5..7df3e50 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -110,7 +110,8 @@ def _serialize_units(obj, context, original_key_lookup_dict): value = obj[key] if (isinstance(value, pint.Quantity)): # see https://pint.readthedocs.io/en/stable/user/formatting.html - quantity_value = float(format(value, 'f#~').split(' ')[0]) + # value = value.to_base_units() # this will not work until we have ucum support + quantity_value = float(format(value, 'f~').split(' ')[0]) unit_code = format(value.u, '~') # ToDo: use ucum code unit_iri = get_qunit_iri_from_unit_code(unit_code) diff --git a/tests/010_api_test.py b/tests/010_api_test.py index f554ecf..fa15e6a 100644 --- a/tests/010_api_test.py +++ b/tests/010_api_test.py @@ -5,9 +5,15 @@ def test_pint_print_formats(): # see https://pint.readthedocs.io/en/stable/user/formatting.html - q : pint.Quantity = pint.Quantity(1.0, ontopint.ureg.from_ucum("kg")) - assert( float(format(q, 'f#~').split(' ')[0]) == 1.0) - assert( format(q.u, '~') == "kg") + q : pint.Quantity = pint.Quantity(1.0, ontopint.ureg.from_ucum("kg")).to_base_units() + assert( float(format(q, 'f~').split(' ')[0]) == 1.0) + assert( format(q.u, '~') == "kg") + q : pint.Quantity = pint.Quantity(304, ontopint.ureg.from_ucum("cm")) + assert( float(format(q, 'f~').split(' ')[0]) == 304) + assert( format(q, 'f~').split(' ')[1] == "cm") + q : pint.Quantity = pint.Quantity(10, ontopint.ureg.from_ucum("eV")) + assert( float(format(q, 'f~').split(' ')[0]) == 10) + assert( format(q.u, '~') == "eV") def test_qudt_sparql_api(): assert (get_qunit_iri_from_unit_code("kg") == "http://qudt.org/vocab/unit/KiloGM") From 908501aba9f25a40650321485148e513d5e0589e Mon Sep 17 00:00:00 2001 From: SimonTaurus Date: Sun, 28 Apr 2024 18:39:28 +0200 Subject: [PATCH 9/9] docs: add example in README --- README.md | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 
74 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 15375cb..56a3760 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,75 @@ # ontopint -A python package for reading units from a JSON-LD files and generating pint quantities. +A python package for reading & writing units from JSON-LD files and generating pint quantities. + +## How it works + +```python +import ontopint + +# jsonld input with 'value' and 'unit' mapped to qudt terms +data = { + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "quantity": { + "@id": "qudt:hasQuantityKind", + "@type": "@id" + }, + "value": "qudt:value" + }, + "value": 4.0, + "unit": "qunit:CentiM" +} + +# convert the value + unit pairs to pint.Quantity +data = ontopint.parse_units(data) +print(data) +""" +{ + '@context': {...}, + 'value': <Quantity(4.0, 'centimeter')> +} +""" + +# do something with pint +data["value"] += 3 * ontopint.ureg.meter +data["value"] = data["value"].to(ontopint.ureg.meter) +print(data) +""" +{ + '@context': {...}, + 'value': <Quantity(3.04, 'meter')> +} +""" + +# export the result as jsonld +data = ontopint.export_units(data) +print(data) +""" +{ + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "quantity": { + "@id": "qudt:hasQuantityKind", + "@type": "@id" + }, + "value": "qudt:value" + }, + "value": 3.04, + "unit": "qunit:M" +} +""" +``` + +Note: more complex examples can be found at [tests/data](https://github.com/hampusnasstrom/ontopint/tree/main/tests/data)