Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add serializer #3

Merged
merged 12 commits into from
Apr 29, 2024
Merged
4 changes: 4 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install build
- name: Test with pytest
run: |
pip install pytest
pytest
- name: Build package
run: python -m build
- name: Publish package
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/python-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# This workflow will run pytest only

name: Run pytest

on:
push:
pull_request:
workflow_dispatch:


permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
- name: Test with pytest
run: |
pytest
75 changes: 74 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,75 @@
# ontopint
A python package for reading units from a JSON-LD files and generating pint quantities.
A Python package for reading and writing units from JSON-LD files and generating pint quantities.

## How it works

```python
import ontopint

# jsonld input with 'value' and 'unit' mapped to qudt terms
data = {
"@context": {
"qudt": "http://qudt.org/schema/qudt/",
"qunit": "http://qudt.org/vocab/unit/",
"qkind": "http://qudt.org/vocab/quantkind/",
"unit": {
"@id": "qudt:hasUnit",
"@type": "@id"
},
"quantity": {
"@id": "qudt:hasQuantityKind",
"@type": "@id"
},
"value": "qudt:value"
},
"value": 4.0,
"unit": "qunit:CentiM"
}

# convert the value + unit pairs to pint.Quantity
data = ontopint.parse_units(data)
print(data)
"""
{
'@context': {...},
'value': <Quantity(4.0, 'centimeter')>
}
"""

# do something with pint
data["value"] += 3 * ontopint.ureg.meter
data["value"] = data["value"].to(ontopint.ureg.meter)
print(data)
"""
{
'@context': {...},
'value': <Quantity(3.04, 'meter')>
}
"""

# export the result as jsonld
data = ontopint.export_units(data)
print(data)
"""
{
"@context": {
"qudt": "http://qudt.org/schema/qudt/",
"qunit": "http://qudt.org/vocab/unit/",
"qkind": "http://qudt.org/vocab/quantkind/",
"unit": {
"@id": "qudt:hasUnit",
"@type": "@id"
},
"quantity": {
"@id": "qudt:hasQuantityKind",
"@type": "@id"
},
"value": "qudt:value"
},
"value": 3.04,
"unit": "qunit:M"
}
"""
```

Note: more complex examples can be found at [tests/data](https://github.com/hampusnasstrom/ontopint/tree/main/tests/data)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ classifiers = [
]
dependencies = [
"rdflib",
"sparqlwrapper",
"pint",
"pyld",
"ucumvert",
]
[project.optional-dependencies]
dev = [
"pytest",
"deepdiff",
]

[project.license]
Expand Down
100 changes: 87 additions & 13 deletions src/ontopint/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import json

import SPARQLWrapper
import rdflib
from pyld import jsonld

# from pint import UnitRegistry
from ucumvert import PintUcumRegistry
import pint

# ureg = UnitRegistry()
ureg = PintUcumRegistry()
Expand All @@ -18,9 +20,38 @@
'value': 'qudt:value',
}

HAS_UNIT = 'http://qudt.org/schema/qudt/hasUnit'
VALUE = 'http://qudt.org/schema/qudt/value'
def get_ucum_code_from_unit_iri(unit_iri):
    """Return the UCUM code declared for a unit IRI.

    The IRI is dereferenced and parsed into a temporary RDF graph, which is
    then queried for the ``http://qudt.org/schema/qudt/ucumCode`` value of
    that unit.

    Args:
        unit_iri: Full IRI of the unit, e.g.
            ``http://qudt.org/vocab/unit/KiloGM``.

    Returns:
        The UCUM code as a plain ``str``. If the graph holds several codes
        for the unit, the first query result is returned.

    Raises:
        IndexError: If the retrieved graph contains no UCUM code for
            ``unit_iri``.
    """
    graph = rdflib.Graph()
    # Loads the RDF description of the unit from the IRI itself.
    graph.parse(unit_iri)
    # The IRI is bound as a query variable instead of being spliced into the
    # query text, so characters in the IRI can never alter the query.
    result = graph.query(
        'SELECT ?ucumCode WHERE {?unit <http://qudt.org/schema/qudt/ucumCode> ?ucumCode}',
        initBindings={'unit': rdflib.URIRef(unit_iri)},
    )
    bindings = result.bindings
    if not bindings:
        # Same exception type as the former bare ``bindings[0]`` lookup, but
        # with a message that names the offending unit.
        raise IndexError(f'No UCUM code found for unit IRI {unit_iri!r}')
    return str(bindings[0]['ucumCode'])

def get_qunit_iri_from_unit_code(code, is_ucum_code = False):
    """Look up the QUDT unit IRI that matches a unit symbol or UCUM code.

    A SPARQL query is sent via POST to the QUDT endpoint at
    ``https://www.qudt.org/fuseki/qudt/sparql``.

    Args:
        code: The unit code to search for. It is matched against
            ``qudt:symbol`` by default, or against ``qudt:ucumCode`` when
            ``is_ucum_code`` is true.
        is_ucum_code: Whether ``code`` is a UCUM code (typed as
            ``qudt:UCUMcs`` in the query) rather than a unit symbol.

    Returns:
        The IRI (``str``) of the first matching ``qudt:Unit``.

    Raises:
        IndexError: If the endpoint returns no matching unit.
    """
    # testing: https://www.qudt.org/fuseki/#/dataset/qudt/query
    sparql = SPARQLWrapper.SPARQLWrapper("https://www.qudt.org/fuseki/qudt/sparql")
    sparql.setMethod(SPARQLWrapper.POST)

    # A single-quoted SPARQL string may not contain a raw backslash, single
    # quote, line feed or carriage return. Escape them so that codes such as
    # the UCUM arc minute (a lone apostrophe) yield a valid query and cannot
    # change its structure. Backslashes go first to avoid double escaping.
    escaped_code = (
        code.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    literal = "'" + escaped_code + "'"
    if is_ucum_code:
        predicate = "http://qudt.org/schema/qudt/ucumCode"
        # UCUM codes are stored as typed literals in QUDT.
        literal += "^^<http://qudt.org/schema/qudt/UCUMcs>"
    else:
        predicate = "http://qudt.org/schema/qudt/symbol"

    query = f"""
        SELECT ?subject
        WHERE {{
            ?subject <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://qudt.org/schema/qudt/Unit> .
            ?subject <{predicate}> {literal} .
        }}
        LIMIT 1
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(SPARQLWrapper.JSON)
    bindings = sparql.query().convert()['results']['bindings']
    if not bindings:
        # Same exception type as the former bare ``bindings[0]`` lookup, but
        # with a message that names the code that could not be resolved.
        kind = "UCUM code" if is_ucum_code else "symbol"
        raise IndexError(f"No QUDT unit found for {kind} {code!r}")
    return bindings[0]['subject']['value']

class UnitDecoder(json.JSONDecoder):
def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -51,18 +82,17 @@ def object_hook(self, obj):

def _replace_units(obj, context, original_key_lookup_dict):
if isinstance(obj, dict):
expanded_obj = jsonld.expand({**obj, '@context': context}, context)
if HAS_UNIT in expanded_obj[0] and VALUE in expanded_obj[0]:
unit_iri = expanded_obj[0][HAS_UNIT][0]['@id']
expanded_obj = jsonld.expand({**obj, "@context": context}, context)
compacted_obj = jsonld.compact(expanded_obj, processing_context)
if 'unit' in compacted_obj and 'value' in compacted_obj:
# note: "urn:ontopint:iri" is just any iri not existing in the input data
unit_iri = jsonld.expand(
{"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": compacted_obj["unit"]}, {}
)[0]["urn:ontopint:iri"][0]["@id"]
obj.pop(original_key_lookup_dict['unit'])
graph = rdflib.Graph()
graph.parse(unit_iri)
result = graph.query(
f'SELECT * WHERE {{<{unit_iri}> <http://qudt.org/schema/qudt/symbol> ?ucumCode}}'
)
unit = result.bindings[0]['ucumCode']
ucum_code = get_ucum_code_from_unit_iri(unit_iri)
obj[original_key_lookup_dict['value']] = ureg.Quantity(
obj[original_key_lookup_dict['value']], ureg.from_ucum(unit)
obj[original_key_lookup_dict['value']], ureg.from_ucum(ucum_code)
)
for key, value in obj.items():
obj[key] = _replace_units(value, context, original_key_lookup_dict)
Expand All @@ -73,6 +103,34 @@ def _replace_units(obj, context, original_key_lookup_dict):
]
else:
return obj

def _serialize_units(obj, context, original_key_lookup_dict):
    """Recursively replace pint quantities with plain value/unit entries.

    Dicts are modified in place: for every entry holding a
    ``pint.Quantity``, the numeric magnitude is written under the document's
    value key and the compacted QUDT unit IRI under its unit key. All other
    entries are processed recursively. Lists are rebuilt element by element;
    any other object is returned unchanged.

    Args:
        obj: The (sub)document to process.
        context: JSON-LD context of the document, used to compact the unit
            IRI (e.g. to ``qunit:M``).
        original_key_lookup_dict: Maps the canonical names ``'value'`` and
            ``'unit'`` to the key names used in the document.

    Returns:
        The processed object (the same dict instance for dict input).
    """
    if isinstance(obj, dict):
        # Iterate over a snapshot of the keys, because entries are added to
        # the dict while looping.
        for key in list(obj):
            value = obj[key]
            if isinstance(value, pint.Quantity):
                # Take the magnitude directly. Parsing the 'f~' formatted
                # string would round to six decimals and, for instance, turn
                # 1e-9 into 0.0.
                quantity_value = float(value.magnitude)
                # see https://pint.readthedocs.io/en/stable/user/formatting.html
                unit_code = format(value.u, '~')
                # ToDo: use ucum code
                unit_iri = get_qunit_iri_from_unit_code(unit_code)
                # note: "urn:ontopint:iri" is just any iri not existing in the input data
                iri_context = {**context, "urn:ontopint:iri": {"@type": "@id"}}
                unit_compact_iri = jsonld.compact(
                    {"@context": iri_context, "urn:ontopint:iri": unit_iri},
                    iri_context,
                )["urn:ontopint:iri"]
                # NOTE(review): the result is stored under the document's
                # value/unit keys, not under ``key`` - confirm this is intended
                # for quantities found under other keys.
                obj[original_key_lookup_dict['value']] = quantity_value
                obj[original_key_lookup_dict['unit']] = unit_compact_iri
            else:
                obj[key] = _serialize_units(value, context, original_key_lookup_dict)
        return obj
    elif isinstance(obj, list):
        return [
            _serialize_units(value, context, original_key_lookup_dict) for value in obj
        ]
    else:
        return obj


def parse_units(json_ld: dict) -> dict:
Expand All @@ -86,5 +144,21 @@ def parse_units(json_ld: dict) -> dict:
# reverse the dict
original_key_lookup_dict = {v: k for k, v in compacted.items()}
parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict)
parsed_json['@context'] = original_context
parsed_json = {'@context': original_context, **parsed_json}
json_ld['@context'] = original_context # restore context
return parsed_json

def export_units(json_ld: dict, context = processing_context) -> dict:
    """Convert pint quantities in a document back to JSON-LD value/unit pairs.

    The document's own ``@context`` (or ``context`` if it has none) decides
    which key names carry the value and the unit. Quantities are serialized
    in place inside ``json_ld``; the returned dict is a new top-level dict
    with ``@context`` as its first entry.

    Args:
        json_ld: Document containing ``pint.Quantity`` objects.
        context: Fallback JSON-LD context, used when ``json_ld`` has no
            ``@context`` entry.

    Returns:
        A dict holding ``@context`` followed by the serialized content.
    """
    had_context = '@context' in json_ld
    original_context = json_ld.pop('@context', context)
    try:
        key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'}
        # inverse expand-reverse cycle: find out how the document's context
        # names the canonical 'unit' and 'value' terms
        expanded = jsonld.expand(key_dict, processing_context)
        compacted = jsonld.compact(expanded, original_context)
        # remove the context
        del compacted['@context']
        # reverse the dict
        original_key_lookup_dict = {v: k for k, v in compacted.items()}
        parsed_json = _serialize_units(json_ld, original_context, original_key_lookup_dict)
        return {'@context': original_context, **parsed_json}
    finally:
        # Restore the caller's context even if serialization fails, and never
        # inject a context into a document that did not have one.
        if had_context:
            json_ld['@context'] = original_context
24 changes: 24 additions & 0 deletions tests/010_api_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

import pint
from ontopint import get_qunit_iri_from_unit_code, get_ucum_code_from_unit_iri
import ontopint

def test_pint_print_formats():
    """Check the pint format specs used to split magnitude and unit symbol."""
    # see https://pint.readthedocs.io/en/stable/user/formatting.html
    kilogram: pint.Quantity = pint.Quantity(
        1.0, ontopint.ureg.from_ucum("kg")
    ).to_base_units()
    kilogram_text = format(kilogram, 'f~')
    assert float(kilogram_text.split(' ')[0]) == 1.0
    assert format(kilogram.u, '~') == "kg"

    centimeters: pint.Quantity = pint.Quantity(304, ontopint.ureg.from_ucum("cm"))
    centimeter_parts = format(centimeters, 'f~').split(' ')
    assert float(centimeter_parts[0]) == 304
    assert centimeter_parts[1] == "cm"

    electron_volts: pint.Quantity = pint.Quantity(10, ontopint.ureg.from_ucum("eV"))
    electron_volt_text = format(electron_volts, 'f~')
    assert float(electron_volt_text.split(' ')[0]) == 10
    assert format(electron_volts.u, '~') == "eV"

def test_qudt_sparql_api():
    """Round-trip unit codes and IRIs through the QUDT lookups."""
    kilogram_iri = "http://qudt.org/vocab/unit/KiloGM"
    meter_iri = "http://qudt.org/vocab/unit/M"

    # kilogram: by symbol, by UCUM code, and back from the IRI
    assert get_qunit_iri_from_unit_code("kg") == kilogram_iri
    assert get_qunit_iri_from_unit_code("kg", True) == kilogram_iri
    assert get_ucum_code_from_unit_iri(kilogram_iri) == "kg"

    # meter: by symbol and by UCUM code
    assert get_qunit_iri_from_unit_code("m") == meter_iri
    assert get_qunit_iri_from_unit_code("m", True) == meter_iri
33 changes: 33 additions & 0 deletions tests/020_deserialization_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import ontopint
import pint

from common import _load_test_data, _recursive_items

def test_default_keys():
    """Parse a document that uses the default 'value' and 'unit' keys."""
    document = _load_test_data("test_data_default_keys.jsonld")
    parsed = ontopint.parse_units(document)
    del parsed["@context"]

    quantity_count = 0
    for name, item in _recursive_items(parsed):
        # the unit must have been merged into the quantity
        assert name != "unit", "unit key should not be present"
        if name == "value":
            assert isinstance(item, pint.Quantity)
            quantity_count += 1
    assert quantity_count == 2, "result should contain 2 parsed values"

def test_custom_keys():
    """Parse a document that uses the custom 'my_value' and 'my_unit' keys."""
    document = _load_test_data("test_data_custom_keys.jsonld")
    parsed = ontopint.parse_units(document)
    del parsed["@context"]

    quantity_count = 0
    for name, item in _recursive_items(parsed):
        # the unit must have been merged into the quantity
        assert name != "my_unit", "my_unit key should not be present"
        if name == "my_value":
            assert isinstance(item, pint.Quantity)
            quantity_count += 1
    assert quantity_count == 2, "result should contain 2 parsed values"

47 changes: 47 additions & 0 deletions tests/030_serialization_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import ontopint
import deepdiff

def test_default_keys():
    """Export a quantity stored under the default 'value' key."""
    electron_volt = ontopint.ureg.from_ucum("eV")
    payload = {"value": ontopint.ureg.Quantity(1.123, electron_volt)}
    expected = {"value": 1.123, "unit": "qunit:EV"}

    exported = ontopint.export_units(payload)
    del exported["@context"]

    difference = deepdiff.DeepDiff(expected, exported)
    assert len(difference.keys()) == 0  # no diff

def test_custom_keys():
    """Export a quantity stored under the custom 'my_value' key."""
    custom_context = {
        "qudt": "http://qudt.org/schema/qudt/",
        "qunit": "http://qudt.org/vocab/unit/",
        "qkind": "http://qudt.org/vocab/quantkind/",
        "my_unit": {
            "@id": "qudt:hasUnit",
            "@type": "@id",
        },
        "my_value": "qudt:value",
    }
    electron_volt = ontopint.ureg.from_ucum("eV")
    payload = {
        "@context": custom_context,
        "my_value": ontopint.ureg.Quantity(1.123, electron_volt),
    }
    expected = {"my_value": 1.123, "my_unit": "qunit:EV"}

    exported = ontopint.export_units(payload)
    del exported["@context"]

    difference = deepdiff.DeepDiff(expected, exported)
    assert len(difference.keys()) == 0  # no diff


Loading
Loading