Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BREAKING CHANGE: eliminate rdflib.term.Genid and rdflib.term.RDFLibGenid #2459

Open
wants to merge 6 commits into
base: 8.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,14 +285,13 @@
from rdflib.store import Store
from rdflib.term import (
BNode,
Genid,
IdentifiedNode,
Identifier,
Literal,
Node,
RDFLibGenid,
URIRef,
Variable,
_Deskolemizer,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -475,6 +474,7 @@ def __init__(
self.context_aware = False
self.formula_aware = False
self.default_union = False
self._deskolemizer = _Deskolemizer()

def __getnewargs__(self) -> tuple[Any, ...]:
return (
Expand Down Expand Up @@ -1859,38 +1859,38 @@ def do_skolemize2(t: _TripleType) -> _TripleType:
def de_skolemize(
self, new_graph: Graph | None = None, uriref: URIRef | None = None
) -> Graph:
"""
Return a new graph with skolem IRIs replaced with their blank node mappings.

:param new_graph: An optional target graph where the de-skolemization results
will be stored. If not provided, a new graph is created.
:param uriref: The skolem IRI to be de-skolemized. If not provided,
de-skolemization is applied to all skolem IRIs in the graph.
"""

def do_de_skolemize(uriref: URIRef, t: _TripleType) -> _TripleType:
(s, p, o) = t
if s == uriref:
if TYPE_CHECKING:
assert isinstance(s, URIRef)
s = s.de_skolemize()
s = self._deskolemizer(s)
if o == uriref:
if TYPE_CHECKING:
assert isinstance(o, URIRef)
o = o.de_skolemize()
o = self._deskolemizer(o)
return s, p, o

def do_de_skolemize2(t: _TripleType) -> _TripleType:
(s, p, o) = t

if RDFLibGenid._is_rdflib_skolem(s):
s = RDFLibGenid(s).de_skolemize()
elif Genid._is_external_skolem(s):
s = Genid(s).de_skolemize()

if RDFLibGenid._is_rdflib_skolem(o):
o = RDFLibGenid(o).de_skolemize()
elif Genid._is_external_skolem(o):
o = Genid(o).de_skolemize()

return s, p, o
if TYPE_CHECKING:
assert isinstance(s, URIRef) and isinstance(o, URIRef)
return self._deskolemizer(s), p, self._deskolemizer(o)

retval = Graph() if new_graph is None else new_graph

if uriref is None:
self._process_skolem_tuples(retval, do_de_skolemize2)
elif isinstance(uriref, Genid):
else:
# type error: Argument 1 to "do_de_skolemize" has incompatible type "Optional[URIRef]"; expected "URIRef"
self._process_skolem_tuples(retval, lambda t: do_de_skolemize(uriref, t)) # type: ignore[arg-type, unused-ignore]

Expand Down Expand Up @@ -2539,11 +2539,11 @@ def graph(
base: str | None = None,
) -> Graph:
if identifier is None:
from rdflib.term import _SKOLEM_DEFAULT_AUTHORITY, rdflib_skolem_genid
from rdflib.term import _RDFLIB_GENID_PATH, _SKOLEM_DEFAULT_AUTHORITY

self.bind(
"genid",
_SKOLEM_DEFAULT_AUTHORITY + rdflib_skolem_genid,
_SKOLEM_DEFAULT_AUTHORITY + _RDFLIB_GENID_PATH,
override=False,
)
identifier = BNode().skolemize()
Expand Down
83 changes: 26 additions & 57 deletions rdflib/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
TypeVar,
overload,
)
from urllib.parse import urldefrag, urljoin, urlparse
from urllib.parse import urldefrag, urljoin, urlparse, urlsplit
from uuid import uuid4

import rdflib
Expand Down Expand Up @@ -93,9 +93,9 @@
_SKOLEM_DEFAULT_AUTHORITY = "https://rdflib.github.io"

logger = logging.getLogger(__name__)
skolem_genid = "/.well-known/genid/"
rdflib_skolem_genid = "/.well-known/genid/rdflib/"
skolems: dict[str, BNode] = {}
_WELL_KNOWN_GENID = "/.well-known/genid/"
_RDFLIB_GENID_SUFFIX = "rdflib/"
_RDFLIB_GENID_PATH = "/.well-known/genid/" + _RDFLIB_GENID_SUFFIX


_invalid_uri_chars = '<>" {}|\\^`'
Expand Down Expand Up @@ -383,61 +383,30 @@ def __radd__(self, other) -> URIRef:
def __mod__(self, other) -> URIRef:
return self.__class__(str(self) % other)

def de_skolemize(self) -> BNode:
"""Create a Blank Node from a skolem URI, in accordance
with http://www.w3.org/TR/rdf11-concepts/#section-skolemization.
This function accepts only rdflib type skolemization, to provide
a round-tripping within the system.

.. versionadded:: 4.0
"""
if isinstance(self, RDFLibGenid):
parsed_uri = urlparse(f"{self}")
return BNode(value=parsed_uri.path[len(rdflib_skolem_genid) :])
elif isinstance(self, Genid):
bnode_id = f"{self}"
if bnode_id in skolems:
return skolems[bnode_id]
else:
retval = BNode()
skolems[bnode_id] = retval
return retval
else:
raise Exception(f"<{self}> is not a skolem URI")
class _Deskolemizer:
__slots__ = "_skolems"

def __init__(self) -> None:
self._skolems: dict[str, BNode] = {}

class Genid(URIRef):
__slots__ = ()

@staticmethod
def _is_external_skolem(uri: Any) -> bool:
if not isinstance(uri, str):
uri = str(uri)
parsed_uri = urlparse(uri)
gen_id = parsed_uri.path.rfind(skolem_genid)
if gen_id != 0:
return False
return True


class RDFLibGenid(Genid):
__slots__ = ()
def __call__(self, uri: URIRef) -> URIRef | BNode:
parsed_uri = urlsplit(uri)
if parsed_uri.query != "" or parsed_uri.fragment != "":
# Behaviour is undefined for skolem URIs with query or fragment, so just return the URI
return uri
if parsed_uri.path.startswith(_WELL_KNOWN_GENID):
genid_suffix = parsed_uri.path[len(_WELL_KNOWN_GENID) :]
if genid_suffix.startswith(_RDFLIB_GENID_SUFFIX):
return BNode(value=parsed_uri.path[len(_RDFLIB_GENID_PATH) :])
else:
if uri in self._skolems:
return self._skolems[uri]

@staticmethod
def _is_rdflib_skolem(uri: Any) -> bool:
if not isinstance(uri, str):
uri = str(uri)
parsed_uri = urlparse(uri)
if (
parsed_uri.params != ""
or parsed_uri.query != ""
or parsed_uri.fragment != ""
):
return False
gen_id = parsed_uri.path.rfind(rdflib_skolem_genid)
if gen_id != 0:
return False
return True
retval = BNode()
self._skolems[uri] = retval
return retval
return uri


def _unique_id() -> str:
Expand Down Expand Up @@ -536,8 +505,8 @@ def skolemize(
if authority is None:
authority = _SKOLEM_DEFAULT_AUTHORITY
if basepath is None:
basepath = rdflib_skolem_genid
skolem = basepath + str(self)
basepath = _RDFLIB_GENID_PATH
skolem = "%s%s" % (basepath, str(self))
return URIRef(urljoin(authority, skolem))


Expand Down
4 changes: 2 additions & 2 deletions test/test_issues/test_issue1808.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from rdflib import Graph
from rdflib.term import BNode, URIRef, rdflib_skolem_genid
from rdflib.term import _RDFLIB_GENID_PATH, BNode, URIRef


def test():
Expand All @@ -15,7 +15,7 @@ def test():

gs = g.skolemize()
for s, p, o in gs:
assert isinstance(s, URIRef) and s.__contains__(rdflib_skolem_genid)
assert isinstance(s, URIRef) and _RDFLIB_GENID_PATH in s

query_with_iri = "select ?p ?o {{ <{}> ?p ?o }}".format(s)
query_for_all = "select ?s ?p ?o { ?s ?p ?o }"
Expand Down
43 changes: 36 additions & 7 deletions test/test_skolem_genid.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,46 @@
from rdflib import URIRef
from rdflib.term import Genid, RDFLibGenid
from rdflib import RDF, SDO, BNode, Graph, URIRef
from rdflib.term import _Deskolemizer


def test_skolem_genid_and_rdflibgenid():
def test_skolem_genid_and_rdflib_genid():
rdflib_genid = URIRef(
"https://rdflib.github.io/.well-known/genid/rdflib/N97c39b957bc444949a82793519348dc2"
)
custom_genid = URIRef(
"http://example.com/.well-known/genid/example/Ne864c0e3684044f381d518fdac652f2e"
)

assert RDFLibGenid._is_rdflib_skolem(rdflib_genid) is True
assert Genid._is_external_skolem(rdflib_genid) is True
_deskolemizer = _Deskolemizer()
rdflib_bnode = _deskolemizer(rdflib_genid)
assert isinstance(rdflib_bnode, BNode)
assert rdflib_bnode.n3() == "_:N97c39b957bc444949a82793519348dc2"

assert RDFLibGenid._is_rdflib_skolem(custom_genid) is False
assert Genid._is_external_skolem(custom_genid) is True
custom_bnode = _deskolemizer(custom_genid)
assert isinstance(custom_bnode, BNode)
assert custom_bnode.n3().startswith("_:")


def test_graph_de_skolemize():
    """Check that ``Graph.de_skolemize(uriref=...)`` rewrites only the requested IRI.

    Two statements are stored, one whose subject is an rdflib-style genid IRI
    and one whose subject is a genid IRI under a different path. The graph is
    then de-skolemized one IRI at a time and the result is inspected after
    each pass.
    """
    rdflib_genid = URIRef(
        "https://rdflib.github.io/.well-known/genid/rdflib/N97c39b957bc444949a82793519348dc2"
    )
    custom_genid = URIRef(
        "http://example.com/.well-known/genid/example/Ne864c0e3684044f381d518fdac652f2e"
    )
    rdflib_statement = (rdflib_genid, RDF.type, SDO.Thing)
    custom_statement = (custom_genid, RDF.type, SDO.Person)

    source = Graph()
    for statement in (rdflib_statement, custom_statement):
        source.add(statement)

    # First pass: only the rdflib genid is targeted, so the custom statement
    # must come through untouched.
    first_pass = source.de_skolemize(uriref=rdflib_genid)
    expected_bnode_statement = (
        BNode("N97c39b957bc444949a82793519348dc2"),
        RDF.type,
        SDO.Thing,
    )
    assert rdflib_statement not in first_pass
    assert expected_bnode_statement in first_pass
    assert custom_statement in first_pass

    # Second pass: now the custom genid is targeted; its subject should end up
    # as a blank node (only the node type is checked, not its label).
    second_pass = first_pass.de_skolemize(uriref=custom_genid)
    assert custom_statement not in second_pass
    person_subject = second_pass.value(predicate=RDF.type, object=SDO.Person)
    assert isinstance(person_subject, BNode)
Loading