Skip to content

Commit

Permalink
WIP - updated SHACL parsing, shape parsing complete
Browse files Browse the repository at this point in the history
  • Loading branch information
recalcitrantsupplant committed Nov 11, 2024
1 parent 85a4a10 commit d18ffbe
Show file tree
Hide file tree
Showing 7 changed files with 331 additions and 185 deletions.
259 changes: 123 additions & 136 deletions poetry.lock

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions prez/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import httpx
from fastapi import Depends, HTTPException, Request
from pyoxigraph import Store
from rdflib import Dataset, URIRef, SKOS, RDF
from rdflib import Dataset, URIRef, SKOS, RDF, DCTERMS
from rdflib import RDF, SKOS, Dataset, URIRef
from sparql_grammar_pydantic import IRI, Var

Expand Down Expand Up @@ -193,6 +193,12 @@ async def cql_get_parser_dependency(
)


async def get_jena_fts_shacl_predicates(
    system_repo: Repo = Depends(get_system_repo),
):
    """Describe every Jena FTS property shape held in the system repository.

    Runs a SPARQL ``DESCRIBE`` over all resources typed as
    ``<https://prez.dev/ont/JenaFTSPropertyShape>`` and returns whatever
    ``system_repo.rdf_query_to_graph`` yields for that query (presumably an
    RDF graph of the shape descriptions -- confirm against ``Repo``).

    Args:
        system_repo: Repository to query; injected by FastAPI when this
            function is used as a dependency.
    """
    # The class IRI must be closed with '>' -- an unterminated IRIREF makes the
    # whole query a SPARQL syntax error.
    query = (
        "DESCRIBE ?fts_shape "
        "WHERE {?fts_shape a <https://prez.dev/ont/JenaFTSPropertyShape>}"
    )
    return await system_repo.rdf_query_to_graph(query)

async def generate_search_query(
request: Request,
system_repo: Repo = Depends(get_system_repo),
Expand All @@ -215,6 +221,8 @@ async def generate_search_query(
)
elif settings.search_method == SearchMethod.FTS_FUSEKI:
predicates = predicates if predicates else settings.search_predicates
shacl_shapes = await get_jena_fts_shacl_predicates()
shacl_shape_ids = list(shacl_shapes.objects(subject=None, predicate=DCTERMS.identifier))
shacl_shape_predicates = []
non_shacl_predicates = []
for pred in predicates:
Expand All @@ -233,7 +241,7 @@ async def generate_search_query(
# pass
fts_gpnt = []
return SearchQueryFusekiFTS(
term=escaped_term, predicates=predicates, limit=limit, offset=offset, fts_gpnt=fts_gpnt
term=escaped_term, predicates=non_shacl_predicates, limit=limit, offset=offset, fts_gpnt=fts_gpnt
)
else:
raise NotImplementedError(
Expand Down
188 changes: 171 additions & 17 deletions prez/services/query_generation/shacl.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,17 +507,167 @@ def to_grammar(self):
)
self.gpnt_list.append(gpnt)

# def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
# for property_path in property_paths:
# if f"{path_or_prop}_node_{pp_i + 1}" in self.path_nodes:
# path_node_1 = self.path_nodes[f"{path_or_prop}_node_{pp_i + 1}"]
# else:
# path_node_1 = Var(value=f"{path_or_prop}_node_{pp_i + 1}")
#
# if f"{path_or_prop}_node_{pp_i + 2}" in self.path_nodes:
# path_node_2 = self.path_nodes[f"{path_or_prop}_node_{pp_i + 2}"]
# else:
# path_node_2 = Var(value=f"{path_or_prop}_node_{pp_i + 2}")
#
# current_tssp = []
#
# if isinstance(property_path, Path):
# if property_path.value == SHEXT.allPredicateValues:
# pred = Var(value="preds")
# obj = Var(value="vals")
# else:
# pred = IRI(value=property_path.value)
# obj = path_node_1
# if self.kind == "fts":
# triple = (self.focus_node, pred, Var(value="fts_search_node"))
# else:
# triple = (self.focus_node, pred, obj)
# self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
# pp_i += 1
#
# elif isinstance(property_path, InversePath):
# if self.kind == "fts":
# triple = (
# Var(value="fts_search_node"),
# IRI(value=property_path.value.value),
# self.focus_node,
# )
# else:
# triple = (
# path_node_1,
# IRI(value=property_path.value.value),
# self.focus_node,
# )
# self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
# pp_i += 1
#
# elif isinstance(
# property_path, Union[ZeroOrMorePath, OneOrMorePath, ZeroOrOnePath]
# ):
# # triple = (self.focus_node, IRI(value=property_path.value), path_node_1)
# # self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# # remove TSS as it cannot capture the full set of triples possibly created by the path expression
# self.tssp_list.append(
# _tssp_for_pathmods(
# self.focus_node,
# IRI(value=property_path.value.value),
# path_node_1,
# property_path.operand,
# )
# )
# pp_i += 1
#
# elif isinstance(property_path, SequencePath):
# preds_pathmods_inverse = []
# seq_path_len = len(property_path.value)
# inner_path_type = None
# for j, path in enumerate(property_path.value):
# if isinstance(path, Path):
# inner_path_type = "path"
# if self.kind == "endpoint":
# preds_pathmods_inverse.append(
# (IRI(value=path.value), None, False)
# )
# elif (self.kind == "profile") or (self.kind == "fts"):
# if j == 0:
# triple = (
# self.focus_node,
# IRI(value=path.value),
# path_node_1,
# )
# else:
# triple = (
# path_node_1,
# IRI(value=path.value),
# path_node_2,
# )
# elif isinstance(path, InversePath):
# inner_path_type = "inverse"
# if self.kind == "endpoint":
# preds_pathmods_inverse.append(
# (IRI(value=path.value.value), None, True)
# )
# elif (self.kind == "profile") or (self.kind == "fts"):
# if j == 0:
# triple = (
# path_node_1,
# IRI(value=path.value.value),
# self.focus_node,
# )
# else:
# triple = (
# path_node_2,
# IRI(value=path.value.value),
# path_node_1,
# )
# elif isinstance(
# path, Union[ZeroOrMorePath, OneOrMorePath, ZeroOrOnePath]
# ):
# inner_path_type = "zero_one_more"
# if isinstance(path.value, Path):
# preds_pathmods_inverse.append(
# (IRI(value=path.value.value), path.operand, False)
# )
# elif isinstance(path.value, InversePath):
# preds_pathmods_inverse.append(
# (IRI(value=path.value.value.value), path.operand, True)
# )
# if self.kind == "profile":
# self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
# elif self.kind == "fts":
# if j == seq_path_len - 1: # we're at the end of the sequence path
# if inner_path_type != "inverse":
# new_triple = triple[:2] + (Var(value="fts_search_node"),)
# else:
# new_triple = (Var(value="fts_search_node"),) + triple[1:]
# self.tss_list.append(TriplesSameSubject.from_spo(*new_triple))
# current_tssp.append(TriplesSameSubjectPath.from_spo(*new_triple))
# else:
# self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
# pp_i += len(property_path.value)
# if self.kind == "endpoint":
# tssp = _tssp_for_sequence(
# self.focus_node, preds_pathmods_inverse, path_node_2
# )
# current_tssp.append(tssp)
#
# if current_tssp:
# tssp_list.append(current_tssp)
#
# return pp_i

def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
for property_path in property_paths:
# Always create path_node_1 as it's needed everywhere
if f"{path_or_prop}_node_{pp_i + 1}" in self.path_nodes:
path_node_1 = self.path_nodes[f"{path_or_prop}_node_{pp_i + 1}"]
else:
path_node_1 = Var(value=f"{path_or_prop}_node_{pp_i + 1}")

if f"{path_or_prop}_node_{pp_i + 2}" in self.path_nodes:
path_node_2 = self.path_nodes[f"{path_or_prop}_node_{pp_i + 2}"]
else:
path_node_2 = Var(value=f"{path_or_prop}_node_{pp_i + 2}")
# Create additional nodes only if we have a sequence path
path_nodes = {0: path_node_1} # Start with path_node_1
if isinstance(property_path, SequencePath):
seq_path_len = len(property_path.value)
for i in range(1, seq_path_len):
node_key = f"{path_or_prop}_node_{pp_i + i + 1}"
if node_key in self.path_nodes:
path_nodes[i] = self.path_nodes[node_key]
else:
path_nodes[i] = Var(value=node_key)

current_tssp = []

Expand Down Expand Up @@ -556,9 +706,6 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
elif isinstance(
property_path, Union[ZeroOrMorePath, OneOrMorePath, ZeroOrOnePath]
):
# triple = (self.focus_node, IRI(value=property_path.value), path_node_1)
# self.tss_list.append(TriplesSameSubject.from_spo(*triple))
# remove TSS as it cannot capture the full set of triples possibly created by the path expression
self.tssp_list.append(
_tssp_for_pathmods(
self.focus_node,
Expand All @@ -572,8 +719,10 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
elif isinstance(property_path, SequencePath):
preds_pathmods_inverse = []
seq_path_len = len(property_path.value)
inner_path_type = None
for j, path in enumerate(property_path.value):
if isinstance(path, Path):
inner_path_type = "path"
if self.kind == "endpoint":
preds_pathmods_inverse.append(
(IRI(value=path.value), None, False)
Expand All @@ -583,35 +732,37 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
triple = (
self.focus_node,
IRI(value=path.value),
path_node_1,
path_nodes[0], # This is path_node_1
)
else:
triple = (
path_node_1,
path_nodes[j-1], # Previous node
IRI(value=path.value),
path_node_2,
path_nodes[j], # Current node
)
elif isinstance(path, InversePath):
inner_path_type = "inverse"
if self.kind == "endpoint":
preds_pathmods_inverse.append(
(IRI(value=path.value.value), None, True)
)
elif (self.kind == "profile") or (self.kind == "fts"):
if j == 0:
triple = (
path_node_1,
IRI(value=path.value),
path_nodes[0], # path_node_1
IRI(value=path.value.value),
self.focus_node,
)
else:
triple = (
path_node_2,
IRI(value=path.value),
path_node_1,
path_nodes[j], # Current node
IRI(value=path.value.value),
path_nodes[j-1], # Previous node
)
elif isinstance(
path, Union[ZeroOrMorePath, OneOrMorePath, ZeroOrOnePath]
):
inner_path_type = "zero_one_more"
if isinstance(path.value, Path):
preds_pathmods_inverse.append(
(IRI(value=path.value.value), path.operand, False)
Expand All @@ -625,7 +776,10 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
elif self.kind == "fts":
if j == seq_path_len - 1: # we're at the end of the sequence path
new_triple = triple[:2] + (Var(value="fts_search_node"),)
if inner_path_type != "inverse":
new_triple = triple[:2] + (Var(value="fts_search_node"),)
else:
new_triple = (Var(value="fts_search_node"),) + triple[1:]
self.tss_list.append(TriplesSameSubject.from_spo(*new_triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*new_triple))
else:
Expand All @@ -634,7 +788,7 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
pp_i += len(property_path.value)
if self.kind == "endpoint":
tssp = _tssp_for_sequence(
self.focus_node, preds_pathmods_inverse, path_node_2
self.focus_node, preds_pathmods_inverse, path_nodes[seq_path_len-1] # Last node
)
current_tssp.append(tssp)

Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ server = ["uvicorn"]
[tool.black]
line-length = 88

[tool.isort]
profile = "black"

[tool.semantic_release]
version_toml = ["pyproject.toml:tool.poetry.version"]

Expand Down
19 changes: 5 additions & 14 deletions test_data/fts_property_shapes.ttl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@prefix fts: <https://prez.dev/ont/fts#> .
@prefix ont: <https://prez.dev/ont/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dwc: <http://rs.tdwg.org/dwc/terms/> .
@prefix ex: <http://example.com/> .
Expand All @@ -9,37 +9,28 @@
@prefix prez: <https://prez.dev/> .


ex:FTSSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
sh:path
(
ex:hasSimpleResult ex:hasFeatureOfInterest ex:testProp ex:testProp2
) ;
.

ex:FTSInverseShape
a sh:PropertyShape ;
a fts:PropertyShape ;
a ont:JenaFTSPropertyShape ;
sh:path [ sh:inversePath ex:hasFeatureOfInterest ] ;
sh:name "Inverse" ;
dcterms:identifier "inv" ;
.

ex:FTSSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
a ont:JenaFTSPropertyShape ;
sh:path
(
ex:hasSimpleResult ex:hasFeatureOfInterest ex:testProp ex:testProp2
ex:prop1 ex:prop2 ex:labelProp
) ;
sh:name "Sequence" ;
dcterms:identifier "seq" ;
.

ex:FTSInverseSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
a ont:JenaFTSPropertyShape ;
sh:path ( ex:hasSimpleResult [ sh:inversePath ex:hasFeatureOfInterest ] ) ;
sh:name "Sequence Inverse" ;
dcterms:identifier "seqinv" ;
Expand Down
4 changes: 2 additions & 2 deletions tests/test_cql_queryable.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_ps_1():
)
assert (
ps.tssp_list[0].to_string()
== "?focus_node ^<http://example.com/hasFeatureOfInterest>/^<http://example.com/hasMember>*/<http://example.com/hasSimpleResult> ?path_node_2"
== "?focus_node ^<http://example.com/hasFeatureOfInterest>/^<http://example.com/hasMember>*/<http://example.com/hasSimpleResult> ?path_node_3"
)


Expand All @@ -33,5 +33,5 @@ def test_ps_2():
)
assert (
ps.tssp_list[0].to_string()
== "?focus_node ^<http://www.w3.org/ns/sosa/hasFeatureOfInterest>/<http://www.w3.org/ns/sosa/hasMember>/<http://www.w3.org/ns/sosa/hasResult>/<http://rs.tdwg.org/dwc/terms/scientificNameID> ?path_node_2"
== "?focus_node ^<http://www.w3.org/ns/sosa/hasFeatureOfInterest>/<http://www.w3.org/ns/sosa/hasMember>/<http://www.w3.org/ns/sosa/hasResult>/<http://rs.tdwg.org/dwc/terms/scientificNameID> ?path_node_4"
)
Loading

0 comments on commit d18ffbe

Please sign in to comment.