Skip to content

Commit

Permalink
add framework, WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
recalcitrantsupplant committed Nov 1, 2024
1 parent 79edc87 commit e68914b
Show file tree
Hide file tree
Showing 8 changed files with 501 additions and 373 deletions.
27 changes: 24 additions & 3 deletions prez/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from prez.services.query_generation.concept_hierarchy import ConceptHierarchyQuery
from prez.services.query_generation.cql import CQLParser
from prez.services.query_generation.search_default import SearchQueryRegex
from prez.services.query_generation.search_fts import SearchQueryFusekiFTS
from prez.services.query_generation.search_fuseki_fts import SearchQueryFusekiFTS
from prez.services.query_generation.shacl import NodeShape
from prez.services.query_generation.sparql_escaping import escape_for_lucene_and_sparql

Expand Down Expand Up @@ -188,7 +188,10 @@ async def cql_get_parser_dependency(
)


async def generate_search_query(request: Request):
async def generate_search_query(
request: Request,
system_repo: Repo = Depends(get_system_repo),
):
term = request.query_params.get("q")
if term:
escaped_term = escape_for_lucene_and_sparql(term)
Expand All @@ -206,8 +209,26 @@ async def generate_search_query(request: Request):
offset=offset,
)
elif settings.search_method == SearchMethod.FTS_FUSEKI:
predicates = predicates if predicates else settings.search_predicates
shacl_shape_predicates = []
non_shacl_predicates = []
for pred in predicates:
try:
URIRef(pred)
non_shacl_predicates.append(pred)
except ValueError:
shacl_shape_predicates.append(pred)
# print('9')
# shacl_pred_query = f"""
# DESCRIBE ?shape {{
# ?shape a <https://prez.dev/ont/fts#> ;
# <http://purl.org/dc/terms/identifier> {}.
# }}"""
# for predicate in shacl_shape_predicates:
# pass
fts_gpnt = []
return SearchQueryFusekiFTS(
term=escaped_term, predicates=predicates, limit=limit, offset=offset
term=escaped_term, predicates=predicates, limit=limit, offset=offset, fts_gpnt=fts_gpnt
)
else:
raise NotImplementedError(
Expand Down
359 changes: 0 additions & 359 deletions prez/services/query_generation/search_fts.py

This file was deleted.

366 changes: 366 additions & 0 deletions prez/services/query_generation/search_fuseki_fts.py

Large diffs are not rendered by default.

39 changes: 29 additions & 10 deletions prez/services/query_generation/shacl.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def optional_gpnt(depth):
class PropertyShape(Shape):
uri: URIRef | BNode # URI of the shape
graph: Graph
kind: TypingLiteral["endpoint", "profile"]
kind: TypingLiteral["endpoint", "profile", "fts"]
focus_node: Union[IRI, Var]
# inputs
shape_number: int = 0
Expand Down Expand Up @@ -385,7 +385,7 @@ def to_grammar(self):
# label nodes in the inner select and profile part of the query differently for clarity.
if self.kind == "endpoint":
path_or_prop = "path"
elif self.kind == "profile":
elif (self.kind == "profile") or (self.kind == "fts"):
path_or_prop = f"prof_{self.shape_number + 1}"

# set up the path nodes - either from supplied values or set as variables
Expand Down Expand Up @@ -535,17 +535,27 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
else:
pred = IRI(value=property_path.value)
obj = path_node_1
triple = (self.focus_node, pred, obj)
if self.kind == "fts":
triple = (self.focus_node, pred, Var(value="fts_search_node"))
else:
triple = (self.focus_node, pred, obj)
self.tss_list.append(TriplesSameSubject.from_spo(*triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
pp_i += 1

elif isinstance(property_path, InversePath):
triple = (
path_node_1,
IRI(value=property_path.value.value),
self.focus_node,
)
if self.kind == "fts":
triple = (
Var(value="fts_search_node"),
IRI(value=property_path.value.value),
self.focus_node,
)
else:
triple = (
path_node_1,
IRI(value=property_path.value.value),
self.focus_node,
)
self.tss_list.append(TriplesSameSubject.from_spo(*triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
pp_i += 1
Expand All @@ -568,13 +578,14 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):

elif isinstance(property_path, SequencePath):
preds_pathmods_inverse = []
seq_path_len = len(property_path.value)
for j, path in enumerate(property_path.value):
if isinstance(path, Path):
if self.kind == "endpoint":
preds_pathmods_inverse.append(
(IRI(value=path.value), None, False)
)
elif self.kind == "profile":
elif (self.kind == "profile") or (self.kind == "fts"):
if j == 0:
triple = (
self.focus_node,
Expand All @@ -592,7 +603,7 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
preds_pathmods_inverse.append(
(IRI(value=path.value.value), None, True)
)
elif self.kind == "profile":
elif (self.kind == "profile") or (self.kind == "fts"):
if j == 0:
triple = (
path_node_1,
Expand All @@ -619,6 +630,14 @@ def process_property_paths(self, property_paths, path_or_prop, tssp_list, pp_i):
if self.kind == "profile":
self.tss_list.append(TriplesSameSubject.from_spo(*triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
elif self.kind == "fts":
if j == seq_path_len - 1: # we're at the end of the sequence path
new_triple = triple[:2] + (Var(value="fts_search_node"),)
self.tss_list.append(TriplesSameSubject.from_spo(*new_triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*new_triple))
else:
self.tss_list.append(TriplesSameSubject.from_spo(*triple))
current_tssp.append(TriplesSameSubjectPath.from_spo(*triple))
pp_i += len(property_path.value)
if self.kind == "endpoint":
tssp = _tssp_for_sequence(
Expand Down
2 changes: 1 addition & 1 deletion prez/services/query_generation/umbrella.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from prez.services.query_generation.cql import CQLParser
from prez.services.query_generation.datetime_filter import generate_datetime_filter
from prez.services.query_generation.search_default import SearchQueryRegex
from prez.services.query_generation.search_fts import SearchQueryFusekiFTS
from prez.services.query_generation.search_fuseki_fts import SearchQueryFusekiFTS
from prez.services.query_generation.shacl import NodeShape


Expand Down
46 changes: 46 additions & 0 deletions test_data/fts_property_shapes.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
@prefix fts: <https://prez.dev/ont/fts#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dwc: <http://rs.tdwg.org/dwc/terms/> .
@prefix ex: <http://example.com/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix sname: <https://fake-scientific-name-id.com/name/afd/> .
@prefix sosa: <http://www.w3.org/ns/sosa/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix prez: <https://prez.dev/> .


ex:FTSSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
sh:path
(
ex:hasSimpleResult ex:hasFeatureOfInterest ex:testProp ex:testProp2
) ;
.

ex:FTSInverseShape
a sh:PropertyShape ;
a fts:PropertyShape ;
sh:path [ sh:inversePath ex:hasFeatureOfInterest ] ;
sh:name "Inverse" ;
dcterms:identifier "inv" ;
.

ex:FTSSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
sh:path
(
ex:hasSimpleResult ex:hasFeatureOfInterest ex:testProp ex:testProp2
) ;
sh:name "Sequence" ;
dcterms:identifier "seq" ;
.

ex:FTSInverseSequenceShape
a sh:PropertyShape ;
a fts:PropertyShape ;
sh:path ( ex:hasSimpleResult [ sh:inversePath ex:hasFeatureOfInterest ] ) ;
sh:name "Sequence Inverse" ;
dcterms:identifier "seqinv" ;
.
File renamed without changes.
35 changes: 35 additions & 0 deletions tests/test_search_fuseki_fts_class.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pathlib import Path

from rdflib import RDFS, URIRef, Graph
from sparql_grammar_pydantic import Var

from prez.services.query_generation.search_fuseki_fts import SearchQueryFusekiFTS
from prez.services.query_generation.shacl import PropertyShape


def test_query_gen():
    """Smoke test: build an FTS search query over two predicates and render it.

    The rendered query is only printed; nothing is asserted about its content.
    """
    search_query = SearchQueryFusekiFTS(
        term="test",
        limit=10,
        offset=0,
        predicates=[RDFS.label, RDFS.comment],
    )
    rendered = search_query.to_string()
    print(rendered)

def test_combo_query_gen():
    """Build an FTS query alongside SHACL property shapes of kind ``"fts"``.

    Parses ``test_data/fts_property_shapes.ttl``, constructs two
    ``PropertyShape`` objects from it, then builds and prints a
    ``SearchQueryFusekiFTS``. Nothing is asserted about the query content yet.
    """
    file = Path(__file__).parent.parent / "test_data" / "fts_property_shapes.ttl"
    ps_g = Graph().parse(file)

    # The shapes are declared under the ``ex:`` prefix (<http://example.com/>)
    # in the Turtle file, so they must be addressed by absolute IRI; a bare
    # local name does not match any subject in the parsed graph.
    shape_uri = URIRef("http://example.com/FTSInverseSequenceShape")

    ps1 = PropertyShape(
        uri=shape_uri,
        graph=ps_g,
        kind="fts",  # "profile" would expand these out to plain triple pattern matches
        focus_node=Var(value="focus_node"),
        shape_number=100,
    )
    # ``graph``, ``kind`` and ``focus_node`` are declared without defaults on
    # PropertyShape, so they have to be supplied here as well.
    ps2 = PropertyShape(
        uri=shape_uri,
        graph=ps_g,
        kind="fts",
        focus_node=Var(value="focus_node"),
    )

    # Previously an undefined ``gpnt_list`` name was passed, raising NameError.
    # The keyword and the empty placeholder mirror the production call in
    # prez/dependencies.py (``fts_gpnt=fts_gpnt`` with ``fts_gpnt = []``).
    # TODO(review): populate this from ps1/ps2 once the shape-to-pattern wiring
    # is in place, and confirm the keyword against SearchQueryFusekiFTS.
    fts_gpnt = []
    query = SearchQueryFusekiFTS(
        term="test",
        limit=10,
        offset=0,
        predicates=[RDFS.label, RDFS.comment],
        fts_gpnt=fts_gpnt,
    )
    print(query.to_string())

0 comments on commit e68914b

Please sign in to comment.