diff --git a/prez/dependencies.py b/prez/dependencies.py index 154b2f2f..6d1747ce 100755 --- a/prez/dependencies.py +++ b/prez/dependencies.py @@ -24,7 +24,7 @@ JSONMediaType, GeoJSONMediaType, ) -from prez.exceptions.model_exceptions import NoEndpointNodeshapeException +from prez.exceptions.model_exceptions import NoEndpointNodeshapeException, URINotFoundException from prez.models.query_params import QueryParams from prez.reference_data.prez_ns import ALTREXT, ONT, EP, OGCE, OGCFEAT from prez.repositories import PyoxigraphRepo, RemoteSparqlRepo, OxrdflibRepo, Repo @@ -489,7 +489,22 @@ async def get_endpoint_uri( async def get_ogc_features_path_params( request: Request, ): - return request.path_params + collection_id = request.path_params.get("collectionId") + feature_id = request.path_params.get("featureId") + path_params = {} + if feature_id: + try: + feature_uri = await get_uri_for_curie_id(feature_id) + except ValueError: + raise URINotFoundException(curie=feature_id) + path_params["feature_uri"] = feature_uri + if collection_id: + try: + collection_uri = await get_uri_for_curie_id(collection_id) + except ValueError: + raise URINotFoundException(curie=collection_id) + path_params["collection_uri"] = collection_uri + return path_params async def get_ogc_features_mediatype( diff --git a/prez/exceptions/model_exceptions.py b/prez/exceptions/model_exceptions.py index 55263938..533cd706 100755 --- a/prez/exceptions/model_exceptions.py +++ b/prez/exceptions/model_exceptions.py @@ -19,8 +19,11 @@ class URINotFoundException(Exception): Raised when a URI is not found in the triplestore. """ - def __init__(self, uri: URIRef): - self.message = f"URI {uri} not found at endpoint {settings.sparql_endpoint}." + def __init__(self, uri: URIRef = None, curie: str = None): + if uri: + self.message = f"URI \"{uri}\" not found at endpoint {settings.sparql_endpoint}." + if curie: + self.message = f"URI for curie \"{curie}\" not found at endpoint {settings.sparql_endpoint}." super().__init__(self.message) diff --git a/prez/reference_data/endpoints/features/features_nodeshapes.ttl b/prez/reference_data/endpoints/features/features_nodeshapes.ttl index 736300c0..f32d13d3 100644 --- a/prez/reference_data/endpoints/features/features_nodeshapes.ttl +++ b/prez/reference_data/endpoints/features/features_nodeshapes.ttl @@ -32,12 +32,12 @@ ex:Object ex:QueryablesGlobal a sh:NodeShape ; - sh:targetClass prez:Queryable ; + sh:targetClass geo:Feature ; ont:hierarchyLevel 1 ; . ex:QueryablesLocal a sh:NodeShape ; - sh:targetClass prez:Queryable ; + sh:targetClass geo:Feature ; ont:hierarchyLevel 2 ; . diff --git a/prez/routers/ogc_features_router.py b/prez/routers/ogc_features_router.py index 4ba8bd31..b70bd826 100755 --- a/prez/routers/ogc_features_router.py +++ b/prez/routers/ogc_features_router.py @@ -160,7 +160,7 @@ async def listings_with_feature_collection( system_repo, cql_parser, query_params, - **path_params, + path_params, ) except Exception as e: raise e @@ -202,7 +202,7 @@ async def objects( url, data_repo, system_repo, - **path_params, + path_params, ) except Exception as e: raise e diff --git a/prez/services/curie_functions.py b/prez/services/curie_functions.py index cb954696..485f9447 100755 --- a/prez/services/curie_functions.py +++ b/prez/services/curie_functions.py @@ -1,4 +1,5 @@ import logging +import re from urllib.parse import urlparse from aiocache import caches @@ -31,6 +32,19 @@ def namespace_registered(namespace): return False +def valid_prefix(prefix: str): + """For turtle serialization, as per https://www.w3.org/TR/turtle/#grammar-production-PN_PREFIX""" + valid = True + PN_CHARS_BASE = r"([A-Z]|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF])" + PN_CHARS_U = rf"({PN_CHARS_BASE}|_)" + PN_CHARS = rf"({PN_CHARS_U}|-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])" + PN_PREFIX = rf"({PN_CHARS_BASE}(({PN_CHARS}|.)*{PN_CHARS})?)" + matches = re.match(PN_PREFIX, prefix) + if not matches: + valid = False + return valid + + def generate_new_prefix(uri): """ Generates a new prefix for a uri @@ -52,8 +66,11 @@ def generate_new_prefix(uri): return # otherwise, remove vowels to reduce length proposed_prefix = "".join( - [c for c in to_generate_prefix_from if c not in "aeiou"] + [c for c in to_generate_prefix_from if c not in "aeiou!@#$%^&*()_+-=,."] ) + if not valid_prefix(proposed_prefix): + # if we still can't get a nice prefix. use an ugly but valid one using a hash of the IRI + proposed_prefix = f"ns{hash(to_generate_prefix_from)}" if not prefix_registered(proposed_prefix): prefix_graph.bind(proposed_prefix, ns) return @@ -95,6 +112,9 @@ async def get_uri_for_curie_id(curie_id: str): else: separator = settings.curie_separator curie = curie_id.replace(separator, ":") - uri = prefix_graph.namespace_manager.expand_curie(curie) + try: + uri = prefix_graph.namespace_manager.expand_curie(curie) + except ValueError: + raise await curie_cache.set(curie_id, uri) return uri diff --git a/prez/services/listings.py b/prez/services/listings.py index af6e6428..630d698c 100755 --- a/prez/services/listings.py +++ b/prez/services/listings.py @@ -156,11 +156,11 @@ async def ogc_features_listing_function( system_repo, cql_parser, query_params, - **path_params, + path_params, ): count_query = None count = 0 - collectionId = path_params.get("collectionId") + collection_uri = path_params.get("collection_uri") subselect_kwargs = merge_listing_query_grammar_inputs( endpoint_nodeshape=endpoint_nodeshape, cql_parser=cql_parser, @@ -199,6 +199,7 @@ async def ogc_features_listing_function( TriplesSameSubjectPath.from_spo(*innser_select_triple) ) subselect_kwargs["inner_select_vars"] = [queryable_var] + subselect_kwargs["limit"] = 100 construct_triple = ( queryable_var, IRI(value=RDF.type), @@ -211,7 +212,7 @@ async def ogc_features_listing_function( **subselect_kwargs, ).to_string() queries.append(query) - elif not collectionId: # list Feature Collections + elif not collection_uri: # list Feature Collections query = PrezQueryConstructor( construct_tss_list=construct_tss_list, profile_triples=profile_nodeshape.tssp_list, @@ -240,7 +241,6 @@ async def ogc_features_listing_function( # Features listing requires CBD of the Feature Collection as well; reuse items profile to get all props/bns to # depth two. - collection_uri = await get_uri_for_curie_id(collectionId) gpnt = GraphPatternNotTriples( content=Bind( expression=Expression.from_primary_expression( diff --git a/prez/services/objects.py b/prez/services/objects.py index c92ccf14..de82eed9 100755 --- a/prez/services/objects.py +++ b/prez/services/objects.py @@ -8,7 +8,7 @@ from fastapi.responses import PlainTextResponse, RedirectResponse from rdf2geojson import convert -from rdflib import RDF, URIRef, RDFS +from rdflib import RDF, URIRef from rdflib.namespace import GEO from sparql_grammar_pydantic import TriplesSameSubject, IRI, Var, TriplesSameSubjectPath @@ -129,25 +129,20 @@ async def ogc_features_object_function( url, data_repo, system_repo, - **path_params, + path_params, ): - collectionId = path_params.get("collectionId") - featureId = path_params.get("featureId") - if featureId: - feature_uri = await get_uri_for_curie_id(featureId) - else: - feature_uri = None - collection_uri = await get_uri_for_curie_id(collectionId) + collection_uri = path_params.get("collection_uri") + feature_uri = path_params.get("feature_uri") if template_query: - if featureId: - focus_uri = await get_uri_for_curie_id(featureId) + if feature_uri: + focus_uri = feature_uri else: focus_uri = collection_uri query = template_query.replace( "VALUES ?focusNode { UNDEF }", f"VALUES ?focusNode {{ {focus_uri.n3()} }}" ) else: - if featureId is None: # feature collection + if feature_uri is None: # feature collection collection_iri = IRI(value=collection_uri) construct_tss_list = None tssp_list = [ @@ -156,7 +151,6 @@ async def ogc_features_object_function( ) ] else: # feature - feature_uri = await get_uri_for_curie_id(featureId) feature_iri = IRI(value=feature_uri) triples = [ (feature_iri, Var(value="prop"), Var(value="val")), @@ -180,7 +174,7 @@ async def ogc_features_object_function( item_graph, _ = await data_repo.send_queries([query], []) if len(item_graph) == 0: uri = feature_uri if feature_uri else collection_uri - raise URINotFoundException(uri) + raise URINotFoundException(uri=uri) annotations_graph = await return_annotated_rdf(item_graph, data_repo, system_repo) log.debug(f"Query time: {time.time() - query_start_time}") @@ -188,6 +182,7 @@ async def ogc_features_object_function( if selected_mediatype == "application/sparql-query": content = io.BytesIO(query.encode("utf-8")) elif selected_mediatype == "application/json": + collectionId = get_curie_id_for_uri(collection_uri) collection = create_collection_json( collectionId, collection_uri, annotations_graph, url )