Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Dynamic datalink #212

Merged
merged 6 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 81 additions & 4 deletions daiquiri/datalink/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from daiquiri.core.constants import ACCESS_LEVEL_PUBLIC
from daiquiri.core.utils import get_doi_url, import_class

from .constants import DATALINK_RELATION_TYPES
from .constants import DATALINK_RELATION_TYPES, DATALINK_FIELDS
from .models import Datalink


Expand All @@ -30,6 +30,14 @@ class BaseDatalinkAdapter(object):
* get_<resource_type>_links(self, resource): returns a resource into a list
of rows of the datalink table (list of python dicts)

There are two further adapters, which do not declare resources:

* DynamicDatalinkAdapter: the latter does not declare a resource, but will inject on the fly
extra datalink entries according to the method: get_dyn_datalink_links()

* QueryJobDatalinkAdapterMixin: The latter does not declare a resource either, but it injects
extra context information for the datalink viewer.

See the mixins below for an example.
"""

Expand All @@ -46,6 +54,8 @@ def __init__(self):
raise NotImplementedError(message)

def get_list(self):
    '''Yield every tabular datalink entry declared by this adapter.

    Only used by rebuild_datalink_table, so only the resources declared
    via self.resource_types need to be gathered here.
    '''
    for resource_type in self.resource_types:
        list_method = getattr(self, 'get_%s_list' % resource_type)
        yield from list_method()

Expand All @@ -56,14 +66,58 @@ def get_links(self, resource_type, resource):
return getattr(self, 'get_%s_links' % resource_type)(resource)

def get_context_data(self, request, **kwargs):
    '''Return the datalink-related context data for a given request.

    When an 'ID' is supplied in kwargs, the context receives the matching
    Datalink rows (as plain dicts, ordered by semantics) plus the ID itself.
    '''
    context = {}

    if 'ID' in kwargs:
        # more precise would be to use a serializer instead of list(QuerySet.values())
        context['datalinks'] = list(Datalink.objects.filter(ID=kwargs['ID']).order_by('semantics').values())
        context['ID'] = kwargs['ID']

    return context

def get_datalink_rows(self, identifiers, **kwargs):
    '''Get the list of datalink entries for the provided identifiers
    (incl. table- and dynamic- datalink).

    Returns a list of tuples in DATALINK_FIELDS order. Identifiers with no
    entry at all get a NotFoundFault error row so the caller can report them.
    '''
    # get the datalink entries from Datalink Table and metadata (Table and Schema)
    field_names = [field['name'] for field in DATALINK_FIELDS]
    rows = list(Datalink.objects.filter(ID__in=identifiers).values_list(*field_names))

    # get the dynamic datalink entries
    try:
        dyn_rows = [
            (
                link['ID'],
                link['access_url'],
                link['service_def'],
                link['error_message'],
                link['description'],
                link['semantics'],
                link['content_type'],
                link['content_length'],
            )
            for link in self.get_dyn_datalink_links(identifiers)
        ]
    except KeyError as e:
        # in case of malformation give some hints to the developer
        class_name = str(self.__class__)
        raise KeyError(f"The key '{e.args[0]}' is missing in one of the dictionaries returned by {class_name}.get_dyn_datalink_links(id)") from e

    rows = rows + dyn_rows

    # check for missing IDs and return error message
    found_ids = {row[0] for row in rows}
    for identifier in identifiers:
        if identifier not in found_ids:
            rows.append((identifier, None, None, 'NotFoundFault: {}'.format(identifier), None, None, None, None))

    return rows



class TablesDatalinkAdapterMixin(object):
'''
Expand Down Expand Up @@ -226,6 +280,29 @@ def get_table_links(self, table):
return table_links


class DynamicDatalinkAdapterMixin(object):
    '''Define the interface to dynamically add datalink entries.
    '''

    def get_dyn_datalink_links(self, IDs, **kwargs):
        '''Return dynamically generated datalink entries. Can be overridden.

        This method should return a list of dicts with the following keys:
        ID, access_url, service_def, error_message, description, semantics,
        content_type, content_length

        The default implementation declares no dynamic entries.
        '''
        return []

    def get_context_data(self, request, **kwargs):
        '''Inject dynamically generated Datalink entries into the context for
        the daiquiri.datalinks.views.datalink view.
        '''
        context = super().get_context_data(request, **kwargs)

        if 'ID' in kwargs:
            # append the on-the-fly entries to the table-backed ones
            context['datalinks'] = context['datalinks'] + self.get_dyn_datalink_links([kwargs['ID']], **kwargs)

        return context


class QueryJobDatalinkAdapterMixin(object):
'''
Injects the query job into the context data for the daiquiri.datalinks.views.datalink view
Expand All @@ -244,8 +321,8 @@ def get_context_data(self, request, **kwargs):

return context


class DefaultDatalinkAdapter(MetadataDatalinkAdapterMixin,
class DefaultDatalinkAdapter(DynamicDatalinkAdapterMixin,
MetadataDatalinkAdapterMixin,
TablesDatalinkAdapterMixin,
QueryJobDatalinkAdapterMixin,
BaseDatalinkAdapter):
Expand Down
16 changes: 7 additions & 9 deletions daiquiri/datalink/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
from daiquiri.core.generators import generate_votable

from .constants import DATALINK_FIELDS, DATALINK_CONTENT_TYPE

from .adapter import DatalinkAdapter
from .models import Datalink


class SyncDatalinkJobViewSet(viewsets.GenericViewSet):
'''Generate the datalink VOTable
'''

def list(self, request):
return self.perform_sync_job(request, request.GET)
Expand All @@ -20,17 +21,14 @@ def create(self, request):
return self.perform_sync_job(request, request.POST)

def perform_sync_job(self, request, data):
rows = []

if 'ID' in data:
identifiers = data.getlist('ID')
field_names = [field['name'] for field in DATALINK_FIELDS]
rows = list(Datalink.objects.filter(ID__in=identifiers).values_list(*field_names))

# check for missing IDs and return error message
for identifier in identifiers:
if not any(filter(lambda row: row[0] == identifier, rows)):
rows.append((identifier, None, None, 'NotFoundFault: {}'.format(identifier), None, None, None, None))
adapter = DatalinkAdapter()

# get all datalink entries (DatalinkTable, Metadata and Dynamic)
rows = adapter.get_datalink_rows(identifiers)

if data.get('RESPONSEFORMAT') == 'application/json':
return JsonResponse({
Expand Down
15 changes: 8 additions & 7 deletions daiquiri/oai/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from daiquiri.core.adapter import DatabaseAdapter
from daiquiri.core.constants import ACCESS_LEVEL_PUBLIC
from daiquiri.core.utils import get_doi, import_class
from daiquiri.datalink.adapter import DatalinkAdapter


def OaiAdapter():
Expand Down Expand Up @@ -219,6 +220,8 @@ def get_datacite_datalink_serializer_class(self):
return import_class('daiquiri.datalink.serializers.DataciteSerializer')

def get_datalink_list(self):
kimakan marked this conversation as resolved.
Show resolved Hide resolved
'''This function is used by rebuild_oai_schema only, it only needs to gather the doi objects declared via datalink (no other entries).
'''
for table in self.tables:
schema_name, table_name = table.split('.')
rows = DatabaseAdapter().fetch_rows(schema_name, table_name, column_names=['ID', 'access_url'],
Expand All @@ -228,12 +231,10 @@ def get_datalink_list(self):
yield 'datalink', {'id': str(ID), 'doi': get_doi(access_url)}

def get_datalink(self, pk):
rows = []
for table in self.tables:
schema_name, table_name = table.split('.')
rows += DatabaseAdapter().fetch_rows(schema_name, table_name, column_names=[
'access_url', 'description', 'semantics', 'content_type', 'content_length'
], filters={'ID': str(pk)})

adapter = DatalinkAdapter()
# get all datalink entries: DatalinkTables, Metadata and Dynamic
rows = adapter.get_datalink_rows([pk])

datalink = {
'formats': [],
Expand All @@ -245,7 +246,7 @@ def get_datalink(self, pk):
],
'related_identifiers': []
}
for access_url, description, semantics, content_type, content_length in rows:
for _, access_url, _, _, description, semantics, content_type, content_length in rows:
if semantics == '#doi':
datalink['doi'] = get_doi(access_url)
datalink['title'] = description
Expand Down
28 changes: 20 additions & 8 deletions daiquiri/query/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,14 @@ def run(self):
def run_sync(self):
adapter = DatabaseAdapter()

self.actual_query = adapter.build_sync_query(
adapter.create_user_schema_if_not_exists(self.schema_name)

self.actual_query = adapter.build_query(
self.schema_name,
self.table_name,
self.native_query,
settings.QUERY_SYNC_TIMEOUT,
self.max_records
self.max_records,
)

job_sources = get_job_sources(self)
Expand All @@ -237,15 +241,23 @@ def run_sync(self):
user=self.owner
)

adapter.submit_query(self.actual_query)

try:
download_adapter = DownloadAdapter()

yield from generate_votable(adapter.fetchall(self.actual_query), get_job_columns(self),
table=download_adapter.get_table_name(self.schema_name, self.table_name),
infos=download_adapter.get_infos('OK', self.query, self.query_language, job_sources),
links=download_adapter.get_links(job_sources),
services=download_adapter.get_services())
yield from DownloadAdapter().generate(
'votable',
get_job_columns(self),
sources=self.metadata.get("sources", []),
schema_name=self.schema_name,
table_name=self.table_name,
nrows=self.nrows,
query_status=self.result_status,
query=self.native_query,
query_language=self.query_language)

self.drop_uploads()
self.drop_table()

except (OperationalError, ProgrammingError, InternalError, DataError):
raise StopIteration()
Expand Down
2 changes: 1 addition & 1 deletion daiquiri/query/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def get_job_column(job, display_column_name):
def get_job_columns(job):
columns = []

if job.phase == job.PHASE_COMPLETED:
if job.phase == job.PHASE_COMPLETED or job.job_type == job.JOB_TYPE_SYNC:
database_columns = DatabaseAdapter().fetch_columns(job.schema_name, job.table_name)

for database_column in database_columns:
Expand Down
Loading