Skip to content

Commit

Permalink
Allow versioned resource access (core/staging)
Browse files Browse the repository at this point in the history
Nextstrain URLs are extended to allow <path>@<version> syntax. Currently
the version must be in YYYY-MM-DD format. If a version is available for
that day then that resource is used. If a version is not available for
that day then we redirect to the closest available YYYY-MM-DD, or
redirect to the latest (non-versioned) URL where appropriate.

Note that the current implementation (via previous commits) uses S3
versioning, not our datestamped datasets, although the two concepts may
appear similar in the URL.
  • Loading branch information
jameshadfield committed Oct 4, 2023
1 parent 88d3fb7 commit 39f79c8
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 55 deletions.
5 changes: 3 additions & 2 deletions src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ const coreBuildRoutes = coreBuildPaths.map(path => [
path,
`${path}/*`,
`${path}:*`, // Tangletrees at top-level, e.g. /a:/a/b
`${path}@*`, // version (date) descriptors for a top-level core build
]);

app.use([coreBuildRoutes, "/narratives/*"], setSource(req => new CoreSource())); // eslint-disable-line no-unused-vars
Expand Down Expand Up @@ -290,13 +291,13 @@ app.use(["/fetch/narratives/:authority", "/fetch/:authority"],
setSource(req => new UrlDefinedSource(req.params.authority)));

/* Resources under /fetch are addressed by their path on the given authority,
 * so the path is passed through as-is: `false` tells setNarrative/setDataset
 * not to look for an "@<version>" descriptor in it. Each route registers its
 * extractor middleware exactly once.
 */
app.routeAsync("/fetch/narratives/:authority/*")
  .all(setNarrative(req => req.params[0], false)) // Don't attempt to extract version descriptors
  .getAsync(getNarrative)
  .optionsAsync(optionsNarrative)
;

app.routeAsync("/fetch/:authority/*")
  .all(setDataset(req => req.params[0], false)) // Don't attempt to extract version descriptors
  .getAsync(getDataset)
  .optionsAsync(optionsDataset)
;
Expand Down
65 changes: 46 additions & 19 deletions src/endpoints/sources.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ import { contentTypesProvided, contentTypesConsumed } from '../negotiate.js';
import * as options from './options.js';
import { sendAuspiceEntrypoint } from './static.js';
import { deleteByUrls, proxyFromUpstream, proxyToUpstream } from "../upstream.js";

import * as utils from '../utils/index.js';
import { LATEST } from "../manifest.js";

/**
* Generate Express middleware that extracts a {@link Source} instance from the
Expand Down Expand Up @@ -35,28 +36,37 @@ const setSource = (sourceExtractor) => (req, res, next) => {
* @param {pathExtractor} pathExtractor - Function to extract a dataset path from the request
* @returns {expressMiddleware}
*/
const setDataset = (pathExtractor, lookForVersion=true) => (req, res, next) => {
  /* `lookForVersion` (default: true) is forwarded to pathParts(), which
   * returns [parts, versionDescriptor]; both elements are spread into the
   * source's dataset() factory. Callers pass `false` when the path must be
   * used verbatim.
   */
  req.context.dataset = req.context.source.dataset(
    ...pathParts(pathExtractor(req), lookForVersion)
  );
  next();
};


/**
* Generate Express middleware that redirects to the canonical path for the
* current {@link Dataset} if it is not fully resolved.
* current {@link Dataset} if it is not fully resolved. This includes redirecting
* to a more exact version, where appropriate.
*
* @param {pathBuilder} pathBuilder - Function to build a fully-specified path
* @returns {expressMiddleware}
*/
const canonicalizeDataset = (pathBuilder) => (req, res, next) => {
console.log("sources.js::canonicalizeDataset")

const dataset = req.context.dataset;
const resolvedDataset = dataset.resolve();

if (dataset === resolvedDataset) return next();

let versionSuffix = '';
if (resolvedDataset.versionDescriptor && resolvedDataset.versionDescriptor!==LATEST) {
versionSuffix = `@${resolvedDataset.versionDescriptor}`;
}
console.log("\tCanonicalizing to ", resolvedDataset.pathParts.join("/")+versionSuffix)

const canonicalPath = pathBuilder.length >= 2
? pathBuilder(req, resolvedDataset.pathParts.join("/"))
: pathBuilder(resolvedDataset.pathParts.join("/"));
? pathBuilder(req, resolvedDataset.pathParts.join("/")+versionSuffix)
: pathBuilder(resolvedDataset.pathParts.join("/")+versionSuffix);

/* 307 Temporary Redirect preserves request method, unlike 302 Found, which
* is important since this middleware function may be used in non-GET routes.
Expand Down Expand Up @@ -194,8 +204,8 @@ const optionsNarrative = options.forAuthzObject(req => req.context.narrative);
* @param {pathExtractor} pathExtractor - Function to extract a narrative path from the request
* @returns {expressMiddleware}
*/
const setNarrative = (pathExtractor, lookForVersion=true) => (req, res, next) => {
  /* `lookForVersion` (default: true) is forwarded to pathParts(), which
   * returns [parts, versionDescriptor]; both elements are spread into the
   * source's narrative() factory. Callers pass `false` when the path must be
   * used verbatim.
   */
  req.context.narrative = req.context.source.narrative(
    ...pathParts(pathExtractor(req), lookForVersion)
  );
  next();
};

Expand Down Expand Up @@ -256,18 +266,33 @@ const putNarrative = contentTypesConsumed([
* the parts for the first dataset.
*
* @param {String} path
* @returns {String[]}
* @returns {[String[], String]}
*/
function pathParts(path = "", lookForVersion) {
  /* Returns a two-element array `[parts, versionDescriptor]`:
   *   - parts: the "/"-separated segments of the (first) resource path, or
   *     an empty array when there is no path;
   *   - versionDescriptor: the text after a single "@" when `lookForVersion`
   *     is truthy, otherwise LATEST.
   * When `lookForVersion` is falsy any "@" is left in the path untouched.
   */
  const trimSlashes = (str) => str
    .replace(/^\/+/, "")  // Ignore leading slashes
    .replace(/\/+$/, "")  // …and trailing slashes.
  ;

  // Use only the first dataset in a tangletree (dual dataset) path.
  const firstDataset = trimSlashes(path.split(":")[0]);

  let resourcePath = firstDataset;
  let versionDescriptor = LATEST;

  /* A part of the path starting with "@" is the version descriptor - this
     will later be translated to the appropriate S3 version ID / GitHub SHA,
     but we use human readable descriptors in the URL path */
  if (lookForVersion) {
    const [beforeVersion, ...descriptors] = firstDataset.split("@");

    // Trim again: "zika/@2023-10-04" must not leave an empty trailing segment.
    resourcePath = trimSlashes(beforeVersion);

    if (descriptors.length === 1) {
      // An empty descriptor ("zika@") carries no information, so treat it as LATEST.
      versionDescriptor = descriptors[0] || LATEST;
    } else if (descriptors.length > 1) {
      // The path before the first "@" is still used, so the fallback really
      // is "the latest version of the associated resource".
      utils.warn("Request URL path included multiple `@`. Falling back to latest version of associated resource.");
    }
  }

  return [resourcePath ? resourcePath.split("/") : [], versionDescriptor];
}


Expand All @@ -282,12 +307,14 @@ function sendSubresource(subresourceExtractor) {
return async (req, res) => {
const subresource = subresourceExtractor(req);

console.log("handler::sendSubresource ")

authz.assertAuthorized(req.user, authz.actions.Read, subresource.resource);

return await proxyFromUpstream(req, res,
await subresource.url(),
subresource.accept
);
const upstreamUrl = await subresource.url()
if (!upstreamUrl) throw new NotFound("ResourceKnownNotToExist");

return await proxyFromUpstream(req, res, upstreamUrl, subresource.accept);
};
}

Expand Down
21 changes: 17 additions & 4 deletions src/sources/community.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { fetch } from '../fetch.js';
import { NotFound } from '../httpErrors.js';
import * as utils from '../utils/index.js';
import { Source, Dataset, Narrative } from './models.js';
import { LATEST } from '../manifest.js';

const authorization = process.env.GITHUB_TOKEN
? `token ${process.env.GITHUB_TOKEN}`
Expand Down Expand Up @@ -60,11 +61,11 @@ class CommunitySource extends Source {
return `${this.repoName}@${branch}`;
}

dataset(pathParts) {
return new CommunityDataset(this, pathParts);
dataset(pathParts, versionDescriptor) {
return new CommunityDataset(this, pathParts, versionDescriptor);
}
narrative(pathParts) {
return new CommunityNarrative(this, pathParts);
narrative(pathParts, versionDescriptor) {
return new CommunityNarrative(this, pathParts, versionDescriptor);
}

async availableDatasets() {
Expand Down Expand Up @@ -190,6 +191,12 @@ class CommunityDataset extends Dataset {
// name in the file basename.
return [`auspice/${this.source.repoName}`, ...this.pathParts];
}
lookupVersionInfo(versionDescriptor) {
if (versionDescriptor!==LATEST) {
utils.verbose(`versions not implemented for community datasets (version descriptor: "${versionDescriptor}")`)
}
return [LATEST, undefined];
}
}

class CommunityNarrative extends Narrative {
Expand All @@ -198,6 +205,12 @@ class CommunityNarrative extends Narrative {
// repo name in the file basename.
return [`narratives/${this.source.repoName}`, ...this.pathParts];
}
lookupVersionInfo(versionDescriptor) {
if (versionDescriptor!==LATEST) {
utils.verbose(`versions not implemented for community narratives (version descriptor: "${versionDescriptor}")`)
}
return [LATEST, undefined];
}
}

export {
Expand Down
63 changes: 53 additions & 10 deletions src/sources/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ import * as authz from '../authz/index.js';
import { fetch } from '../fetch.js';
import { NotFound } from '../httpErrors.js';
import * as utils from '../utils/index.js';
import { Source, Dataset } from './models.js';
import { Source, Dataset, Narrative } from './models.js';
import { ResourceVersions, LATEST } from "../manifest.js";

const authorization = process.env.GITHUB_TOKEN
? `token ${process.env.GITHUB_TOKEN}`
Expand All @@ -17,6 +18,7 @@ class CoreSource extends Source {

// eslint-disable-next-line no-unused-vars
async urlFor(path, method = 'GET', headers = {}) {
console.log(`CoreSource::urlFor(${path})`)
const baseUrl = path.endsWith(".md")
? `https://raw.githubusercontent.com/${this.repo}/${await this.branch}/`
: await this.baseUrl();
Expand All @@ -25,8 +27,12 @@ class CoreSource extends Source {
return url.toString();
}

dataset(pathParts) {
return new CoreDataset(this, pathParts);
dataset(pathParts, versionDescriptor) {
return new CoreDataset(this, pathParts, versionDescriptor);
}

narrative(pathParts, versionDescriptor) {
return new CoreNarrative(this, pathParts, versionDescriptor);
}

// The computation of these globals should move here.
Expand Down Expand Up @@ -94,6 +100,7 @@ class CoreStagingSource extends CoreSource {

class CoreDataset extends Dataset {
resolve() {
console.log("CoreDataset::resolve()", this.versionDescriptor, this.versionId)
/* XXX TODO: Reimplement this in terms of methods on the source, not by
* breaking encapsulation by using a process-wide global.
* -trs, 26 Oct 2021 (based on a similar comment 5 Sept 2019)
Expand All @@ -112,20 +119,56 @@ class CoreDataset extends Dataset {

const prefix = prefixParts.join("/");

if (doesPathExist(prefix)) {
return this;
}
if (!doesPathExist(prefix)) {
/* the path doesn't match any available datasets exactly */
const nextDefaultPart = global.availableDatasets.defaults[sourceName][prefix];

/* if we are here, then the path doesn't match any available datasets exactly */
const nextDefaultPart = global.availableDatasets.defaults[sourceName][prefix];
if (nextDefaultPart) {
const dataset = new this.constructor(this.source, [...prefixParts, nextDefaultPart], this.versionDescriptor);
return dataset.resolve();
}
}

if (nextDefaultPart) {
const dataset = new this.constructor(this.source, [...prefixParts, nextDefaultPart]);
/**
* If we have a versionDescriptor (i.e. from the requesting URL) in YYYY-MM-DD format
* but the matching versionId (also in YYYY-MM-DD format) is different then redirect.
*/
if (this.versionDescriptor.match(/^\d{4}-\d{2}-\d{2}$/) && this.versionDescriptor!==this.versionId) {
console.log("\tresolve -> ", this.versionId)
const dataset = new this.constructor(this.source, [...prefixParts], this.versionId);
return dataset.resolve();
}

return this;
}

/**
* Parse a human-readable versionDescriptor (currently only in YYYY-MM-DD format)
* and return the closest available versionID (YYYY-MM-DD string) and the associated
* versionInfo (an object linking available file types to their access URLs)
* @param {string} versionDescriptor
* @returns [versionId, versionInfo]
*/
lookupVersionInfo(versionDescriptor) {
if (versionDescriptor===LATEST) return [LATEST, undefined];
const urlPath = this.source.name==='staging'
? ['staging', ...this.pathParts].join("/")
: this.pathParts.join("/");
const versions = new ResourceVersions('dataset', urlPath);
const versionDate = versions.closestDate(versionDescriptor);
if (!versionDate || versionDate===LATEST) return [LATEST, undefined];
return [versionDate, versions.subresourceUrls(versionDate)];
}

}

/**
 * Narrative belonging to a core source. Versions are not implemented for
 * these narratives, so every lookup yields LATEST.
 */
class CoreNarrative extends Narrative {
  /**
   * @param {string} versionDescriptor
   * @returns {Array} always `[LATEST, undefined]`; a descriptor other than
   *   LATEST is noted via verbose logging.
   */
  lookupVersionInfo(versionDescriptor) {
    const latestOnly = [LATEST, undefined];
    if (versionDescriptor === LATEST) return latestOnly;

    utils.verbose(`versions not implemented for ${this.source.name} narratives (version descriptor: "${versionDescriptor}")`);
    return latestOnly;
  }
}

export {
Expand Down
24 changes: 20 additions & 4 deletions src/sources/fetch.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import * as authz from '../authz/index.js';

import { Source, Dataset, DatasetSubresource, Narrative, NarrativeSubresource } from './models.js';
import { LATEST } from '../manifest.js';
import * as utils from '../utils/index.js';

class UrlDefinedSource extends Source {
constructor(authority) {
Expand All @@ -14,11 +16,11 @@ class UrlDefinedSource extends Source {
async baseUrl() {
return `https://${this.authority}`;
}
dataset(pathParts) {
return new UrlDefinedDataset(this, pathParts);
dataset(pathParts, versionDescriptor) {
return new UrlDefinedDataset(this, pathParts, versionDescriptor);
}
narrative(pathParts) {
return new UrlDefinedNarrative(this, pathParts);
narrative(pathParts, versionDescriptor) {
return new UrlDefinedNarrative(this, pathParts, versionDescriptor);
}

// available datasets & narratives are unknown when the dataset is specified by the URL
Expand Down Expand Up @@ -66,6 +68,13 @@ class UrlDefinedDataset extends Dataset {
*/
return true;
}
lookupVersionInfo(versionDescriptor) {
/* We don't parse out the versionDescriptor for fetch URLs, so it should always be LATEST */
if (versionDescriptor!==LATEST) {
utils.warn(`Internal error: Fetch source has an unexpected versionDescriptor: "${versionDescriptor}"`)
}
return [LATEST, undefined];
}
}

class UrlDefinedDatasetSubresource extends DatasetSubresource {
Expand Down Expand Up @@ -105,6 +114,13 @@ class UrlDefinedNarrative extends Narrative {
*/
return true;
}
lookupVersionInfo(versionDescriptor) {
/* We don't parse out the versionDescriptor for fetch URLs, so it should always be LATEST */
if (versionDescriptor!==LATEST) {
utils.warn(`Internal error: Fetch source has an unexpected versionDescriptor: "${versionDescriptor}"`)
}
return [LATEST, undefined];
}
}

class UrlDefinedNarrativeSubresource extends NarrativeSubresource {
Expand Down
Loading

0 comments on commit 39f79c8

Please sign in to comment.