From cdc233fc5f2e2886bc162e657be60588ead8f10d Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 16 Mar 2023 17:18:03 +0100
Subject: [PATCH] feat: Initial commit PROV from gitlab

https://github.com/zazuko/barnard59-rdf/issues/30
---
 lib/appendGitlabProv.js     |  90 +++++++++++++++++++++++++
 lib/metadata/produceProv.js | 131 ++++++++++++++++++++++++++++++++++++
 lib/namespaces.js           |   3 +-
 metadata.js                 |   3 +-
 4 files changed, 225 insertions(+), 2 deletions(-)
 create mode 100644 lib/appendGitlabProv.js
 create mode 100644 lib/metadata/produceProv.js

diff --git a/lib/appendGitlabProv.js b/lib/appendGitlabProv.js
new file mode 100644
index 0000000..dbe5204
--- /dev/null
+++ b/lib/appendGitlabProv.js
@@ -0,0 +1,90 @@
+import rdf from 'rdf-ext'
+import { Transform } from 'readable-stream'
+import { provFromGitlab } from './metadata/produceProv.js'
+import { prov } from './namespaces.js'
+
+const typePredicate = rdf.namedNode(
+  'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
+
+/**
+ * Pass-through transform for RDF/JS quads. Every incoming quad is forwarded
+ * unchanged; once the input ends, the provenance triples built from the GitLab
+ * CI environment are appended to the output.
+ */
+class ProvMetadata extends Transform {
+  /**
+   * @param {Object} context - the caller's `this` (not used here)
+   * @param {Object} options
+   * @param {Object} [options.subjectsWithClass] - class term; each subject typed
+   *   with it is linked to the job
+   * @param {Object} [options.graph] - when set, appended quads go to this graph
+   * @throws {Error} if a required GitLab CI environment variable is missing
+   */
+  constructor (context, { subjectsWithClass, graph }) {
+    super({ objectMode: true })
+
+    this.type = subjectsWithClass
+    this.provPointer = provFromGitlab()
+    this.graph = graph
+  }
+
+  _transform (quad, encoding, callback) {
+    if (quad.predicate.equals(typePredicate) && quad.object.equals(this.type)) {
+      // PROV-O defines prov:generated (from activity to entity); there is no
+      // prov:generates term in the vocabulary.
+      this.provPointer.addOut(prov.generated, quad.subject)
+    }
+
+    callback(null, quad)
+  }
+
+  _flush (callback) {
+    try {
+      for (const quad of this.provPointer.dataset) {
+        this.push(this.graph
+          ? rdf.quad(quad.subject, quad.predicate, quad.object, this.graph)
+          : quad)
+      }
+    } catch (err) {
+      // Hand the error to the stream exactly once instead of destroying the
+      // stream and then signalling a successful flush.
+      callback(err)
+      return
+    }
+
+    callback()
+  }
+}
+
+/**
+ * Normalizes an argument to an RDF/JS term.
+ *
+ * @param {*} item - object exposing `.term`, IRI string, or anything else
+ * @returns {*} `item.term` if present, a NamedNode for strings, else `item`
+ */
+function toNamedNode (item) {
+  if (item && item.term) {
+    return item.term
+  }
+  return typeof item === 'string' ? rdf.namedNode(item) : item
+}
+
+/**
+ * Creates the transform that appends GitLab provenance to a quad stream.
+ *
+ * @param {Object} [options]
+ * @param {string|Object} [options.subjectsWithClass] - class of the subjects
+ *   that the job is recorded as having generated
+ * @param {string|Object} [options.graph] - graph for the provenance quads
+ * @returns {Promise} resolves to the ProvMetadata transform stream
+ */
+async function appendGitlabProv ({
+  subjectsWithClass, graph = undefined
+} = {}) {
+  return new ProvMetadata(this, {
+    subjectsWithClass: toNamedNode(subjectsWithClass),
+    graph: toNamedNode(graph)
+  })
+}
+
+export default appendGitlabProv
diff --git a/lib/metadata/produceProv.js b/lib/metadata/produceProv.js
new file mode 100644
index 0000000..c5755e0
--- /dev/null
+++ b/lib/metadata/produceProv.js
@@ -0,0 +1,131 @@
+import namespace from '@rdfjs/namespace'
+import rdf from 'rdf-ext'
+import { xsd, schema, prov } from '../namespaces.js'
+
+// GitLab CI variables that must be set to describe the job
+const requiredVars = [
+  'CI_JOB_URL',
+  'CI_JOB_STARTED_AT',
+  'CI_PROJECT_URL',
+  'CI_COMMIT_SHA',
+  'CI_PIPELINE_URL',
+  'CI_PIPELINE_CREATED_AT'
+]
+
+// Drops everything from the last '/' onwards, e.g. 'a/b/c' becomes 'a/b'
+const withoutLastSegment = url => url.split('/').slice(0, -1).join('/')
+
+const ex = namespace('http://example.org/')
+
+const type = rdf.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
+
+/**
+ * Checks that every required GitLab CI variable is set and non-empty.
+ *
+ * @throws {Error} naming the first missing variable
+ */
+function validateVars () {
+  for (const name of requiredVars) {
+    if (!process.env[name]) {
+      throw new Error(`required environment variable ${name}`)
+    }
+  }
+}
+
+/**
+ * Builds the provenance description of the current GitLab CI job from the
+ * CI environment variables.
+ *
+ * @returns {Object} clownface pointer positioned at the job node
+ * @throws {Error} if a required environment variable is missing
+ */
+function provFromGitlab () {
+  validateVars()
+
+  // Job
+  const jobUrl = process.env.CI_JOB_URL
+  const jobStartTime = process.env.CI_JOB_STARTED_AT
+  const jobUri = rdf.namedNode(jobUrl)
+
+  // Codebase
+  const projectUrl = process.env.CI_PROJECT_URL
+  const codebaseUri = rdf.namedNode(projectUrl)
+
+  // Commit
+  const commitSha = process.env.CI_COMMIT_SHA
+  const commitUri = rdf.namedNode(`${projectUrl}/-/commit/${commitSha}`)
+
+  // The pipeline run: all the jobs that were triggered by this commit.
+  // Might include download files
+  const pipelineRun = process.env.CI_PIPELINE_URL
+  const pipelineRunStartTime = process.env.CI_PIPELINE_CREATED_AT
+  const pipelineRunUri = rdf.namedNode(pipelineRun)
+
+  // All the pipelines for the codebase
+  const pipelinesUri = rdf.namedNode(withoutLastSegment(pipelineRun))
+
+  const pointer = rdf.clownface({ dataset: rdf.dataset(), term: jobUri })
+
+  pointer.node(codebaseUri)
+    .addOut(type, ex.Codebase)
+    .addOut(ex.hasPipelines, pipelinesUri)
+
+  pointer.node(commitUri)
+    .addOut(type, ex.Commit)
+    .addOut(ex.triggered, pipelineRunUri)
+
+  pointer.node(pipelinesUri).addOut(ex.contains, pipelineRunUri)
+
+  pointer.node(pipelineRunUri)
+    .addOut(type, ex.PipelineRun)
+    .addOut(prov.startedAtTime, rdf.literal(pipelineRunStartTime, xsd.dateTime))
+    .addOut(ex.hasJob, jobUri)
+
+  // NOTE(review): prov:wasTriggeredBy is not a PROV-O term; prov:wasStartedBy
+  // looks like the intended relation - confirm before changing the output
+  pointer.node(jobUri)
+    .addOut(type, ex.Activity)
+    .addOut(prov.startedAtTime, rdf.literal(jobStartTime, xsd.dateTime))
+    .addOut(prov.wasTriggeredBy, commitUri)
+
+  // Job Optionals
+  // NOTE(review): CI_BUILD_REF_SLUG appears to be the legacy name of
+  // CI_COMMIT_REF_SLUG - confirm the targeted GitLab version still sets it
+  const environment = process.env.CI_BUILD_REF_SLUG
+  if (environment) {
+    const environmentUri = ex[`environment/${environment}`]
+    pointer.node(jobUri).addOut(ex.hasEnvironment, environmentUri)
+  }
+
+  // Codebase optionals
+  const codebaseDescription = process.env.CI_PROJECT_DESCRIPTION
+  if (codebaseDescription) {
+    pointer.node(codebaseUri).addOut(schema.description, codebaseDescription)
+  }
+
+  const codebaseName = process.env.CI_PROJECT_NAME
+  if (codebaseName) {
+    pointer.node(codebaseUri).addOut(schema.name, codebaseName)
+  }
+
+  // Commit Optionals
+  const commitName = process.env.CI_COMMIT_TITLE
+  if (commitName) {
+    pointer.node(commitUri).addOut(schema.name, commitName)
+  }
+
+  const commitAuthor = process.env.CI_COMMIT_AUTHOR
+  if (commitAuthor) {
+    pointer.node(commitUri).addOut(ex.author, commitAuthor)
+  }
+
+  const commitTime = process.env.CI_COMMIT_TIMESTAMP
+  if (commitTime) {
+    pointer.node(commitUri)
+      .addOut(prov.atTime, rdf.literal(commitTime, xsd.dateTime))
+  }
+
+  return pointer
+}
+
+export { provFromGitlab }
diff --git a/lib/namespaces.js b/lib/namespaces.js
index 0885b18..feab445 100644
--- a/lib/namespaces.js
+++ b/lib/namespaces.js
@@ -9,5 +9,6 @@ const _void = namespace('http://rdfs.org/ns/void#')
 const dcat = namespace('http://www.w3.org/ns/dcat#')
 const schema = namespace('http://schema.org/')
 const dcterms = namespace('http://purl.org/dc/terms/')
+const prov = namespace('http://www.w3.org/ns/prov#')
 
-export { cube, rdf, rdfs, sh, xsd, _void, dcat, schema, dcterms }
+export { cube, rdf, rdfs, sh, xsd, _void, dcat, schema, dcterms, prov }
diff --git a/metadata.js b/metadata.js
index 4177ef0..08f7698 100644
--- a/metadata.js
+++ b/metadata.js
@@ -1,4 +1,5 @@
 import append from './lib/append.js'
+import appendGitlabProv from './lib/appendGitlabProv.js'
 import voidStats from './lib/voidStats.js'
 
-export { append, voidStats }
+export { append, voidStats, appendGitlabProv }