Skip to content

Commit

Permalink
WIP: Attempt to stream JSON
Browse files Browse the repository at this point in the history
Since we `JSON.parse` package.json files in order to search them, and they can be large, I thought I'd explore streaming to see whether it gives any significant speed-up.

That remains to be seen, but as a side effect I've started gathering the parent-dependency data from package-lock.json.

It's WIP at the moment: I'm not sure yet what to do about cases where there are multiple parents, and I haven't looked at yarn.
  • Loading branch information
domoscargin committed Dec 20, 2024
1 parent b658603 commit 7c0a6bb
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 13 deletions.
67 changes: 60 additions & 7 deletions build-filtered-data.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { writeFileSync } from 'fs'
import { Readable } from 'stream'
import { json2csv } from 'json-2-csv'
import { RequestError } from 'octokit'
import JSONStream from 'JSONStream'
import es from 'event-stream'

import * as yarnLock from '@yarnpkg/lockfile'
import checkDenyList from './helpers/check-deny-list.mjs'
Expand Down Expand Up @@ -84,6 +87,7 @@ async function analyseRepo(repo) {
let couldntAccess = false
let lastUpdated = null
let repoCreated = null
let parentDependency = null

try {
if (checkDenyList(repoName, repoOwner)) {
Expand Down Expand Up @@ -156,6 +160,7 @@ async function analyseRepo(repo) {
if (!('govuk-frontend' in packageObject.dependencies)) {
indirectDependency = true
throw new IndirectDependencyError()
// TODO: Create a findIndirectDependencies function, add an array of the parents to the output column
}

frontendVersion = packageObject.dependencies['govuk-frontend']
Expand All @@ -164,12 +169,18 @@ async function analyseRepo(repo) {
repo.name
} is using GOV.UK Frontend version ${frontendVersion}`
)
// TODO: Since we only search the Packagelock file if we find a frontend version
// we don't need to do anything but search for the `node_modules/govuk-frontend` entry
// in the getExactFrontendVersion function.
// If however, we don't find govuk-frontend in the dependencies, then we have an indirect dependency
// and we should search the lockfile for the govuk-frontend sub-dependencies
if (frontendVersion.includes('^') || frontendVersion.includes('~')) {
frontendVersion = await getExactFrontendVersion(
repoOwner,
repoName,
frontendVersion,
lockfileType
lockfileType,
parentDependency
)
versionDoubt =
frontendVersion.includes('^') || frontendVersion.includes('~')
Expand All @@ -193,14 +204,16 @@ async function analyseRepo(repo) {
isPrototype,
lastUpdated,
repoCreated,
parentDependency,
}
}

async function getExactFrontendVersion(
repoOwner,
repoName,
frontendVersion,
lockfileType
lockfileType,
parentDependency
) {
try {
if (lockfileType === 'package-lock.json') {
Expand All @@ -209,12 +222,12 @@ async function getExactFrontendVersion(
repoName,
'package-lock.json'
)
const packageLockObject = JSON.parse(packageLockFile.data)
return (
packageLockObject.packages?.['node_modules/govuk-frontend']?.version ||
packageLockObject.dependencies?.['govuk-frontend']?.version ||
frontendVersion
const versionAndParent = await getFrontendVersionFromPackageLock(
packageLockFile.data
)
// eslint-disable-next-line no-unused-vars
parentDependency = versionAndParent.parent
return versionAndParent.version || frontendVersion
} else if (lockfileType === 'yarn.lock') {
const yarnLockFile = await getFileContent(
repoOwner,
Expand All @@ -233,6 +246,46 @@ async function getExactFrontendVersion(
return frontendVersion.replace('^', '').replace('~', '')
}

// TODO: Streaming is probably overkill.
/**
 * Stream-parse the text of a package-lock.json and report the installed
 * govuk-frontend version plus a package that depends on it.
 *
 * The exact version and the parent are tracked independently, so a parent
 * entry that appears after the govuk-frontend entry can no longer wipe out
 * the version that was already found.
 *
 * @param {string} packageLockText - Raw contents of a package-lock.json file
 * @returns {Promise<{version: string|null, parent: string|null}>}
 *   `version` is the exact version from the govuk-frontend entry itself
 *   (falling back to a nested entry under a parent), or null if none was found.
 *   `parent` is the key of the last package seen that lists govuk-frontend in
 *   its `dependencies`, or null. Only one parent is reported for now.
 *   The promise rejects if the text cannot be parsed or a stream errors.
 */
async function getFrontendVersionFromPackageLock(packageLockText) {
  const source = Readable.from([packageLockText])

  // '*' emits the value of every top-level key (`packages`, `dependencies`,
  // but also scalars such as `name` and `lockfileVersion`)
  const parser = JSONStream.parse('*')

  return new Promise((resolve, reject) => {
    const found = { directVersion: null, nestedVersion: null, parent: null }

    const collector = es.mapSync((section) => {
      scanLockfileSection(section, found)
    })

    // pipe() does not forward errors downstream, so every stage needs its own
    // listener; otherwise invalid JSON surfaces as an unhandled 'error' event
    source.on('error', reject)
    parser.on('error', reject)
    collector.on('error', reject)
    collector.on('end', () =>
      resolve({
        version: found.directVersion ?? found.nestedVersion,
        parent: found.parent,
      })
    )

    source.pipe(parser).pipe(collector)
  })
}

/**
 * Inspect one top-level lockfile section and record govuk-frontend findings.
 *
 * @param {unknown} section - A top-level value emitted by the JSON parser
 * @param {{directVersion: string|null, nestedVersion: string|null, parent: string|null}} found
 *   Accumulator shared across sections; updated in place
 */
function scanLockfileSection(section, found) {
  // Scalars (name, version, lockfileVersion, requires) hold no package entries
  if (section === null || typeof section !== 'object') {
    return
  }

  for (const [key, entry] of Object.entries(section)) {
    if (entry === null || typeof entry !== 'object') {
      continue
    }

    // `packages` (lockfile v2/v3) keys entries by install path,
    // `dependencies` (lockfile v1/v2) keys them by package name
    if (key === 'node_modules/govuk-frontend' || key === 'govuk-frontend') {
      if (typeof entry.version === 'string') {
        console.log(
          `${performance.now()}: Found the ${key} package entry, version ${
            entry.version
          }`
        )
        // Keep the first exact version; `packages` precedes `dependencies`
        if (found.directVersion === null) {
          found.directVersion = entry.version
        }
      }
      continue
    }

    const frontendDependency = entry.dependencies?.['govuk-frontend']

    // The '' key is the root project itself: a direct dependency, not a parent
    if (frontendDependency === undefined || key === '') {
      continue
    }

    console.log(
      `${performance.now()}: Found govuk-frontend as a dependency of: ${key}. This has been noted.`
    )
    found.parent = key

    // Under `packages` the value is only a semver range string; a nested
    // entry object (lockfile v1) is needed to know an exact version
    if (typeof frontendDependency?.version === 'string') {
      found.nestedVersion = frontendDependency.version
    }
  }
}

async function writeBatchToFiles(builtData) {
// Write JSON file
await writeFileSync(
Expand Down
117 changes: 114 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
"@octokit/plugin-throttling": "^9.3.0",
"@yarnpkg/lockfile": "^1.1.0",
"eslint": "^9.17.0",
"event-stream": "^4.0.1",
"globals": "^15.13.0",
"json-2-csv": "^5.5.7",
"JSONStream": "^1.3.5",
"octokit": "^4.0.2"
},
"dependencies": {
"json-2-csv": "^5.5.7"
}
}

0 comments on commit 7c0a6bb

Please sign in to comment.