From 4e8558b1002f50f8199691a541df49ca52c41a21 Mon Sep 17 00:00:00 2001
From: Ell
Date: Tue, 15 Oct 2024 17:53:56 +0200
Subject: [PATCH] [Query API] Script dependencies (#1066)

* refactor: basic SupportedQueries registry structure
* feat: allow registering an ascii summarizer for a query directly
* refactor: include query schemas in query definitions
* feat-fix: fixed meta queries being included in print
* wip: basic setup
* wip: basic data structures
* refactor: const all the arrays
* refactor: rename directory to fit the other names
* wip: some work on the actual query
* feat: libraries and sourced files dependency tracking
* feat: ascii summarizer
* feat: read and write function extraction
* feat(query-test): dep cycle breaker and criteria id resolve
* feat-fix(dep-query): support arguments :D
* refactor: cleaned up dependencies query parsing
* feat: allow including custom functions in dependencies query
* refactor: exact name matching
* refactor(dep-query): general overhaul

---------

Co-authored-by: Florian Sihler
---
 src/cli/repl/commands/repl-query.ts           |   4 +-
 .../call/built-in/built-in-source.ts          |   9 ++
 src/documentation/doc-util/doc-query.ts       |   1 -
 .../dependencies-query-executor.ts            | 128 +++++++++++++++
 .../dependencies-query-format.ts              | 129 ++++++++++++++++
 .../lineage-query/lineage-query-format.ts     |   4 +-
 src/queries/query.ts                          |  22 +--
 test/functionality/_helper/query.ts           |  22 ++-
 .../dataflow/query/compound-query-tests.ts    |   2 -
 .../query/dependencies-query-tests.ts         | 146 ++++++++++++++++++
 10 files changed, 449 insertions(+), 18 deletions(-)
 create mode 100644 src/queries/catalog/dependencies-query/dependencies-query-executor.ts
 create mode 100644 src/queries/catalog/dependencies-query/dependencies-query-format.ts
 create mode 100644 test/functionality/dataflow/query/dependencies-query-tests.ts
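For orientation, a minimal sketch of the new query and result shapes, based solely on the types this patch introduces in dependencies-query-format.ts; the R script in the comment, the import path, and the node ids are illustrative only:

import type { DependenciesQuery, DependenciesQueryResult } from './src/queries/catalog/dependencies-query/dependencies-query-format';

// hypothetical input script: library(dplyr); dat <- read.csv("data.csv"); write.csv(dat, "out.csv")
const query: DependenciesQuery = { type: 'dependencies' };

// rough shape of the answer ('.meta' carries timing; node ids depend on the normalized AST)
const sketchedResult: Partial<DependenciesQueryResult> = {
    libraries:   [{ nodeId: 1,  functionName: 'library',   libraryName: 'dplyr' }],
    readData:    [{ nodeId: 6,  functionName: 'read.csv',  source: 'data.csv' }],
    writtenData: [{ nodeId: 12, functionName: 'write.csv', destination: 'out.csv' }]
};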
diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts
index 55c8bb4fcd..d78d559924 100644
--- a/src/cli/repl/commands/repl-query.ts
+++ b/src/cli/repl/commands/repl-query.ts
@@ -7,12 +7,12 @@ import { splitAtEscapeSensitive } from '../../../util/args';
 import { italic } from '../../../util/ansi';
 import { describeSchema } from '../../../util/schema';
 import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query';
-import { executeQueries } from '../../../queries/query';
-
+import { executeQueries } from '../../../queries/query';
 import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline';
 import { jsonReplacer } from '../../../util/json';
 import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema';
+
 async function getDataflow(shell: RShell, remainingLine: string) {
     return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
         shell,
diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts
index 2909b73e5b..86c606435a 100644
--- a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts
+++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts
@@ -25,6 +25,7 @@ import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type';
 import { overwriteEnvironment } from '../../../../../environments/overwrite';
 import type { NoInfo } from '../../../../../../r-bridge/lang-4.x/ast/model/model';
 import { expensiveTrace } from '../../../../../../util/log';
+import fs from 'fs';
 
 let sourceProvider = requestProviderFromFile();
 
@@ -76,6 +77,14 @@ export function processSourceCall(
 }
 
 export function sourceRequest(rootId: NodeId, request: RParseRequest, data: DataflowProcessorInformation, information: DataflowInformation, getId: IdGenerator): DataflowInformation {
+    if(request.request === 'file') {
+        /* check if the file exists and if not, fail */
+        if(!fs.existsSync(request.content)) {
+            dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`);
+            information.graph.markIdForUnknownSideEffects(rootId);
+            return information;
+        }
+    }
     const executor = new RShellExecutor();
 
     // parse, normalize and dataflow the sourced file
diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts
index 04338b9f1f..1545915112 100644
--- a/src/documentation/doc-util/doc-query.ts
+++ b/src/documentation/doc-util/doc-query.ts
@@ -1,7 +1,6 @@
 import type { RShell } from '../../r-bridge/shell';
 import type { Queries, QueryResults, SupportedQueryTypes } from '../../queries/query';
 import { SupportedQueries , executeQueries } from '../../queries/query';
-
 import { PipelineExecutor } from '../../core/pipeline-executor';
 import { DEFAULT_DATAFLOW_PIPELINE } from '../../core/steps/pipeline/default-pipelines';
 import { requestFromInput } from '../../r-bridge/retriever';
diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts
new file mode 100644
index 0000000000..818466ba1e
--- /dev/null
+++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts
@@ -0,0 +1,128 @@
+import type { BasicQueryData } from '../../query';
+import { executeQueries } from '../../query';
+import type {
+    DependenciesQuery,
+    DependenciesQueryResult, DependencyInfo,
+    FunctionInfo,
+    LibraryInfo,
+    ReadInfo, SourceInfo,
+    WriteInfo
+} from './dependencies-query-format';
+import { LibraryFunctions, ReadFunctions, SourceFunctions, WriteFunctions } from './dependencies-query-format';
+import type { CallContextQuery, CallContextQueryResult } from '../call-context-query/call-context-query-format';
+import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex';
+import { getReferenceOfArgument } from '../../../dataflow/graph/graph';
+import { log } from '../../../util/log';
+import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
+import { removeRQuotes } from '../../../r-bridge/retriever';
+import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
+import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
+
+const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number];
+
+const Unknown = 'unknown';
+
+export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult {
+    if(queries.length !== 1) {
+        log.warn('Dependencies query expects only up to one query, but got ', queries.length);
+    }
+    const now = Date.now();
+
+    const query = queries[0];
+    const ignoreDefault = query.ignoreDefaultFunctions ?? false;
+    const libraryFunctions = getFunctionsToCheck(query.libraryFunctions, ignoreDefault, LibraryFunctions);
+    const sourceFunctions = getFunctionsToCheck(query.sourceFunctions, ignoreDefault, SourceFunctions);
+    const readFunctions = getFunctionsToCheck(query.readFunctions, ignoreDefault, ReadFunctions);
+    const writeFunctions = getFunctionsToCheck(query.writeFunctions, ignoreDefault, WriteFunctions);
+
+    const results = executeQueries(data, [
+        ...makeCallContextQuery(libraryFunctions, 'library'),
+        ...makeCallContextQuery(sourceFunctions, 'source'),
+        ...makeCallContextQuery(readFunctions, 'read'),
+        ...makeCallContextQuery(writeFunctions, 'write')
+    ])['call-context'];
+
+    const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({
+        nodeId: id,
+        functionName: vertex.name,
+        libraryName: argument ?? Unknown
+    }), [RType.Symbol]);
+    const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({
+        nodeId: id,
+        functionName: vertex.name,
+        file: argument ?? Unknown
+    }));
+    const readData: ReadInfo[] = getResults(data, results, 'read', readFunctions, (id, vertex, argument) => ({
+        nodeId: id,
+        functionName: vertex.name,
+        source: argument ?? Unknown
+    }));
+    const writtenData: WriteInfo[] = getResults(data, results, 'write', writeFunctions, (id, vertex, argument) => ({
+        nodeId: id,
+        functionName: vertex.name,
+        // write functions that don't have argIndex are assumed to write to stdout
+        destination: argument ?? 'stdout'
+    }));
+
+    return {
+        '.meta': {
+            timing: Date.now() - now
+        },
+        libraries, sourcedFiles, readData, writtenData
+    };
+}
+
+function makeCallContextQuery(functions: readonly FunctionInfo[], kind: string): CallContextQuery[] {
+    return functions.map(f => ({
+        type: 'call-context',
+        callName: f.name,
+        includeAliases: true,
+        callNameExact: true,
+        subkind: f.name,
+        kind
+    }));
+}
+
+function getResults<T extends DependencyInfo>(data: BasicQueryData, results: CallContextQueryResult, kind: string, functions: FunctionInfo[], makeInfo: (id: NodeId, vertex: DataflowGraphVertexFunctionCall, argument: string | undefined) => T | undefined, additionalAllowedTypes?: RType[]) {
+    return Object.entries(results?.kinds[kind]?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => {
+        const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall;
+        const info = functions.find(f => f.name === name) as FunctionInfo;
+        let index = info.argIdx;
+        if(info.argName) {
+            const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName);
+            if(arg >= 0) {
+                index = arg;
+            }
+        }
+        const argument = index !== undefined ? getArgumentValue(data, vertex, index, additionalAllowedTypes) : undefined;
+        return makeInfo(id, vertex, argument);
+    })).filter(x => x !== undefined) ?? [];
+}
+
+function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertexFunctionCall, argumentIndex: number, additionalAllowedTypes: RType[] | undefined): string | undefined {
+    if(vertex && vertex.args.length > argumentIndex) {
+        const arg = getReferenceOfArgument(vertex.args[argumentIndex]);
+        if(arg) {
+            let valueNode = graph.idMap?.get(arg);
+            if(valueNode?.type === RType.Argument) {
+                valueNode = valueNode.value;
+            }
+            if(valueNode) {
+                const allowedTypes = [...SupportedVertexTypes, ...additionalAllowedTypes ?? []];
+                return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : Unknown;
+            }
+        }
+    }
+    return undefined;
+}
+
+function getFunctionsToCheck(customFunctions: FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: FunctionInfo[]): FunctionInfo[] {
+    const functions: FunctionInfo[] = [];
+    if(!ignoreDefaultFunctions) {
+        functions.push(...defaultFunctions);
+    }
+    if(customFunctions) {
+        functions.push(...customFunctions);
+    }
+    return functions;
+}
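To make the indirection concrete: makeCallContextQuery above turns every FunctionInfo into one exact-name call-context query, and its subkind doubles as the function name that getResults later uses for the reverse lookup. A sketch of the expansion for the default SourceFunctions list (this mirrors the code above rather than adding behaviour):

// makeCallContextQuery(SourceFunctions, 'source') produces:
const generated = [{
    type: 'call-context',
    callName: 'source',
    callNameExact: true,   // exact name matching instead of a regex
    includeAliases: true,  // also catches aliases such as `foo <- source; foo("file.R")`
    subkind: 'source',     // used by getResults to find the matching FunctionInfo again
    kind: 'source'
}];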
diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts
new file mode 100644
index 0000000000..73a4bf0b47
--- /dev/null
+++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts
@@ -0,0 +1,129 @@
+import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
+import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
+import type { QueryResults, SupportedQuery } from '../../query';
+import { bold } from '../../../util/ansi';
+import { printAsMs } from '../../../util/time';
+import Joi from 'joi';
+import { executeDependenciesQuery } from './dependencies-query-executor';
+
+// these lists are originally based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R
+export const LibraryFunctions: FunctionInfo[] = [
+    { name: 'library', argIdx: 0, argName: 'package' },
+    { name: 'require', argIdx: 0, argName: 'package' },
+    { name: 'loadNamespace', argIdx: 0, argName: 'package' },
+    { name: 'attachNamespace', argIdx: 0, argName: 'ns' },
+] as const;
+export const SourceFunctions: FunctionInfo[] = [
+    { name: 'source', argIdx: 0, argName: 'file' }
+] as const;
+export const ReadFunctions: FunctionInfo[] = [
+    { name: 'read.table', argIdx: 0, argName: 'file' },
+    { name: 'read.csv', argIdx: 0, argName: 'file' },
+    { name: 'read.csv2', argIdx: 0, argName: 'file' },
+    { name: 'read.delim', argIdx: 0, argName: 'file' },
+    { name: 'read.delim', argIdx: 0, argName: 'file' },
+    { name: 'read.fwf', argIdx: 0, argName: 'file' },
+    { name: 'file', argIdx: 1, argName: 'open' },
+    { name: 'url', argIdx: 1, argName: 'open' },
+    { name: 'load', argIdx: 0, argName: 'file' },
+    { name: 'gzfile', argIdx: 1, argName: 'open' },
+    { name: 'bzfile', argIdx: 1, argName: 'open' },
+    { name: 'download.file', argIdx: 0, argName: 'url' },
+    { name: 'pipe', argIdx: 1, argName: 'open' },
+    { name: 'fifo', argIdx: 1, argName: 'open' },
+    { name: 'unz', argIdx: 1, argName: 'open' },
+    { name: 'matrix', argIdx: 0, argName: 'data' },
+    { name: 'readRDS', argIdx: 0, argName: 'file' },
+    { name: 'readLines', argIdx: 0, argName: 'con' },
+] as const;
+export const WriteFunctions: FunctionInfo[] = [
+    { name: 'save', argIdx: 0, argName: '...' },
+    { name: 'save.image', argIdx: 0, argName: 'file' },
+    { name: 'write', argIdx: 1, argName: 'file' },
+    { name: 'dput', argIdx: 1, argName: 'file' },
+    { name: 'dump', argIdx: 1, argName: 'file' },
+    { name: 'write.table', argIdx: 1, argName: 'file' },
+    { name: 'write.csv', argIdx: 1, argName: 'file' },
+    { name: 'saveRDS', argIdx: 1, argName: 'file' },
+    // write functions that don't have argIndex are assumed to write to stdout
+    { name: 'print' },
+    { name: 'cat' },
+] as const;
+
+export interface FunctionInfo {
+    name: string
+    argIdx?: number
+    argName?: string
+}
+
+export interface DependenciesQuery extends BaseQueryFormat {
+    readonly type: 'dependencies'
+    readonly ignoreDefaultFunctions?: boolean
+    readonly libraryFunctions?: FunctionInfo[]
+    readonly sourceFunctions?: FunctionInfo[]
+    readonly readFunctions?: FunctionInfo[]
+    readonly writeFunctions?: FunctionInfo[]
+}
+
+export interface DependenciesQueryResult extends BaseQueryResult {
+    libraries: LibraryInfo[]
+    sourcedFiles: SourceInfo[]
+    readData: ReadInfo[]
+    writtenData: WriteInfo[]
+}
+
+export interface DependencyInfo {
+    nodeId: NodeId
+    functionName: string
+}
+export type LibraryInfo = (DependencyInfo & { libraryName: 'unknown' | string })
+export type SourceInfo = (DependencyInfo & { file: string })
+export type ReadInfo = (DependencyInfo & { source: string })
+export type WriteInfo = (DependencyInfo & { destination: 'stdout' | string })
+
+function printResultSection<T extends DependencyInfo>(title: string, infos: T[], result: string[], sectionSpecifics: (info: T) => string): void {
+    if(infos.length <= 0) {
+        return;
+    }
+    result.push(`   ╰ ${title}`);
+    const grouped = infos.reduce(function(groups: Map<string, T[]>, i) {
+        const array = groups.get(i.functionName);
+        if(array) {
+            array.push(i);
+        } else {
+            groups.set(i.functionName, [i]);
+        }
+        return groups;
+    }, new Map<string, T[]>());
+    for(const [functionName, infos] of grouped) {
+        result.push(`      ╰ ${functionName}`);
+        result.push(infos.map(i => `         ╰ Node Id: ${i.nodeId}, ${sectionSpecifics(i)}`).join('\n'));
+    }
+}
+
+const functionInfoSchema: Joi.ArraySchema = Joi.array().items(Joi.object({
+    name: Joi.string().required().description('The name of the library function.'),
+    argIdx: Joi.number().optional().description('The index of the argument that contains the library name.'),
+    argName: Joi.string().optional().description('The name of the argument that contains the library name.'),
+})).optional();
+
+export const DependenciesQueryDefinition = {
+    executor: executeDependenciesQuery,
+    asciiSummarizer: (formatter, _processed, queryResults, result) => {
+        const out = queryResults as QueryResults<'dependencies'>['dependencies'];
+        result.push(`Query: ${bold('dependencies', formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
+        printResultSection('Libraries', out.libraries, result, l => `Library Name: ${l.libraryName}`);
+        printResultSection('Sourced Files', out.sourcedFiles, result, s => `Sourced File: ${s.file}`);
+        printResultSection('Read Data', out.readData, result, r => `Source: ${r.source}`);
+        printResultSection('Written Data', out.writtenData, result, w => `Destination: ${w.destination}`);
+        return true;
+    },
+    schema: Joi.object({
+        type: Joi.string().valid('dependencies').required().description('The type of the query.'),
+        ignoreDefaultFunctions: Joi.boolean().optional().description('Should the set of functions that are detected by default be ignored/skipped?'),
+        libraryFunctions: functionInfoSchema.description('The set of library functions to search for.'),
+        sourceFunctions: functionInfoSchema.description('The set of source functions to search for.'),
+        readFunctions: functionInfoSchema.description('The set of data reading functions to search for.'),
+        writeFunctions: functionInfoSchema.description('The set of data writing functions to search for.'),
+    }).description('The dependencies query retrieves and returns the set of all dependencies in the dataflow graph, which includes libraries, sourced files, read data, and written data.')
+} as const satisfies SupportedQuery<'dependencies'>;
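A sketch of a payload that should satisfy the Joi schema above, registering a hypothetical custom reader on top of the defaults (`my.read.fn` is made up for illustration):

const customQuery = {
    type: 'dependencies',
    ignoreDefaultFunctions: false,
    readFunctions: [{ name: 'my.read.fn', argIdx: 0, argName: 'file' }]
};
// expectation: DependenciesQueryDefinition.schema.validate(customQuery).error === undefined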
diff --git a/src/queries/catalog/lineage-query/lineage-query-format.ts b/src/queries/catalog/lineage-query/lineage-query-format.ts
index 78f23e4d02..2f3f514cd0 100644
--- a/src/queries/catalog/lineage-query/lineage-query-format.ts
+++ b/src/queries/catalog/lineage-query/lineage-query-format.ts
@@ -32,7 +32,7 @@ export const LineageQueryDefinition = {
         return true;
     },
     schema: Joi.object({
-        type: Joi.string().valid('lineage').required().description('The type of the query.'),
-        id: Joi.string().required().description('The ID of the node to get the lineage of.')
+        type: Joi.string().valid('lineage').required().description('The type of the query.'),
+        criterion: Joi.string().required().description('The slicing criterion of the node to get the lineage of.')
     }).description('Lineage query used to find the lineage of a node in the dataflow graph')
 } as const satisfies SupportedQuery<'lineage'>;
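The lineage query now takes a slicing criterion rather than a raw node id; a minimal sketch of the adjusted payload, assuming the LineageQuery interface mirrors the schema change above (the criterion value is illustrative):

const lineage = { type: 'lineage', criterion: '2@x' } as const;
// '2@x' addresses the variable `x` in line 2, in flowR's line@name slicing-criterion notation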
diff --git a/src/queries/query.ts b/src/queries/query.ts
index cc83f285a6..c2c065fa2f 100644
--- a/src/queries/query.ts
+++ b/src/queries/query.ts
@@ -1,6 +1,7 @@
-import type { CallContextQuery } from './catalog/call-context-query/call-context-query-format';
+import type {
+    CallContextQuery
+} from './catalog/call-context-query/call-context-query-format';
 import { CallContextQueryDefinition } from './catalog/call-context-query/call-context-query-format';
-
 import type { DataflowGraph } from '../dataflow/graph/graph';
 import type { BaseQueryFormat, BaseQueryResult } from './base-query-format';
 import { guard } from '../util/assert';
@@ -15,18 +16,20 @@ import type { IdMapQuery } from './catalog/id-map-query/id-map-query-format';
 import { IdMapQueryDefinition } from './catalog/id-map-query/id-map-query-format';
 import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format';
 import { NormalizedAstQueryDefinition } from './catalog/normalized-ast-query/normalized-ast-query-format';
-import type { DataflowClusterQuery } from './catalog/cluster-query/cluster-query-format';
-import { ClusterQueryDefinition } from './catalog/cluster-query/cluster-query-format';
-import type { StaticSliceQuery } from './catalog/static-slice-query/static-slice-query-format';
-import { StaticSliceQueryDefinition } from './catalog/static-slice-query/static-slice-query-format';
 import type { LineageQuery } from './catalog/lineage-query/lineage-query-format';
 import { LineageQueryDefinition } from './catalog/lineage-query/lineage-query-format';
-import { type OutputFormatter } from '../util/ansi';
+import type { StaticSliceQuery } from './catalog/static-slice-query/static-slice-query-format';
+import { StaticSliceQueryDefinition } from './catalog/static-slice-query/static-slice-query-format';
+import type { DataflowClusterQuery } from './catalog/cluster-query/cluster-query-format';
+import { ClusterQueryDefinition } from './catalog/cluster-query/cluster-query-format';
+import type { DependenciesQuery } from './catalog/dependencies-query/dependencies-query-format';
+import { DependenciesQueryDefinition } from './catalog/dependencies-query/dependencies-query-format';
+import type { OutputFormatter } from '../util/ansi';
 import type { PipelineOutput } from '../core/steps/pipeline/pipeline';
 import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines';
 import type Joi from 'joi';
 
-export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery | StaticSliceQuery | LineageQuery;
+export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DataflowClusterQuery | StaticSliceQuery | LineageQuery | DependenciesQuery;
 
 export type QueryArgumentsWithType = Query & { type: QueryType };
 
@@ -55,7 +58,8 @@
     'normalized-ast': NormalizedAstQueryDefinition,
     'dataflow-cluster': ClusterQueryDefinition,
     'static-slice': StaticSliceQueryDefinition,
-    'lineage': LineageQueryDefinition
+    'lineage': LineageQueryDefinition,
+    'dependencies': DependenciesQueryDefinition
 } as const satisfies SupportedQueries;
 
 export type SupportedQueryTypes = keyof typeof SupportedQueries;
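Because every entry in the SupportedQueries registry now bundles its executor, ascii summarizer, and schema, a query can be validated generically before it is executed; a small sketch of that lookup (the same pattern the test helper below adopts; the import path is illustrative):

import { SupportedQueries } from './src/queries/query';

const query = { type: 'dependencies' } as const;
const { error } = SupportedQueries[query.type].schema.validate(query);
if(error) {
    throw new Error(`invalid query: ${error.message}`);
}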
diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts
index 8ca65fbc08..8e0cc9910b 100644
--- a/test/functionality/_helper/query.ts
+++ b/test/functionality/_helper/query.ts
@@ -6,7 +6,7 @@ import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/defa
 import { requestFromInput } from '../../../src/r-bridge/retriever';
 import { deterministicCountingIdGenerator } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate';
 import type { QueryResults, Query, QueryResultsWithoutMeta } from '../../../src/queries/query';
-import { executeQueries } from '../../../src/queries/query';
+import { SupportedQueries , executeQueries } from '../../../src/queries/query';
 import { assert } from 'chai';
 import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual-query/virtual-queries';
 import type { TestLabel } from './label';
@@ -57,6 +57,24 @@ export function assertQuery<
     const effectiveName = decorateLabelContext(name, ['query']);
 
     it(effectiveName, async() => {
+        for(const query of queries) {
+            if(query.type === 'compound') {
+                continue;
+            }
+            const queryType = SupportedQueries[query.type];
+            const queryString = JSON.stringify(query, (_key, value) => {
+                if(value instanceof RegExp) {
+                    return value.toString();
+                }
+                // eslint-disable-next-line @typescript-eslint/no-unsafe-return
+                return value;
+            });
+            const validationResult = queryType.schema.validate(JSON.parse(queryString));
+            if(validationResult.error) {
+                assert.fail(`Invalid query: ${validationResult.error.message}`);
+            }
+        }
+
         const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
             shell,
             request: requestFromInput(code),
@@ -73,7 +91,7 @@ export function assertQuery<
         try {
             // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
             const expectedNormalized = typeof expected === 'function' ? await expected(info) : expected;
-            assert.deepStrictEqual(normalized, expectedNormalized, 'The result of the call context query does not match the expected result');
+            assert.deepStrictEqual(normalized, expectedNormalized, 'The result of the query does not match the expected result');
         } catch(e: unknown) {
             console.error('Dataflow-Graph', dataflowGraphToMermaidUrl(info.dataflow));
             throw e;
diff --git a/test/functionality/dataflow/query/compound-query-tests.ts b/test/functionality/dataflow/query/compound-query-tests.ts
index f524fa96f5..6dac0c64a5 100644
--- a/test/functionality/dataflow/query/compound-query-tests.ts
+++ b/test/functionality/dataflow/query/compound-query-tests.ts
@@ -1,5 +1,3 @@
-
-
 import { withShell } from '../../_helper/shell';
 import { assertQuery } from '../../_helper/query';
 import { label } from '../../_helper/label';
diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts
new file mode 100644
index 0000000000..0a7426d559
--- /dev/null
+++ b/test/functionality/dataflow/query/dependencies-query-tests.ts
@@ -0,0 +1,146 @@
+import { withShell } from '../../_helper/shell';
+import { assertQuery } from '../../_helper/query';
+import { label } from '../../_helper/label';
+import { slicingCriterionToId } from '../../../../src/slicing/criterion/parse';
+import type {
+    DependenciesQuery,
+    DependenciesQueryResult, DependencyInfo
+} from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format';
+import type { AstIdMap } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate';
+import type { SingleSlicingCriterion } from '../../../../src/slicing/criterion/parse';
+
+
+const emptyDependencies: Omit<DependenciesQueryResult, '.meta'> = { libraries: [], sourcedFiles: [], readData: [], writtenData: [] };
+
+function decodeIds(res: Partial<DependenciesQueryResult>, idMap: AstIdMap): Partial<DependenciesQueryResult> {
+    const out: Partial<DependenciesQueryResult> = {
+        ...res
+    };
+    for(const [key, value] of Object.entries(res) as [keyof DependenciesQueryResult, DependencyInfo[]][]) {
+        if(key === '.meta') {
+            continue;
+        }
+        // @ts-expect-error -- we do not need key-dependent typing due to the spread
+        out[key] = value.map(({ nodeId, ...rest }) => ({ nodeId: typeof nodeId === 'number' ? nodeId : slicingCriterionToId(String(nodeId) as SingleSlicingCriterion, idMap), ...rest }));
+    }
+    return out;
+}
+
+describe('Dependencies Query', withShell(shell => {
+    /** handles slicing criteria for the node ids */
+    function testQuery(
+        name: string,
+        code: string,
+        expected: Partial<DependenciesQueryResult>,
+        query: Partial<DependenciesQuery> = {}
+    ): void {
+        assertQuery(label(name), shell, code, [{ type: 'dependencies', ...query }], ({ normalize }) => ({
+            dependencies: {
+                ...emptyDependencies,
+                ...decodeIds(expected, normalize.idMap)
+            }
+        }));
+    }
+
+    describe('Simple', () => {
+        testQuery('No dependencies', 'x + 1', {});
+    });
+
+    describe('Libraries', () => {
+        for(const [loadFn, str] of [
+            ['library', false],
+            ['library', true],
+            ['require', true],
+            ['loadNamespace', true],
+            ['attachNamespace', true]
+            /* support attach, support with, support pacman::p_load and the like? */
+        ] as const) {
+            testQuery(`${loadFn} (${str ? 'string' : 'symbol'})`, `${loadFn}(${str ? '"a"' : 'a'})`, {
+                libraries: [{ nodeId: '1@' + loadFn, functionName: loadFn, libraryName: 'a' }]
+            });
+        }
+
+        testQuery('Multiple Libraries', 'library(a)\nlibrary(b)\nrequire(c)', { libraries: [
+            { nodeId: '1@library', functionName: 'library', libraryName: 'a' },
+            { nodeId: '2@library', functionName: 'library', libraryName: 'b' },
+            { nodeId: '3@require', functionName: 'require', libraryName: 'c' }
+        ] });
+
+        testQuery('Call with Alias', 'foo <- library\nfoo(x)', { libraries: [
+            { nodeId: '2@foo', functionName: 'foo', libraryName: 'x' }
+        ] });
+
+
+        /* currently not supported */
+        testQuery('Using a vector to load', 'lapply(c("a", "b", "c"), library, character.only = TRUE)', { libraries: [
+            /* { nodeId: '1@library', functionName: 'library', libraryName: 'a' },
+            { nodeId: '1@library', functionName: 'library', libraryName: 'b' },
+            { nodeId: '1@library', functionName: 'library', libraryName: 'c' } */
+            { nodeId: '1@library', functionName: 'library', libraryName: 'unknown' }
+        ] });
+
+        describe('Custom', () => {
+            const readCustomFile: Partial<DependenciesQuery> = {
+                libraryFunctions: [{ name: 'custom.library', argIdx: 1, argName: 'file' }]
+            };
+            const expected: Partial<DependenciesQueryResult> = {
+                libraries: [{ nodeId: '1@custom.library', functionName: 'custom.library', libraryName: 'my-custom-file' }]
+            };
+            testQuery('Custom (by index)', 'custom.library(1, "my-custom-file", 2)', expected, readCustomFile);
+            testQuery('Custom (by name)', 'custom.library(num1 = 1, num2 = 2, file = "my-custom-file")', expected, readCustomFile);
+            testQuery('Ignore default', 'library(testLibrary)', {}, { ignoreDefaultFunctions: true });
+        });
+    });
+
+    describe('Sourced files', () => {
+        testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: '1@source', functionName: 'source', file: 'test/file.R' }] });
+
+        describe('Custom', () => {
+            const sourceCustomFile: Partial<DependenciesQuery> = {
+                sourceFunctions: [{ name: 'source.custom.file', argIdx: 1, argName: 'file' }]
+            };
+            const expected: Partial<DependenciesQueryResult> = {
+                sourcedFiles: [{ nodeId: '1@source.custom.file', functionName: 'source.custom.file', file: 'my-custom-file' }]
+            };
+            testQuery('Custom (by index)', 'source.custom.file(1, "my-custom-file", 2)', expected, sourceCustomFile);
+            testQuery('Custom (by name)', 'source.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, sourceCustomFile);
+            testQuery('Ignore default', 'source("test/file.R")', {}, { ignoreDefaultFunctions: true });
+        });
+    });
+
+    describe('Read Files', () => {
+        testQuery('read.table', "read.table('test.csv')", { readData: [{ nodeId: '1@read.table', functionName: 'read.table', source: 'test.csv' }] });
+        testQuery('gzfile', 'gzfile("this is my gzip file :)", "test.gz")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] });
+        testQuery('With Argument', 'gzfile(open="test.gz",description="this is my gzip file :)")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] });
+
+        describe('Custom', () => {
+            const readCustomFile: Partial<DependenciesQuery> = {
+                readFunctions: [{ name: 'read.custom.file', argIdx: 1, argName: 'file' }]
+            };
+            const expected: Partial<DependenciesQueryResult> = {
+                readData: [{ nodeId: '1@read.custom.file', functionName: 'read.custom.file', source: 'my-custom-file' }]
+            };
+            testQuery('Custom (by index)', 'read.custom.file(1, "my-custom-file", 2)', expected, readCustomFile);
+            testQuery('Custom (by name)', 'read.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, readCustomFile);
+            testQuery('Ignore default', "read.table('test.csv')", {}, { ignoreDefaultFunctions: true });
+        });
+    });
+
+    describe('Write Files', () => {
+        testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'MyTextFile.txt' }] });
+        testQuery('dump (argument)', 'dump(file="foo.txt", "foo")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'foo.txt' }] });
+        testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: '1@cat', functionName: 'cat', destination: 'stdout' }] });
+
+        describe('Custom', () => {
+            const writeCustomFile: Partial<DependenciesQuery> = {
+                writeFunctions: [{ name: 'write.custom.file', argIdx: 1, argName: 'file' }]
+            };
+            const expected: Partial<DependenciesQueryResult> = {
+                writtenData: [{ nodeId: '1@write.custom.file', functionName: 'write.custom.file', destination: 'my-custom-file' }]
+            };
+            testQuery('Custom (by index)', 'write.custom.file(1, "my-custom-file", 2)', expected, writeCustomFile);
+            testQuery('Custom (by name)', 'write.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, writeCustomFile);
+            testQuery('Ignore default', 'dump("My text", "MyTextFile.txt")', {}, { ignoreDefaultFunctions: true });
+        });
+    });
+}));
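For reference, a sketch of the expectation these tests encode for a two-line script, written without the decodeIds/slicing-criterion indirection (the node ids are hypothetical and depend on the parsed AST; the import path matches the test file above):

import type { DependenciesQueryResult } from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format';

// R input:
//   library(a)
//   source("test/file.R")
const expectedPlain: Partial<DependenciesQueryResult> = {
    libraries:    [{ nodeId: 1, functionName: 'library', libraryName: 'a' }],
    sourcedFiles: [{ nodeId: 5, functionName: 'source', file: 'test/file.R' }]
};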