From 69263cf59e4e073b50c9dd754e55729e974d123f Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 13:22:24 +0200 Subject: [PATCH 01/18] refactor: basic SupportedQueries registry structure --- src/queries/query.ts | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/queries/query.ts b/src/queries/query.ts index 78979e872e..88c8937585 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -28,26 +28,38 @@ export interface BasicQueryData { export type QueryExecutor = (data: BasicQueryData, query: readonly Query[]) => Result; type SupportedQueries = { - [QueryType in Query['type']]: QueryExecutor, BaseQueryResult> + [QueryType in Query['type']]: SupportedQuery +} + +interface SupportedQuery { + executor: QueryExecutor, BaseQueryResult> } export const SupportedQueries = { - 'call-context': executeCallContextQueries, - 'dataflow': executeDataflowQuery, - 'id-map': executeIdMapQuery, - 'normalized-ast': executeNormalizedAstQuery + 'call-context': { + executor: executeCallContextQueries + }, + 'dataflow': { + executor: executeDataflowQuery + }, + 'id-map': { + executor: executeIdMapQuery + }, + 'normalized-ast': { + executor: executeNormalizedAstQuery + } } as const satisfies SupportedQueries; export type SupportedQueryTypes = keyof typeof SupportedQueries; -export type QueryResult = ReturnType; +export type QueryResult = ReturnType; export function executeQueriesOfSameType(data: BasicQueryData, ...queries: readonly SpecificQuery[]): QueryResult { guard(queries.length > 0, 'At least one query must be provided'); /* every query must have the same type */ guard(queries.every(q => q.type === queries[0].type), 'All queries must have the same type'); - const executor = SupportedQueries[queries[0].type]; - guard(executor !== undefined, `Unsupported query type: ${queries[0].type}`); - return executor(data, queries as never) as QueryResult; + const query = SupportedQueries[queries[0].type]; + guard(query !== undefined, `Unsupported query type: ${queries[0].type}`); + return query.executor(data, queries as never) as QueryResult; } function isVirtualQuery< From 9be8a73dee337e14659efea987d8b128322a4ea3 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 13:38:30 +0200 Subject: [PATCH 02/18] feat: allow registering an ascii summarizer for a query directly --- src/cli/repl/commands/repl-query.ts | 35 +++++------------------- src/queries/query.ts | 42 +++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index b170ce2c46..87e1a91037 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -10,15 +10,14 @@ import { bold, italic } from '../../../util/ansi'; import type { CallContextQuerySubKindResult } from '../../../queries/catalog/call-context-query/call-context-query-format'; import { describeSchema } from '../../../util/schema'; import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query'; -import { executeQueries } from '../../../queries/query'; +import { SupportedQueries , executeQueries } from '../../../queries/query'; + import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline'; -import type { BaseQueryMeta } from '../../../queries/base-query-format'; +import type { BaseQueryMeta, BaseQueryResult } from '../../../queries/base-query-format'; import { jsonReplacer } from '../../../util/json'; import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { BuiltIn } from '../../../dataflow/environments/built-in'; -import { graphToMermaidUrl } from '../../../util/mermaid/dfg'; -import { normalizedAstToMermaidUrl } from '../../../util/mermaid/ast'; import { printAsMs } from '../../../util/time'; @@ -106,7 +105,7 @@ function asciiCallContextSubHit(formatter: OutputFormatter, results: readonly Ca return result.join(', '); } -function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'call-context'>['call-context'], processed: PipelineOutput): string { +export function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'call-context'>['call-context'], processed: PipelineOutput): string { /* traverse over 'kinds' and within them 'subkinds' */ const result: string[] = []; for(const [kind, { subkinds }] of Object.entries(results['kinds'])) { @@ -118,7 +117,7 @@ function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'cal return result.join('\n'); } -function summarizeIdsIfTooLong(ids: readonly NodeId[]) { +export function summarizeIdsIfTooLong(ids: readonly NodeId[]) { const naive = ids.join(', '); if(naive.length <= 20) { return naive; @@ -138,28 +137,8 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: const result: string[] = []; for(const [query, queryResults] of Object.entries(results)) { - if(query === '.meta') { - continue; - } - if(query === 'call-context') { - const out = queryResults as QueryResults<'call-context'>['call-context']; - result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`); - result.push(asciiCallContext(formatter, out, processed)); - continue; - } else if(query === 'dataflow') { - const out = queryResults as QueryResults<'dataflow'>['dataflow']; - result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`); - result.push(` ╰ [Dataflow Graph](${graphToMermaidUrl(out.graph)})`); - continue; - } else if(query === 'id-map') { - const out = queryResults as QueryResults<'id-map'>['id-map']; - result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`); - result.push(` ╰ Id List: {${summarizeIdsIfTooLong([...out.idMap.keys()])}}`); - continue; - } else if(query === 'normalized-ast') { - const out = queryResults as QueryResults<'normalized-ast'>['normalized-ast']; - result.push(`Query: ${bold(query, formatter)} (${printAsMs(out['.meta'].timing, 0)})`); - result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`); + const queryType = SupportedQueries[query as SupportedQueryTypes]; + if(queryType.asciiSummarizer(formatter, processed, queryResults as BaseQueryResult, result)) { continue; } diff --git a/src/queries/query.ts b/src/queries/query.ts index 88c8937585..d7662ab6d9 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -14,6 +14,13 @@ import { executeIdMapQuery } from './catalog/id-map-query/id-map-query-executor' import type { IdMapQuery } from './catalog/id-map-query/id-map-query-format'; import { executeNormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-executor'; import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format'; +import { bold, type OutputFormatter } from '../util/ansi'; +import { printAsMs } from '../util/time'; +import { asciiCallContext, summarizeIdsIfTooLong } from '../cli/repl/commands/repl-query'; +import type { PipelineOutput } from '../core/steps/pipeline/pipeline'; +import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines'; +import { graphToMermaidUrl } from '../util/mermaid/dfg'; +import { normalizedAstToMermaidUrl } from '../util/mermaid/ast'; export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery; @@ -32,21 +39,46 @@ type SupportedQueries = { } interface SupportedQuery { - executor: QueryExecutor, BaseQueryResult> + executor: QueryExecutor, BaseQueryResult> + asciiSummarizer: (formatter: OutputFormatter, processed: PipelineOutput, queryResults: BaseQueryResult, resultStrings: string[]) => boolean } export const SupportedQueries = { 'call-context': { - executor: executeCallContextQueries + executor: executeCallContextQueries, + asciiSummarizer: (formatter, processed, queryResults, result) => { + const out = queryResults as QueryResults<'call-context'>['call-context']; + result.push(`Query: ${bold('call-context', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); + result.push(asciiCallContext(formatter, out, processed)); + return true; + } }, 'dataflow': { - executor: executeDataflowQuery + executor: executeDataflowQuery, + asciiSummarizer: (formatter, _processed, queryResults, result) => { + const out = queryResults as QueryResults<'dataflow'>['dataflow']; + result.push(`Query: ${bold('dataflow', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); + result.push(` ╰ [Dataflow Graph](${graphToMermaidUrl(out.graph)})`); + return true; + } }, 'id-map': { - executor: executeIdMapQuery + executor: executeIdMapQuery, + asciiSummarizer: (formatter, _processed, queryResults, result) => { + const out = queryResults as QueryResults<'id-map'>['id-map']; + result.push(`Query: ${bold('id-map', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); + result.push(` ╰ Id List: {${summarizeIdsIfTooLong([...out.idMap.keys()])}}`); + return true; + } }, 'normalized-ast': { - executor: executeNormalizedAstQuery + executor: executeNormalizedAstQuery, + asciiSummarizer: (formatter, _processed, queryResults, result) => { + const out = queryResults as QueryResults<'normalized-ast'>['normalized-ast']; + result.push(`Query: ${bold('normalized-ast', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); + result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`); + return true; + } } } as const satisfies SupportedQueries; From c2487f3e3408420cd43f63faa2873fb9bff7535e Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 13:46:11 +0200 Subject: [PATCH 03/18] refactor: include query schemas in query definitions --- src/queries/query-schema.ts | 35 ++--------------------------------- src/queries/query.ts | 33 +++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/src/queries/query-schema.ts b/src/queries/query-schema.ts index 406bdd1231..14f713243b 100644 --- a/src/queries/query-schema.ts +++ b/src/queries/query-schema.ts @@ -1,38 +1,7 @@ import Joi from 'joi'; -import { CallTargets } from './catalog/call-context-query/call-context-query-format'; +import { SupportedQueries } from './query'; -export const CallContextQuerySchema = Joi.object({ - type: Joi.string().valid('call-context').required().description('The type of the query.'), - callName: Joi.string().required().description('Regex regarding the function name!'), - callNameExact: Joi.boolean().optional().description('Should we automatically add the `^` and `$` anchors to the regex to make it an exact match?'), - kind: Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'), - subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'), - callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'), - includeAliases: Joi.boolean().optional().description('Consider a case like `f <- function_of_interest`, do you want uses of `f` to be included in the results?'), - linkTo: Joi.object({ - type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'), - callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.') - }).optional().description('Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc.') -}).description('Call context query used to find calls in the dataflow graph'); - -export const DataflowQuerySchema = Joi.object({ - type: Joi.string().valid('dataflow').required().description('The type of the query.'), -}).description('The dataflow query simply returns the dataflow graph, there is no need to pass it multiple times!'); - -export const IdMapQuerySchema = Joi.object({ - type: Joi.string().valid('id-map').required().description('The type of the query.'), -}).description('The id map query retrieves the id map from the normalized AST.'); - -export const NormalizedAstQuerySchema = Joi.object({ - type: Joi.string().valid('normalized-ast').required().description('The type of the query.'), -}).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!'); - -export const SupportedQueriesSchema = Joi.alternatives( - CallContextQuerySchema, - DataflowQuerySchema, - IdMapQuerySchema, - NormalizedAstQuerySchema -).description('Supported queries'); +export const SupportedQueriesSchema = Joi.alternatives(Object.values(SupportedQueries).map(q => q.schema)).description('Supported queries'); export const CompoundQuerySchema = Joi.object({ type: Joi.string().valid('compound').required().description('The type of the query.'), diff --git a/src/queries/query.ts b/src/queries/query.ts index d7662ab6d9..a7167ce9df 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,4 +1,5 @@ import type { CallContextQuery } from './catalog/call-context-query/call-context-query-format'; +import { CallTargets } from './catalog/call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './catalog/call-context-query/call-context-query-executor'; @@ -21,6 +22,7 @@ import type { PipelineOutput } from '../core/steps/pipeline/pipeline'; import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines'; import { graphToMermaidUrl } from '../util/mermaid/dfg'; import { normalizedAstToMermaidUrl } from '../util/mermaid/ast'; +import Joi from 'joi'; export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery; @@ -41,6 +43,7 @@ type SupportedQueries = { interface SupportedQuery { executor: QueryExecutor, BaseQueryResult> asciiSummarizer: (formatter: OutputFormatter, processed: PipelineOutput, queryResults: BaseQueryResult, resultStrings: string[]) => boolean + schema: Joi.ObjectSchema } export const SupportedQueries = { @@ -51,7 +54,20 @@ export const SupportedQueries = { result.push(`Query: ${bold('call-context', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); result.push(asciiCallContext(formatter, out, processed)); return true; - } + }, + schema: Joi.object({ + type: Joi.string().valid('call-context').required().description('The type of the query.'), + callName: Joi.string().required().description('Regex regarding the function name!'), + callNameExact: Joi.boolean().optional().description('Should we automatically add the `^` and `$` anchors to the regex to make it an exact match?'), + kind: Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'), + subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'), + callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'), + includeAliases: Joi.boolean().optional().description('Consider a case like `f <- function_of_interest`, do you want uses of `f` to be included in the results?'), + linkTo: Joi.object({ + type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'), + callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.') + }).optional().description('Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc.') + }).description('Call context query used to find calls in the dataflow graph') }, 'dataflow': { executor: executeDataflowQuery, @@ -60,7 +76,10 @@ export const SupportedQueries = { result.push(`Query: ${bold('dataflow', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); result.push(` ╰ [Dataflow Graph](${graphToMermaidUrl(out.graph)})`); return true; - } + }, + schema: Joi.object({ + type: Joi.string().valid('dataflow').required().description('The type of the query.'), + }).description('The dataflow query simply returns the dataflow graph, there is no need to pass it multiple times!') }, 'id-map': { executor: executeIdMapQuery, @@ -69,7 +88,10 @@ export const SupportedQueries = { result.push(`Query: ${bold('id-map', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); result.push(` ╰ Id List: {${summarizeIdsIfTooLong([...out.idMap.keys()])}}`); return true; - } + }, + schema: Joi.object({ + type: Joi.string().valid('id-map').required().description('The type of the query.'), + }).description('The id map query retrieves the id map from the normalized AST.') }, 'normalized-ast': { executor: executeNormalizedAstQuery, @@ -78,7 +100,10 @@ export const SupportedQueries = { result.push(`Query: ${bold('normalized-ast', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); result.push(` ╰ [Normalized AST](${normalizedAstToMermaidUrl(out.normalized.ast)})`); return true; - } + }, + schema: Joi.object({ + type: Joi.string().valid('normalized-ast').required().description('The type of the query.'), + }).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!') } } as const satisfies SupportedQueries; From 256e2d5f3ff91c5f96367f0a8e11fccd8b913998 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 13:50:20 +0200 Subject: [PATCH 04/18] feat-fix: fixed meta queries being included in print --- src/cli/repl/commands/repl-query.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index 87e1a91037..59eb10d95d 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -137,6 +137,10 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: const result: string[] = []; for(const [query, queryResults] of Object.entries(results)) { + if(query === '.meta') { + continue; + } + const queryType = SupportedQueries[query as SupportedQueryTypes]; if(queryType.asciiSummarizer(formatter, processed, queryResults as BaseQueryResult, result)) { continue; From 23a884a20c20bf922cd6a79df2326a1d15177f15 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 14:07:12 +0200 Subject: [PATCH 05/18] wip: basic setup --- .../dependencies/dependencies-query-executor.ts | 12 ++++++++++++ .../dependencies/dependencies-query-format.ts | 12 ++++++++++++ src/queries/query.ts | 12 +++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 src/queries/catalog/dependencies/dependencies-query-executor.ts create mode 100644 src/queries/catalog/dependencies/dependencies-query-format.ts diff --git a/src/queries/catalog/dependencies/dependencies-query-executor.ts b/src/queries/catalog/dependencies/dependencies-query-executor.ts new file mode 100644 index 0000000000..72676c360a --- /dev/null +++ b/src/queries/catalog/dependencies/dependencies-query-executor.ts @@ -0,0 +1,12 @@ +import type { BasicQueryData } from '../../query'; +import type { DependenciesQuery, DependenciesQueryResult } from './dependencies-query-format'; + +export function executeDependenciesQuery({ ast, graph }: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { + const now = Date.now(); + // TODO execute the query + return { + '.meta': { + timing: Date.now() - now + } + }; +} diff --git a/src/queries/catalog/dependencies/dependencies-query-format.ts b/src/queries/catalog/dependencies/dependencies-query-format.ts new file mode 100644 index 0000000000..297596558f --- /dev/null +++ b/src/queries/catalog/dependencies/dependencies-query-format.ts @@ -0,0 +1,12 @@ +import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format'; + +export interface DependenciesQuery extends BaseQueryFormat { + readonly type: 'dependencies'; +} + +export interface DependenciesQueryResult extends BaseQueryResult { + // TODO: Result structure + // DependencyInfo (with the value and the location) + // ScriptDependencyInfo as an object with libraries, data (e.g. read csvs), sources (sourced R-files), outputs (written files, may include "commandline") + +} diff --git a/src/queries/query.ts b/src/queries/query.ts index a7167ce9df..a32ac2d88a 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -23,8 +23,10 @@ import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-p import { graphToMermaidUrl } from '../util/mermaid/dfg'; import { normalizedAstToMermaidUrl } from '../util/mermaid/ast'; import Joi from 'joi'; +import { executeDependenciesQuery } from './catalog/dependencies/dependencies-query-executor'; +import type { DependenciesQuery } from './catalog/dependencies/dependencies-query-format'; -export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery; +export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DependenciesQuery; export type QueryArgumentsWithType = Query & { type: QueryType }; @@ -104,6 +106,14 @@ export const SupportedQueries = { schema: Joi.object({ type: Joi.string().valid('normalized-ast').required().description('The type of the query.'), }).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!') + }, + 'dependencies': { + executor: executeDependenciesQuery, + asciiSummarizer: (formatter, _processed, queryResults, result) => { + // TODO ascii summarizer + }, + // TODO schema + schema: Joi.any() } } as const satisfies SupportedQueries; From a51f765da687f2e45a5bb614e818af46e337b420 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 14:33:57 +0200 Subject: [PATCH 06/18] wip: basic data structures --- .../dependencies-query-executor.ts | 1 - .../dependencies/dependencies-query-format.ts | 18 +++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/queries/catalog/dependencies/dependencies-query-executor.ts b/src/queries/catalog/dependencies/dependencies-query-executor.ts index 72676c360a..f8c4382573 100644 --- a/src/queries/catalog/dependencies/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies/dependencies-query-executor.ts @@ -3,7 +3,6 @@ import type { DependenciesQuery, DependenciesQueryResult } from './dependencies- export function executeDependenciesQuery({ ast, graph }: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { const now = Date.now(); - // TODO execute the query return { '.meta': { timing: Date.now() - now diff --git a/src/queries/catalog/dependencies/dependencies-query-format.ts b/src/queries/catalog/dependencies/dependencies-query-format.ts index 297596558f..ab80b929bb 100644 --- a/src/queries/catalog/dependencies/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies/dependencies-query-format.ts @@ -1,12 +1,24 @@ import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format'; +import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; + +// these lists are based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R +export const LibraryFunctions = ['library', 'require']; +export const SourceFunctions = ['source']; +export const ReadFunctions = ['read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.fwf', 'file', 'url', 'load', 'gzfile', 'bzfile', 'download.file', 'pipe', 'fifo', 'unz', 'data.frame', 'matrix', 'readRDS', 'readLines'] as const; +export const WriteFunctions = ['save', 'save.image', 'write', 'dput', 'dump', 'write.table', 'write.csv', 'saveRDS', 'print', 'cat'] as const; export interface DependenciesQuery extends BaseQueryFormat { readonly type: 'dependencies'; } export interface DependenciesQueryResult extends BaseQueryResult { - // TODO: Result structure - // DependencyInfo (with the value and the location) - // ScriptDependencyInfo as an object with libraries, data (e.g. read csvs), sources (sourced R-files), outputs (written files, may include "commandline") + libraries: (DependencyInfo & { libraryName: string })[] + sourcedFiles: (DependencyInfo & { file: string })[] + readData: (DependencyInfo & { source: string })[] + writtenData: (DependencyInfo & { destination: 'stdout' | string })[] +} +export interface DependencyInfo { + nodeId: NodeId + functionName: string } From ba12ce30321b73d9b5d160cf5689dfd1730dfa8d Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Fri, 11 Oct 2024 14:36:04 +0200 Subject: [PATCH 07/18] refactor: const all the arrays --- src/queries/catalog/dependencies/dependencies-query-format.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/queries/catalog/dependencies/dependencies-query-format.ts b/src/queries/catalog/dependencies/dependencies-query-format.ts index ab80b929bb..6b72e8a15e 100644 --- a/src/queries/catalog/dependencies/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies/dependencies-query-format.ts @@ -2,8 +2,8 @@ import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; // these lists are based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R -export const LibraryFunctions = ['library', 'require']; -export const SourceFunctions = ['source']; +export const LibraryFunctions = ['library', 'require'] as const; +export const SourceFunctions = ['source'] as const; export const ReadFunctions = ['read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.fwf', 'file', 'url', 'load', 'gzfile', 'bzfile', 'download.file', 'pipe', 'fifo', 'unz', 'data.frame', 'matrix', 'readRDS', 'readLines'] as const; export const WriteFunctions = ['save', 'save.image', 'write', 'dput', 'dump', 'write.table', 'write.csv', 'saveRDS', 'print', 'cat'] as const; From 6eab77bf34ed2f2241324d52b0ce8e43245046d0 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 14 Oct 2024 12:39:08 +0200 Subject: [PATCH 08/18] refactor: rename directory to fit the other names --- .../dependencies-query-executor.ts | 0 .../dependencies-query-format.ts | 0 src/queries/query.ts | 4 ++-- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/queries/catalog/{dependencies => dependencies-query}/dependencies-query-executor.ts (100%) rename src/queries/catalog/{dependencies => dependencies-query}/dependencies-query-format.ts (100%) diff --git a/src/queries/catalog/dependencies/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts similarity index 100% rename from src/queries/catalog/dependencies/dependencies-query-executor.ts rename to src/queries/catalog/dependencies-query/dependencies-query-executor.ts diff --git a/src/queries/catalog/dependencies/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts similarity index 100% rename from src/queries/catalog/dependencies/dependencies-query-format.ts rename to src/queries/catalog/dependencies-query/dependencies-query-format.ts diff --git a/src/queries/query.ts b/src/queries/query.ts index a32ac2d88a..f605137013 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -23,8 +23,8 @@ import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-p import { graphToMermaidUrl } from '../util/mermaid/dfg'; import { normalizedAstToMermaidUrl } from '../util/mermaid/ast'; import Joi from 'joi'; -import { executeDependenciesQuery } from './catalog/dependencies/dependencies-query-executor'; -import type { DependenciesQuery } from './catalog/dependencies/dependencies-query-format'; +import { executeDependenciesQuery } from './catalog/dependencies-query/dependencies-query-executor'; +import type { DependenciesQuery } from './catalog/dependencies-query/dependencies-query-format'; export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DependenciesQuery; From e642de15efcaa9cc2734add0acb9c2071e71d7e4 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 14 Oct 2024 13:25:59 +0200 Subject: [PATCH 09/18] wip: some work on the actual query --- .../dependencies-query-executor.ts | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index f8c4382573..da7ab489a1 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -1,11 +1,49 @@ import type { BasicQueryData } from '../../query'; +import { executeQueries } from '../../query'; import type { DependenciesQuery, DependenciesQueryResult } from './dependencies-query-format'; +import { ReadFunctions, SourceFunctions, WriteFunctions , LibraryFunctions } from './dependencies-query-format'; +import type { CallContextQuery } from '../call-context-query/call-context-query-format'; +import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex'; +import { getReferenceOfArgument } from '../../../dataflow/graph/graph'; +import type { RArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-argument'; +import { log } from '../../../util/log'; + +export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { + if(queries.length !== 1) { + log.warn('Dependencies query expects only up to one query, but got ', queries.length); + } -export function executeDependenciesQuery({ ast, graph }: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { const now = Date.now(); + + const results = executeQueries(data, [ + makeCallContextQuery(LibraryFunctions, 'library'), + makeCallContextQuery(SourceFunctions, 'source'), + makeCallContextQuery(ReadFunctions, 'read'), + makeCallContextQuery(WriteFunctions, 'write') + ])['call-context']; + + const libraries = results.kinds.libary.subkinds['.'].map(({ id }) => { + const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; + if(vertex){ + const arg = getReferenceOfArgument(vertex.args[0]); + if(arg) { + const valueNode = data.graph.idMap?.get(arg) as RArgument; + if(valueNode) { + return { nodeId: id, functionName: vertex.name, libraryName: valueNode.lexeme }; + } + } + } + return undefined; + }).filter(x => x !== undefined); + return { '.meta': { timing: Date.now() - now - } + }, + libraries }; } + +function makeCallContextQuery(functions: readonly string[], kind: string): CallContextQuery { + return { type: 'call-context', callName: `^(${functions.join('|')})$`, kind, includeAliases: true }; +} From 083f3d6ff46c2aebfac6ff9a53e0bb26f5844111 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 14 Oct 2024 15:16:36 +0200 Subject: [PATCH 10/18] feat: libraries and sourced files dependency tracking --- .../dependencies-query-executor.ts | 64 ++++++++++++++----- .../dependencies-query-format.ts | 12 ++-- src/queries/query.ts | 8 ++- test/functionality/_helper/query.ts | 2 +- .../query/dependencies-query-tests.ts | 31 +++++++++ 5 files changed, 93 insertions(+), 24 deletions(-) create mode 100644 test/functionality/dataflow/query/dependencies-query-tests.ts diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index da7ab489a1..4dc2c63f79 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -1,18 +1,18 @@ import type { BasicQueryData } from '../../query'; import { executeQueries } from '../../query'; -import type { DependenciesQuery, DependenciesQueryResult } from './dependencies-query-format'; -import { ReadFunctions, SourceFunctions, WriteFunctions , LibraryFunctions } from './dependencies-query-format'; +import type { DependenciesQuery, DependenciesQueryResult, LibraryInfo, SourceInfo } from './dependencies-query-format'; +import { LibraryFunctions, ReadFunctions, SourceFunctions, WriteFunctions } from './dependencies-query-format'; import type { CallContextQuery } from '../call-context-query/call-context-query-format'; import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex'; import { getReferenceOfArgument } from '../../../dataflow/graph/graph'; -import type { RArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-argument'; import { log } from '../../../util/log'; +import { RType } from '../../../r-bridge/lang-4.x/ast/model/type'; +import { removeRQuotes } from '../../../r-bridge/retriever'; export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { log.warn('Dependencies query expects only up to one query, but got ', queries.length); } - const now = Date.now(); const results = executeQueries(data, [ @@ -22,28 +22,60 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly makeCallContextQuery(WriteFunctions, 'write') ])['call-context']; - const libraries = results.kinds.libary.subkinds['.'].map(({ id }) => { + const libraries: LibraryInfo[] = results.kinds['library']?.subkinds['.'].map(({ id }) => { const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - if(vertex){ - const arg = getReferenceOfArgument(vertex.args[0]); - if(arg) { - const valueNode = data.graph.idMap?.get(arg) as RArgument; - if(valueNode) { - return { nodeId: id, functionName: vertex.name, libraryName: valueNode.lexeme }; - } - } + const libraryName = getArgumentValue(data, vertex, 0, [RType.String, RType.Symbol]); + if(libraryName) { + return { + nodeId: id, + functionName: vertex.name, + libraryName + }; + } + return undefined; + }).filter(x => x !== undefined) ?? []; + const sourcedFiles: SourceInfo[] = results.kinds['source']?.subkinds['.'].map(({ id }) => { + const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; + const file = getArgumentValue(data, vertex, 0, [RType.String]); + if(file) { + return { + nodeId: id, + functionName: vertex.name, + file + }; } return undefined; - }).filter(x => x !== undefined); + }).filter(x => x !== undefined) ?? []; return { '.meta': { timing: Date.now() - now }, - libraries + libraries, sourcedFiles, + readData: [], + writtenData: [] }; } function makeCallContextQuery(functions: readonly string[], kind: string): CallContextQuery { - return { type: 'call-context', callName: `^(${functions.join('|')})$`, kind, includeAliases: true }; + return { + type: 'call-context', + callName: `(${functions.map(f => f.replace('.', '\\.')).join('|')})`, + includeAliases: true, + subkind: '.', + kind + }; +} + +function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertexFunctionCall, argumentIndex: number, allowedTypes: RType[]): string | undefined { + if(vertex) { + const arg = getReferenceOfArgument(vertex.args[argumentIndex]); + if(arg) { + const valueNode = graph.idMap?.get(arg); + if(valueNode) { + return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : 'unknown'; + } + } + } + return undefined; } diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 6b72e8a15e..578eb99a30 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -12,13 +12,17 @@ export interface DependenciesQuery extends BaseQueryFormat { } export interface DependenciesQueryResult extends BaseQueryResult { - libraries: (DependencyInfo & { libraryName: string })[] - sourcedFiles: (DependencyInfo & { file: string })[] - readData: (DependencyInfo & { source: string })[] - writtenData: (DependencyInfo & { destination: 'stdout' | string })[] + libraries: LibraryInfo[] + sourcedFiles: SourceInfo[] + readData: ReadInfo[] + writtenData: WriteInfo[] } export interface DependencyInfo { nodeId: NodeId functionName: string } +export type LibraryInfo = (DependencyInfo & { libraryName: 'unknown' | string }) +export type SourceInfo = (DependencyInfo & { file: string }) +export type ReadInfo = (DependencyInfo & { source: string }) +export type WriteInfo = (DependencyInfo & { destination: 'stdout' | string }) diff --git a/src/queries/query.ts b/src/queries/query.ts index f605137013..311c902889 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -109,11 +109,13 @@ export const SupportedQueries = { }, 'dependencies': { executor: executeDependenciesQuery, - asciiSummarizer: (formatter, _processed, queryResults, result) => { + asciiSummarizer: (_formatter, _processed, _queryResults, _result) => { // TODO ascii summarizer + return false; }, - // TODO schema - schema: Joi.any() + schema: Joi.object({ + type: Joi.string().valid('dependencies').required().description('The type of the query.'), + }).description('The dependencies query retrieves and returns the set of all dependencies in the dataflow graph, which includes libraries, sourced files, read data, and written data.') } } as const satisfies SupportedQueries; diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 93d60b4d9c..75e411fa32 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -69,7 +69,7 @@ export function assertQuery< try { // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment const expectedNormalized = typeof expected === 'function' ? expected(info) : expected; - assert.deepStrictEqual(normalized, expectedNormalized, 'The result of the call context query does not match the expected result'); + assert.deepStrictEqual(normalized, expectedNormalized, 'The result of the query does not match the expected result'); } catch(e: unknown) { console.error('Dataflow-Graph', dataflowGraphToMermaidUrl(info.dataflow)); throw e; diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts new file mode 100644 index 0000000000..c9eccd0951 --- /dev/null +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -0,0 +1,31 @@ +import { withShell } from '../../_helper/shell'; +import { assertQuery } from '../../_helper/query'; +import { label } from '../../_helper/label'; +import type { DependenciesQueryResult } from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format'; + +describe('Dependencies Query', withShell(shell => { + function testQuery(name: string, code: string, expected: Partial): void { + assertQuery(label(name), shell, code, [{ type: 'dependencies' }], { + dependencies: { + libraries: expected.libraries ?? [], + sourcedFiles: expected.sourcedFiles ?? [], + readData: expected.readData ?? [], + writtenData: expected.writtenData ?? [] + } + }); + } + + describe('Simple', () => { + testQuery('No dependencies', 'x + 1', {}); + }); + + describe('Libraries', () => { + testQuery('Single library (symbol)', 'library(testLibrary)', { libraries: [{ nodeId: 3, functionName: 'library', libraryName: 'testLibrary' }] }); + testQuery('Single library (string)', 'library("testLibrary")', { libraries: [{ nodeId: 3, functionName: 'library', libraryName: 'testLibrary' }] }); + testQuery('Single require (string)', 'require("testLibrary")', { libraries: [{ nodeId: 3, functionName: 'require', libraryName: 'testLibrary' }] }); + }); + + describe('Sourced files', () => { + testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: 3, functionName: 'source', file: 'test/file.R' }] }); + }); +})); From 76edcd50e43db5da03ce266939ae74b7d07d01b2 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 14 Oct 2024 15:47:17 +0200 Subject: [PATCH 11/18] feat: ascii summarizer --- .../dependencies-query-format.ts | 22 +++++++++++++++++++ src/queries/query.ts | 12 +++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 578eb99a30..4fca395ba3 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -26,3 +26,25 @@ export type LibraryInfo = (DependencyInfo & { libraryName: 'unknown' | string }) export type SourceInfo = (DependencyInfo & { file: string }) export type ReadInfo = (DependencyInfo & { source: string }) export type WriteInfo = (DependencyInfo & { destination: 'stdout' | string }) + + + +export function printResultSection(title: string, infos: T[], result: string[], sectionSpecifics: (info: T) => string): void { + if(infos.length <= 0) { + return; + } + result.push(` ╰ ${title}`); + const grouped = infos.reduce(function(groups: Map, i) { + const array = groups.get(i.functionName); + if(array) { + array.push(i); + } else { + groups.set(i.functionName, [i]); + } + return groups; + }, new Map()); + for(const [functionName, infos] of grouped) { + result.push(` ╰ ${functionName}`); + result.push(infos.map(i => ` ╰ Node Id: ${i.nodeId}, ${sectionSpecifics(i)}`).join('\n')); + } +} diff --git a/src/queries/query.ts b/src/queries/query.ts index 311c902889..e46e97e9ca 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -25,6 +25,7 @@ import { normalizedAstToMermaidUrl } from '../util/mermaid/ast'; import Joi from 'joi'; import { executeDependenciesQuery } from './catalog/dependencies-query/dependencies-query-executor'; import type { DependenciesQuery } from './catalog/dependencies-query/dependencies-query-format'; +import { printResultSection } from './catalog/dependencies-query/dependencies-query-format'; export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DependenciesQuery; @@ -109,9 +110,14 @@ export const SupportedQueries = { }, 'dependencies': { executor: executeDependenciesQuery, - asciiSummarizer: (_formatter, _processed, _queryResults, _result) => { - // TODO ascii summarizer - return false; + asciiSummarizer: (formatter, _processed, queryResults, result) => { + const out = queryResults as QueryResults<'dependencies'>['dependencies']; + result.push(`Query: ${bold('dependencies', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); + printResultSection('Libraries', out.libraries, result, l => `Library Name: ${l.libraryName}`); + printResultSection('Sourced Files', out.sourcedFiles, result, s => `Sourced File: ${s.file}`); + printResultSection('Read Data', out.readData, result, r => `Source: ${r.source}`); + printResultSection('Written Data', out.writtenData, result, w => `Destination: ${w.destination}`); + return true; }, schema: Joi.object({ type: Joi.string().valid('dependencies').required().description('The type of the query.'), From 86029143637946abd5c35defa7aa5d1e02080f83 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Mon, 14 Oct 2024 16:21:43 +0200 Subject: [PATCH 12/18] feat: read and write function extraction --- .../dependencies-query-executor.ts | 96 +++++++++++++++---- .../dependencies-query-format.ts | 45 ++++++++- .../query/dependencies-query-tests.ts | 10 ++ 3 files changed, 127 insertions(+), 24 deletions(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 4dc2c63f79..4ae3e488c4 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -1,6 +1,6 @@ import type { BasicQueryData } from '../../query'; import { executeQueries } from '../../query'; -import type { DependenciesQuery, DependenciesQueryResult, LibraryInfo, SourceInfo } from './dependencies-query-format'; +import type { DependenciesQuery, DependenciesQueryResult, FunctionInfo, LibraryInfo, ReadInfo, SourceInfo, WriteInfo } from './dependencies-query-format'; import { LibraryFunctions, ReadFunctions, SourceFunctions, WriteFunctions } from './dependencies-query-format'; import type { CallContextQuery } from '../call-context-query/call-context-query-format'; import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex'; @@ -8,6 +8,7 @@ import { getReferenceOfArgument } from '../../../dataflow/graph/graph'; import { log } from '../../../util/log'; import { RType } from '../../../r-bridge/lang-4.x/ast/model/type'; import { removeRQuotes } from '../../../r-bridge/retriever'; +import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { @@ -16,10 +17,10 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly const now = Date.now(); const results = executeQueries(data, [ - makeCallContextQuery(LibraryFunctions, 'library'), - makeCallContextQuery(SourceFunctions, 'source'), - makeCallContextQuery(ReadFunctions, 'read'), - makeCallContextQuery(WriteFunctions, 'write') + ...makeCallContextQuery(LibraryFunctions, 'library'), + ...makeCallContextQuery(SourceFunctions, 'source'), + ...makeCallContextQuery(ReadFunctions.map(f => f.name), 'read', true), + ...makeCallContextQuery(WriteFunctions.map(f => f.name), 'write', true) ])['call-context']; const libraries: LibraryInfo[] = results.kinds['library']?.subkinds['.'].map(({ id }) => { @@ -30,7 +31,7 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly nodeId: id, functionName: vertex.name, libraryName - }; + } as LibraryInfo; } return undefined; }).filter(x => x !== undefined) ?? []; @@ -42,33 +43,90 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly nodeId: id, functionName: vertex.name, file - }; + } as SourceInfo; } return undefined; }).filter(x => x !== undefined) ?? []; + const readData: ReadInfo[] = Object.entries(results.kinds['read']?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { + const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; + const info = ReadFunctions.find(f => f.name === name) as FunctionInfo; + let index = info.argIdx as number; + if(info.argName) { + const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName); + if(arg >= 0) { + index = arg; + } + } + const source = getArgumentValue(data, vertex, index, [RType.String]); + if(source) { + return { + nodeId: id, + functionName: vertex.name, + source + } as ReadInfo; + } + return undefined; + })).filter(x => x !== undefined) ?? []; + const writtenData: WriteInfo[] = Object.entries(results.kinds['write']?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { + const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; + const info = WriteFunctions.find(f => f.name === name) as FunctionInfo; + let index = info.argIdx; + if(info.argName) { + const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName); + if(arg >= 0) { + index = arg; + } + } + if(index) { + const destination = getArgumentValue(data, vertex, index, [RType.String]); + if(destination) { + return { + nodeId: id, + functionName: vertex.name, + destination + } as WriteInfo; + } + } else if(vertex) { + // write functions that don't have argIndex are assumed to write to stdout + return { + nodeId: id, + functionName: vertex.name, + destination: 'stdout' + } as WriteInfo; + } + return undefined; + })).filter(x => x !== undefined) ?? []; return { '.meta': { timing: Date.now() - now }, - libraries, sourcedFiles, - readData: [], - writtenData: [] + libraries, sourcedFiles, readData, writtenData }; } -function makeCallContextQuery(functions: readonly string[], kind: string): CallContextQuery { - return { - type: 'call-context', - callName: `(${functions.map(f => f.replace('.', '\\.')).join('|')})`, - includeAliases: true, - subkind: '.', - kind - }; +function makeCallContextQuery(functions: readonly string[], kind: string, groupByName = false): CallContextQuery[] { + if(groupByName){ + return functions.map(f => ({ + type: 'call-context', + callName: `^${f}$`, + includeAliases: true, + subkind: f, + kind + })); + } else { + return [{ + type: 'call-context', + callName: `^(${functions.map(f => f.replace('.', '\\.')).join('|')})$`, + includeAliases: true, + subkind: '.', + kind + }]; + } } function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertexFunctionCall, argumentIndex: number, allowedTypes: RType[]): string | undefined { - if(vertex) { + if(vertex && vertex.args.length > argumentIndex) { const arg = getReferenceOfArgument(vertex.args[argumentIndex]); if(arg) { const valueNode = graph.idMap?.get(arg); diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 4fca395ba3..e2c400f50c 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -4,11 +4,48 @@ import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/nod // these lists are based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R export const LibraryFunctions = ['library', 'require'] as const; export const SourceFunctions = ['source'] as const; -export const ReadFunctions = ['read.table', 'read.csv', 'read.csv2', 'read.delim', 'read.fwf', 'file', 'url', 'load', 'gzfile', 'bzfile', 'download.file', 'pipe', 'fifo', 'unz', 'data.frame', 'matrix', 'readRDS', 'readLines'] as const; -export const WriteFunctions = ['save', 'save.image', 'write', 'dput', 'dump', 'write.table', 'write.csv', 'saveRDS', 'print', 'cat'] as const; +export const ReadFunctions: FunctionInfo[] = [ + { name: 'read.table', argIdx: 0, argName: 'file' }, + { name: 'read.csv', argIdx: 0, argName: 'file' }, + { name: 'read.csv2', argIdx: 0, argName: 'file' }, + { name: 'read.delim', argIdx: 0, argName: 'file' }, + { name: 'read.delim', argIdx: 0, argName: 'file' }, + { name: 'read.fwf', argIdx: 0, argName: 'file' }, + { name: 'file', argIdx: 1, argName: 'open' }, + { name: 'url', argIdx: 1, argName: 'open' }, + { name: 'load', argIdx: 0, argName: 'file' }, + { name: 'gzfile', argIdx: 1, argName: 'open' }, + { name: 'bzfile', argIdx: 1, argName: 'open' }, + { name: 'download.file', argIdx: 0, argName: 'url' }, + { name: 'pipe', argIdx: 1, argName: 'open' }, + { name: 'fifo', argIdx: 1, argName: 'open' }, + { name: 'unz', argIdx: 1, argName: 'open' }, + { name: 'matrix', argIdx: 0, argName: 'data' }, + { name: 'readRDS', argIdx: 0, argName: 'file' }, + { name: 'readLines', argIdx: 0, argName: 'con' }, +] as const; +export const WriteFunctions: FunctionInfo[] = [ + { name: 'save', argIdx: 0, argName: '...' }, + { name: 'save.image', argIdx: 0, argName: 'file' }, + { name: 'write', argIdx: 1, argName: 'file' }, + { name: 'dput', argIdx: 1, argName: 'file' }, + { name: 'dump', argIdx: 1, argName: 'file' }, + { name: 'write.table', argIdx: 1, argName: 'file' }, + { name: 'write.csv', argIdx: 1, argName: 'file' }, + { name: 'saveRDS', argIdx: 1, argName: 'file' }, + // write functions that don't have argIndex are assumed to write to stdout + { name: 'print' }, + { name: 'cat' }, +] as const; + +export interface FunctionInfo { + name: string + argIdx?: number + argName?: string +} export interface DependenciesQuery extends BaseQueryFormat { - readonly type: 'dependencies'; + readonly type: 'dependencies' } export interface DependenciesQueryResult extends BaseQueryResult { @@ -27,8 +64,6 @@ export type SourceInfo = (DependencyInfo & { file: string }) export type ReadInfo = (DependencyInfo & { source: string }) export type WriteInfo = (DependencyInfo & { destination: 'stdout' | string }) - - export function printResultSection(title: string, infos: T[], result: string[], sectionSpecifics: (info: T) => string): void { if(infos.length <= 0) { return; diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index c9eccd0951..bab67285b7 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -28,4 +28,14 @@ describe('Dependencies Query', withShell(shell => { describe('Sourced files', () => { testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: 3, functionName: 'source', file: 'test/file.R' }] }); }); + + describe('Read Files', () => { + testQuery('read.table', "read.table('test.csv')", { readData: [{ nodeId: 3, functionName: 'read.table', source: 'test.csv' }] }); + testQuery('gzfile', 'gzfile("this is my gzip file :)", "test.gz")', { readData: [{ nodeId: 5, functionName: 'gzfile', source: 'test.gz' }] }); + }); + + describe('Write Files', () => { + testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: 5, functionName: 'dump', destination: 'MyTextFile.txt' }] }); + testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: 3, functionName: 'cat', destination: 'stdout' }] }); + }); })); From b424ed3472b3d5a632e5eb0a6cb28cc25649c82d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 15 Oct 2024 10:05:15 +0200 Subject: [PATCH 13/18] feat(query-test): dep cycle breaker and criteria id resolve --- src/cli/repl/commands/repl-query.ts | 102 +---------------- src/queries/query-schema.ts | 4 +- src/queries/query.ts | 106 +++++++++++++++++- test/functionality/_helper/query.ts | 3 +- .../dataflow/query/compound-query-tests.ts | 2 - .../query/dependencies-query-tests.ts | 41 +++++-- 6 files changed, 141 insertions(+), 117 deletions(-) diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index 59eb10d95d..294f8749d6 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -4,22 +4,16 @@ import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default- import { fileProtocol, requestFromInput } from '../../../r-bridge/retriever'; import type { ReplCommand, ReplOutput } from './repl-main'; import { splitAtEscapeSensitive } from '../../../util/args'; -import type { OutputFormatter } from '../../../util/ansi'; -import { bold, italic } from '../../../util/ansi'; +import { italic } from '../../../util/ansi'; -import type { CallContextQuerySubKindResult } from '../../../queries/catalog/call-context-query/call-context-query-format'; import { describeSchema } from '../../../util/schema'; import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query'; -import { SupportedQueries , executeQueries } from '../../../queries/query'; +import { asciiSummaryOfQueryResult , executeQueries } from '../../../queries/query'; import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline'; -import type { BaseQueryMeta, BaseQueryResult } from '../../../queries/base-query-format'; import { jsonReplacer } from '../../../util/json'; import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema'; -import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; -import { BuiltIn } from '../../../dataflow/environments/built-in'; -import { printAsMs } from '../../../util/time'; async function getDataflow(shell: RShell, remainingLine: string) { return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { @@ -71,98 +65,6 @@ async function processQueryArgs(line: string, shell: RShell, output: ReplOutput) }; } -function nodeString(id: NodeId, formatter: OutputFormatter, processed: PipelineOutput): string { - if(id === BuiltIn) { - return italic('built-in', formatter); - } - const node = processed.normalize.idMap.get(id); - if(node === undefined) { - return `UNKNOWN: ${id}`; - } - return `${italic('`' + (node.lexeme ?? node.info.fullLexeme ?? 'UNKNOWN') + '`', formatter)} (L.${node.location?.[0]})`; -} - -function asciiCallContextSubHit(formatter: OutputFormatter, results: readonly CallContextQuerySubKindResult[], processed: PipelineOutput): string { - const result: string[] = []; - for(const { id, calls = [], linkedIds = [], aliasRoots = [] } of results) { - const node = processed.normalize.idMap.get(id); - if(node === undefined) { - result.push(` ${bold('UNKNOWN: ' + JSON.stringify({ calls, linkedIds }))}`); - continue; - } - let line = nodeString(id, formatter, processed); - if(calls.length > 0) { - line += ` with ${calls.length} call${calls.length > 1 ? 's' : ''} (${calls.map(c => nodeString(c, formatter, processed)).join(', ')})`; - } - if(linkedIds.length > 0) { - line += ` with ${linkedIds.length} link${linkedIds.length > 1 ? 's' : ''} (${linkedIds.map(c => nodeString(c, formatter, processed)).join(', ')})`; - } - if(aliasRoots.length > 0) { - line += ` with ${aliasRoots.length} alias root${aliasRoots.length > 1 ? 's' : ''} (${aliasRoots.map(c => nodeString(c, formatter, processed)).join(', ')})`; - } - result.push(line); - } - return result.join(', '); -} - -export function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'call-context'>['call-context'], processed: PipelineOutput): string { - /* traverse over 'kinds' and within them 'subkinds' */ - const result: string[] = []; - for(const [kind, { subkinds }] of Object.entries(results['kinds'])) { - result.push(` ╰ ${bold(kind, formatter)}`); - for(const [subkind, values] of Object.entries(subkinds)) { - result.push(` ╰ ${bold(subkind, formatter)}: ${asciiCallContextSubHit(formatter, values, processed)}`); - } - } - return result.join('\n'); -} - -export function summarizeIdsIfTooLong(ids: readonly NodeId[]) { - const naive = ids.join(', '); - if(naive.length <= 20) { - return naive; - } - let acc = ''; - let i = 0; - while(acc.length <= 20) { - acc += ids[i++] + ', '; - } - if(i < ids.length) { - acc += '... (see JSON below)'; - } - return acc; -} - -export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: number, results: QueryResults, processed: PipelineOutput): string { - const result: string[] = []; - - for(const [query, queryResults] of Object.entries(results)) { - if(query === '.meta') { - continue; - } - - const queryType = SupportedQueries[query as SupportedQueryTypes]; - if(queryType.asciiSummarizer(formatter, processed, queryResults as BaseQueryResult, result)) { - continue; - } - - result.push(`Query: ${bold(query, formatter)}`); - - let timing = -1; - // eslint-disable-next-line @typescript-eslint/no-unsafe-argument - for(const [key, value] of Object.entries(queryResults)) { - if(key === '.meta') { - timing = (value as BaseQueryMeta).timing; - continue; - } - result.push(` ╰ ${key}: ${JSON.stringify(value)}`); - } - result.push(` - Took ${printAsMs(timing, 0)}`); - } - - result.push(italic(`All queries together required ≈${printAsMs(results['.meta'].timing, 0)} (1ms accuracy, total ${printAsMs(totalInMs, 0)})`, formatter)); - return formatter.format(result.join('\n')); -} export const queryCommand: ReplCommand = { description: `Query the given R code, start with '${fileProtocol}' to indicate a file. The query is to be a valid query in json format (use 'help' to get more information).`, diff --git a/src/queries/query-schema.ts b/src/queries/query-schema.ts index 14f713243b..81ee343df6 100644 --- a/src/queries/query-schema.ts +++ b/src/queries/query-schema.ts @@ -1,7 +1,9 @@ import Joi from 'joi'; import { SupportedQueries } from './query'; -export const SupportedQueriesSchema = Joi.alternatives(Object.values(SupportedQueries).map(q => q.schema)).description('Supported queries'); +export const SupportedQueriesSchema = Joi.alternatives( + Object.values(SupportedQueries).map(q => q.schema) +).description('Supported queries'); export const CompoundQuerySchema = Joi.object({ type: Joi.string().valid('compound').required().description('The type of the query.'), diff --git a/src/queries/query.ts b/src/queries/query.ts index e46e97e9ca..35db67e830 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,7 +1,10 @@ -import type { CallContextQuery } from './catalog/call-context-query/call-context-query-format'; +import type { + CallContextQuery, + CallContextQuerySubKindResult +} from './catalog/call-context-query/call-context-query-format'; import { CallTargets } from './catalog/call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; -import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; +import type { BaseQueryFormat, BaseQueryMeta, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './catalog/call-context-query/call-context-query-executor'; import { guard } from '../util/assert'; import type { VirtualQueryArgumentsWithType } from './virtual-query/virtual-queries'; @@ -15,9 +18,8 @@ import { executeIdMapQuery } from './catalog/id-map-query/id-map-query-executor' import type { IdMapQuery } from './catalog/id-map-query/id-map-query-format'; import { executeNormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-executor'; import type { NormalizedAstQuery } from './catalog/normalized-ast-query/normalized-ast-query-format'; -import { bold, type OutputFormatter } from '../util/ansi'; +import { bold, italic, type OutputFormatter } from '../util/ansi'; import { printAsMs } from '../util/time'; -import { asciiCallContext, summarizeIdsIfTooLong } from '../cli/repl/commands/repl-query'; import type { PipelineOutput } from '../core/steps/pipeline/pipeline'; import type { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines'; import { graphToMermaidUrl } from '../util/mermaid/dfg'; @@ -26,6 +28,102 @@ import Joi from 'joi'; import { executeDependenciesQuery } from './catalog/dependencies-query/dependencies-query-executor'; import type { DependenciesQuery } from './catalog/dependencies-query/dependencies-query-format'; import { printResultSection } from './catalog/dependencies-query/dependencies-query-format'; +import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id'; +import { BuiltIn } from '../dataflow/environments/built-in'; + + +function nodeString(id: NodeId, formatter: OutputFormatter, processed: PipelineOutput): string { + if(id === BuiltIn) { + return italic('built-in', formatter); + } + const node = processed.normalize.idMap.get(id); + if(node === undefined) { + return `UNKNOWN: ${id}`; + } + return `${italic('`' + (node.lexeme ?? node.info.fullLexeme ?? 'UNKNOWN') + '`', formatter)} (L.${node.location?.[0]})`; +} + +function asciiCallContextSubHit(formatter: OutputFormatter, results: readonly CallContextQuerySubKindResult[], processed: PipelineOutput): string { + const result: string[] = []; + for(const { id, calls = [], linkedIds = [], aliasRoots = [] } of results) { + const node = processed.normalize.idMap.get(id); + if(node === undefined) { + result.push(` ${bold('UNKNOWN: ' + JSON.stringify({ calls, linkedIds }))}`); + continue; + } + let line = nodeString(id, formatter, processed); + if(calls.length > 0) { + line += ` with ${calls.length} call${calls.length > 1 ? 's' : ''} (${calls.map(c => nodeString(c, formatter, processed)).join(', ')})`; + } + if(linkedIds.length > 0) { + line += ` with ${linkedIds.length} link${linkedIds.length > 1 ? 's' : ''} (${linkedIds.map(c => nodeString(c, formatter, processed)).join(', ')})`; + } + if(aliasRoots.length > 0) { + line += ` with ${aliasRoots.length} alias root${aliasRoots.length > 1 ? 's' : ''} (${aliasRoots.map(c => nodeString(c, formatter, processed)).join(', ')})`; + } + result.push(line); + } + return result.join(', '); +} + +export function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'call-context'>['call-context'], processed: PipelineOutput): string { + /* traverse over 'kinds' and within them 'subkinds' */ + const result: string[] = []; + for(const [kind, { subkinds }] of Object.entries(results['kinds'])) { + result.push(` ╰ ${bold(kind, formatter)}`); + for(const [subkind, values] of Object.entries(subkinds)) { + result.push(` ╰ ${bold(subkind, formatter)}: ${asciiCallContextSubHit(formatter, values, processed)}`); + } + } + return result.join('\n'); +} + +export function summarizeIdsIfTooLong(ids: readonly NodeId[]) { + const naive = ids.join(', '); + if(naive.length <= 20) { + return naive; + } + let acc = ''; + let i = 0; + while(acc.length <= 20) { + acc += ids[i++] + ', '; + } + if(i < ids.length) { + acc += '... (see JSON below)'; + } + return acc; +} + +export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: number, results: QueryResults, processed: PipelineOutput): string { + const result: string[] = []; + + for(const [query, queryResults] of Object.entries(results)) { + if(query === '.meta') { + continue; + } + + const queryType = SupportedQueries[query as SupportedQueryTypes]; + if(queryType.asciiSummarizer(formatter, processed, queryResults as BaseQueryResult, result)) { + continue; + } + + result.push(`Query: ${bold(query, formatter)}`); + + let timing = -1; + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + for(const [key, value] of Object.entries(queryResults)) { + if(key === '.meta') { + timing = (value as BaseQueryMeta).timing; + continue; + } + result.push(` ╰ ${key}: ${JSON.stringify(value)}`); + } + result.push(` - Took ${printAsMs(timing, 0)}`); + } + + result.push(italic(`All queries together required ≈${printAsMs(results['.meta'].timing, 0)} (1ms accuracy, total ${printAsMs(totalInMs, 0)})`, formatter)); + return formatter.format(result.join('\n')); +} export type Query = CallContextQuery | DataflowQuery | NormalizedAstQuery | IdMapQuery | DependenciesQuery; diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 75e411fa32..e61732df3b 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -6,7 +6,6 @@ import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/defa import { requestFromInput } from '../../../src/r-bridge/retriever'; import { deterministicCountingIdGenerator } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; import type { QueryResults, Query, QueryResultsWithoutMeta } from '../../../src/queries/query'; -import { executeQueries } from '../../../src/queries/query'; import { assert } from 'chai'; import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual-query/virtual-queries'; import type { TestLabel } from './label'; @@ -15,7 +14,7 @@ import type { VirtualCompoundConstraint } from '../../../src/queries/virtual-que import { log } from '../../../src/util/log'; import { dataflowGraphToMermaidUrl } from '../../../src/core/print/dataflow-printer'; import type { PipelineOutput } from '../../../src/core/steps/pipeline/pipeline'; - +import { executeQueries } from '../../../src/queries/query'; function normalizeResults(result: QueryResults): QueryResultsWithoutMeta { const normalized = {} as QueryResultsWithoutMeta; diff --git a/test/functionality/dataflow/query/compound-query-tests.ts b/test/functionality/dataflow/query/compound-query-tests.ts index f524fa96f5..6dac0c64a5 100644 --- a/test/functionality/dataflow/query/compound-query-tests.ts +++ b/test/functionality/dataflow/query/compound-query-tests.ts @@ -1,5 +1,3 @@ - - import { withShell } from '../../_helper/shell'; import { assertQuery } from '../../_helper/query'; import { label } from '../../_helper/label'; diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index bab67285b7..fccab3ed73 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -1,18 +1,43 @@ import { withShell } from '../../_helper/shell'; import { assertQuery } from '../../_helper/query'; import { label } from '../../_helper/label'; -import type { DependenciesQueryResult } from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format'; +import { slicingCriterionToId } from '../../../../src/slicing/criterion/parse'; +import type { + DependenciesQueryResult, DependencyInfo +} from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format'; +import type { AstIdMap } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import type { SingleSlicingCriterion } from '../../../../src/slicing/criterion/parse'; + + +const emptyDependencies: Omit = { libraries: [], sourcedFiles: [], readData: [], writtenData: [] }; + +function decodeIds(res: Partial, idMap: AstIdMap): Partial { + const out: Partial = { + ...res + }; + for(const [key, value] of Object.entries(res) as [keyof DependenciesQueryResult, DependencyInfo[]][]) { + if(key === '.meta') { + continue; + } + // @ts-expect-error -- we do not need key-dependent typing due to the spread + out[key] = value.map(({ nodeId, ...rest }) => ({ nodeId: typeof nodeId === 'number' ? nodeId : slicingCriterionToId(String(nodeId) as SingleSlicingCriterion, idMap), ...rest })); + } + return out; +} describe('Dependencies Query', withShell(shell => { - function testQuery(name: string, code: string, expected: Partial): void { - assertQuery(label(name), shell, code, [{ type: 'dependencies' }], { + /** handles slicing criteria for the node ids */ + function testQuery( + name: string, + code: string, + expected: Partial + ): void { + assertQuery(label(name), shell, code, [{ type: 'dependencies' }], ({ normalize }) => ({ dependencies: { - libraries: expected.libraries ?? [], - sourcedFiles: expected.sourcedFiles ?? [], - readData: expected.readData ?? [], - writtenData: expected.writtenData ?? [] + ...emptyDependencies, + ...decodeIds(expected, normalize.idMap) } - }); + })); } describe('Simple', () => { From 4ef5e49d5c9ca4d06fc77dc19ff962adf7e8964a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 15 Oct 2024 10:40:19 +0200 Subject: [PATCH 14/18] feat-fix(dep-query): support arguments :D --- src/documentation/doc-util/doc-query.ts | 3 +-- .../dependencies-query-executor.ts | 23 +++++++++++++++---- .../dependencies-query-format.ts | 2 +- .../query/dependencies-query-tests.ts | 17 +++++++------- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts index 808b3b6378..976b592e49 100644 --- a/src/documentation/doc-util/doc-query.ts +++ b/src/documentation/doc-util/doc-query.ts @@ -1,12 +1,11 @@ import type { RShell } from '../../r-bridge/shell'; import type { Queries, QueryResults, SupportedQueryTypes } from '../../queries/query'; -import { executeQueries } from '../../queries/query'; +import { asciiSummaryOfQueryResult , executeQueries } from '../../queries/query'; import { PipelineExecutor } from '../../core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../core/steps/pipeline/default-pipelines'; import { requestFromInput } from '../../r-bridge/retriever'; import { jsonReplacer } from '../../util/json'; import { markdownFormatter } from '../../util/ansi'; -import { asciiSummaryOfQueryResult } from '../../cli/repl/commands/repl-query'; import { FlowrWikiBaseRef, getFilePathMd } from './doc-files'; import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries'; import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query'; diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 4ae3e488c4..37d203774a 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -1,6 +1,14 @@ import type { BasicQueryData } from '../../query'; import { executeQueries } from '../../query'; -import type { DependenciesQuery, DependenciesQueryResult, FunctionInfo, LibraryInfo, ReadInfo, SourceInfo, WriteInfo } from './dependencies-query-format'; +import type { + DependenciesQuery, + DependenciesQueryResult, + FunctionInfo, + LibraryInfo, + ReadInfo, + SourceInfo, + WriteInfo +} from './dependencies-query-format'; import { LibraryFunctions, ReadFunctions, SourceFunctions, WriteFunctions } from './dependencies-query-format'; import type { CallContextQuery } from '../call-context-query/call-context-query-format'; import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex'; @@ -10,6 +18,8 @@ import { RType } from '../../../r-bridge/lang-4.x/ast/model/type'; import { removeRQuotes } from '../../../r-bridge/retriever'; import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; +const SupportedVertexType = [ RType.String, RType.Logical, RType.Number ]; + export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { log.warn('Dependencies query expects only up to one query, but got ', queries.length); @@ -25,7 +35,7 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly const libraries: LibraryInfo[] = results.kinds['library']?.subkinds['.'].map(({ id }) => { const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const libraryName = getArgumentValue(data, vertex, 0, [RType.String, RType.Symbol]); + const libraryName = getArgumentValue(data, vertex, 0, [...SupportedVertexType, RType.Symbol]); if(libraryName) { return { nodeId: id, @@ -37,7 +47,7 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly }).filter(x => x !== undefined) ?? []; const sourcedFiles: SourceInfo[] = results.kinds['source']?.subkinds['.'].map(({ id }) => { const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const file = getArgumentValue(data, vertex, 0, [RType.String]); + const file = getArgumentValue(data, vertex, 0, SupportedVertexType); if(file) { return { nodeId: id, @@ -57,7 +67,7 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly index = arg; } } - const source = getArgumentValue(data, vertex, index, [RType.String]); + const source = getArgumentValue(data, vertex, index, SupportedVertexType); if(source) { return { nodeId: id, @@ -129,7 +139,10 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex if(vertex && vertex.args.length > argumentIndex) { const arg = getReferenceOfArgument(vertex.args[argumentIndex]); if(arg) { - const valueNode = graph.idMap?.get(arg); + let valueNode = graph.idMap?.get(arg); + if(valueNode?.type === RType.Argument) { + valueNode = valueNode.value; + } if(valueNode) { return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : 'unknown'; } diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index e2c400f50c..39694b1030 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -1,7 +1,7 @@ import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; -// these lists are based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R +// these lists are originally based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R export const LibraryFunctions = ['library', 'require'] as const; export const SourceFunctions = ['source'] as const; export const ReadFunctions: FunctionInfo[] = [ diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index fccab3ed73..32b3c02603 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -45,22 +45,23 @@ describe('Dependencies Query', withShell(shell => { }); describe('Libraries', () => { - testQuery('Single library (symbol)', 'library(testLibrary)', { libraries: [{ nodeId: 3, functionName: 'library', libraryName: 'testLibrary' }] }); - testQuery('Single library (string)', 'library("testLibrary")', { libraries: [{ nodeId: 3, functionName: 'library', libraryName: 'testLibrary' }] }); - testQuery('Single require (string)', 'require("testLibrary")', { libraries: [{ nodeId: 3, functionName: 'require', libraryName: 'testLibrary' }] }); + testQuery('Single library (symbol)', 'library(testLibrary)', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); + testQuery('Single library (string)', 'library("testLibrary")', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); + testQuery('Single require (string)', 'require("testLibrary")', { libraries: [{ nodeId: '1@require', functionName: 'require', libraryName: 'testLibrary' }] }); }); describe('Sourced files', () => { - testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: 3, functionName: 'source', file: 'test/file.R' }] }); + testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: '1@source', functionName: 'source', file: 'test/file.R' }] }); }); describe('Read Files', () => { - testQuery('read.table', "read.table('test.csv')", { readData: [{ nodeId: 3, functionName: 'read.table', source: 'test.csv' }] }); - testQuery('gzfile', 'gzfile("this is my gzip file :)", "test.gz")', { readData: [{ nodeId: 5, functionName: 'gzfile', source: 'test.gz' }] }); + testQuery('read.table', "read.table('test.csv')", { readData: [{ nodeId: '1@read.table', functionName: 'read.table', source: 'test.csv' }] }); + testQuery('gzfile', 'gzfile("this is my gzip file :)", "test.gz")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] }); + testQuery('With Argument', 'gzfile(open="test.gz",description="this is my gzip file :)")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] }); }); describe('Write Files', () => { - testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: 5, functionName: 'dump', destination: 'MyTextFile.txt' }] }); - testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: 3, functionName: 'cat', destination: 'stdout' }] }); + testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'MyTextFile.txt' }] }); + testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: '1@cat', functionName: 'cat', destination: 'stdout' }] }); }); })); From 756b18772f43a2695424c8c9a2b45db469829294 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 15 Oct 2024 11:17:22 +0200 Subject: [PATCH 15/18] refactor: cleaned up dependencies query parsing --- .../dependencies-query-executor.ts | 153 +++++++----------- .../dependencies-query-format.ts | 9 +- 2 files changed, 61 insertions(+), 101 deletions(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 37d203774a..2e582f9474 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -2,23 +2,23 @@ import type { BasicQueryData } from '../../query'; import { executeQueries } from '../../query'; import type { DependenciesQuery, - DependenciesQueryResult, + DependenciesQueryResult, DependencyInfo, FunctionInfo, LibraryInfo, - ReadInfo, - SourceInfo, + ReadInfo, SourceInfo, WriteInfo } from './dependencies-query-format'; import { LibraryFunctions, ReadFunctions, SourceFunctions, WriteFunctions } from './dependencies-query-format'; -import type { CallContextQuery } from '../call-context-query/call-context-query-format'; +import type { CallContextQuery, CallContextQueryResult } from '../call-context-query/call-context-query-format'; import type { DataflowGraphVertexFunctionCall } from '../../../dataflow/graph/vertex'; import { getReferenceOfArgument } from '../../../dataflow/graph/graph'; import { log } from '../../../util/log'; import { RType } from '../../../r-bridge/lang-4.x/ast/model/type'; import { removeRQuotes } from '../../../r-bridge/retriever'; import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; +import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; -const SupportedVertexType = [ RType.String, RType.Logical, RType.Number ]; +const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number]; export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { @@ -29,83 +29,31 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly const results = executeQueries(data, [ ...makeCallContextQuery(LibraryFunctions, 'library'), ...makeCallContextQuery(SourceFunctions, 'source'), - ...makeCallContextQuery(ReadFunctions.map(f => f.name), 'read', true), - ...makeCallContextQuery(WriteFunctions.map(f => f.name), 'write', true) + ...makeCallContextQuery(ReadFunctions, 'read'), + ...makeCallContextQuery(WriteFunctions, 'write') ])['call-context']; - const libraries: LibraryInfo[] = results.kinds['library']?.subkinds['.'].map(({ id }) => { - const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const libraryName = getArgumentValue(data, vertex, 0, [...SupportedVertexType, RType.Symbol]); - if(libraryName) { - return { - nodeId: id, - functionName: vertex.name, - libraryName - } as LibraryInfo; - } - return undefined; - }).filter(x => x !== undefined) ?? []; - const sourcedFiles: SourceInfo[] = results.kinds['source']?.subkinds['.'].map(({ id }) => { - const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const file = getArgumentValue(data, vertex, 0, SupportedVertexType); - if(file) { - return { - nodeId: id, - functionName: vertex.name, - file - } as SourceInfo; - } - return undefined; - }).filter(x => x !== undefined) ?? []; - const readData: ReadInfo[] = Object.entries(results.kinds['read']?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { - const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const info = ReadFunctions.find(f => f.name === name) as FunctionInfo; - let index = info.argIdx as number; - if(info.argName) { - const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName); - if(arg >= 0) { - index = arg; - } - } - const source = getArgumentValue(data, vertex, index, SupportedVertexType); - if(source) { - return { - nodeId: id, - functionName: vertex.name, - source - } as ReadInfo; - } - return undefined; - })).filter(x => x !== undefined) ?? []; - const writtenData: WriteInfo[] = Object.entries(results.kinds['write']?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { - const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; - const info = WriteFunctions.find(f => f.name === name) as FunctionInfo; - let index = info.argIdx; - if(info.argName) { - const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName); - if(arg >= 0) { - index = arg; - } - } - if(index) { - const destination = getArgumentValue(data, vertex, index, [RType.String]); - if(destination) { - return { - nodeId: id, - functionName: vertex.name, - destination - } as WriteInfo; - } - } else if(vertex) { - // write functions that don't have argIndex are assumed to write to stdout - return { - nodeId: id, - functionName: vertex.name, - destination: 'stdout' - } as WriteInfo; - } - return undefined; - })).filter(x => x !== undefined) ?? []; + const libraries: LibraryInfo[] = getResults(data, results, 'library', LibraryFunctions, (id, vertex, argument) => ({ + nodeId: id, + functionName: vertex.name, + libraryName: argument as string + }), [RType.Symbol]); + const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', SourceFunctions, (id, vertex, argument) => ({ + nodeId: id, + functionName: vertex.name, + file: argument as string + })); + const readData: ReadInfo[] = getResults(data, results, 'read', ReadFunctions, (id, vertex, argument) => ({ + nodeId: id, + functionName: vertex.name, + source: argument as string + })); + const writtenData: WriteInfo[] = getResults(data, results, 'write', WriteFunctions, (id, vertex, argument) => ({ + nodeId: id, + functionName: vertex.name, + // write functions that don't have argIndex are assumed to write to stdout + destination: argument ?? 'stdout' + })); return { '.meta': { @@ -115,27 +63,33 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly }; } -function makeCallContextQuery(functions: readonly string[], kind: string, groupByName = false): CallContextQuery[] { - if(groupByName){ - return functions.map(f => ({ - type: 'call-context', - callName: `^${f}$`, - includeAliases: true, - subkind: f, - kind - })); - } else { - return [{ - type: 'call-context', - callName: `^(${functions.map(f => f.replace('.', '\\.')).join('|')})$`, - includeAliases: true, - subkind: '.', - kind - }]; - } +function makeCallContextQuery(functions: readonly FunctionInfo[], kind: string): CallContextQuery[] { + return functions.map(f => ({ + type: 'call-context', + callName: `^${f.name}$`, + includeAliases: true, + subkind: f.name, + kind + })); +} + +function getResults(data: BasicQueryData, results: CallContextQueryResult, kind: string, functions: FunctionInfo[], makeInfo: (id: NodeId, vertex: DataflowGraphVertexFunctionCall, argument: string | undefined) => T | undefined, additionalAllowedTypes?: RType[]) { + return Object.entries(results.kinds[kind]?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { + const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; + const info = functions.find(f => f.name === name) as FunctionInfo; + let index = info.argIdx; + if(info.argName) { + const arg = vertex?.args.findIndex(arg => arg !== EmptyArgument && arg.name === info.argName); + if(arg >= 0) { + index = arg; + } + } + const argument = index !== undefined ? getArgumentValue(data, vertex, index, additionalAllowedTypes) : undefined; + return makeInfo(id, vertex, argument); + })).filter(x => x !== undefined) ?? []; } -function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertexFunctionCall, argumentIndex: number, allowedTypes: RType[]): string | undefined { +function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertexFunctionCall, argumentIndex: number, additionalAllowedTypes: RType[] | undefined): string | undefined { if(vertex && vertex.args.length > argumentIndex) { const arg = getReferenceOfArgument(vertex.args[argumentIndex]); if(arg) { @@ -144,6 +98,7 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex valueNode = valueNode.value; } if(valueNode) { + const allowedTypes = [...SupportedVertexTypes, ...additionalAllowedTypes ?? []]; return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : 'unknown'; } } diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 39694b1030..32fdf1c4c5 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -2,8 +2,13 @@ import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; // these lists are originally based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R -export const LibraryFunctions = ['library', 'require'] as const; -export const SourceFunctions = ['source'] as const; +export const LibraryFunctions: FunctionInfo[] = [ + { name: 'library', argIdx: 0, argName: 'package' }, + { name: 'require', argIdx: 0, argName: 'package' } +] as const; +export const SourceFunctions: FunctionInfo[] = [ + { name: 'source', argIdx: 0, argName: 'file' } +] as const; export const ReadFunctions: FunctionInfo[] = [ { name: 'read.table', argIdx: 0, argName: 'file' }, { name: 'read.csv', argIdx: 0, argName: 'file' }, From d60c8f9475e10252de3c114ad0bdd3d1d69b8043 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Tue, 15 Oct 2024 13:43:02 +0200 Subject: [PATCH 16/18] feat: allow including custom functions in dependencies query --- .../dependencies-query-executor.ts | 36 +++++++++---- .../dependencies-query-format.ts | 20 ++++++- test/functionality/_helper/query.ts | 27 +++++++++- .../query/dependencies-query-tests.ts | 54 ++++++++++++++++++- .../dataflow/query/lineage-query-tests.ts | 2 +- 5 files changed, 123 insertions(+), 16 deletions(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 2e582f9474..8cf5c54a1d 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -26,29 +26,36 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly } const now = Date.now(); + const query = queries[0]; + const ignoreDefault = query.ignoreDefaultFunctions ?? false; + const libraryFunctions = getFunctionsToCheck(query.libraryFunctions, ignoreDefault, LibraryFunctions); + const sourceFunctions = getFunctionsToCheck(query.sourceFunctions, ignoreDefault, SourceFunctions); + const readFunctions = getFunctionsToCheck(query.readFunctions, ignoreDefault, ReadFunctions); + const writeFunctions = getFunctionsToCheck(query.writeFunctions, ignoreDefault, WriteFunctions); + const results = executeQueries(data, [ - ...makeCallContextQuery(LibraryFunctions, 'library'), - ...makeCallContextQuery(SourceFunctions, 'source'), - ...makeCallContextQuery(ReadFunctions, 'read'), - ...makeCallContextQuery(WriteFunctions, 'write') + ...makeCallContextQuery(libraryFunctions, 'library'), + ...makeCallContextQuery(sourceFunctions, 'source'), + ...makeCallContextQuery(readFunctions, 'read'), + ...makeCallContextQuery(writeFunctions, 'write') ])['call-context']; - const libraries: LibraryInfo[] = getResults(data, results, 'library', LibraryFunctions, (id, vertex, argument) => ({ + const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, libraryName: argument as string }), [RType.Symbol]); - const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', SourceFunctions, (id, vertex, argument) => ({ + const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, file: argument as string })); - const readData: ReadInfo[] = getResults(data, results, 'read', ReadFunctions, (id, vertex, argument) => ({ + const readData: ReadInfo[] = getResults(data, results, 'read', readFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, source: argument as string })); - const writtenData: WriteInfo[] = getResults(data, results, 'write', WriteFunctions, (id, vertex, argument) => ({ + const writtenData: WriteInfo[] = getResults(data, results, 'write', writeFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, // write functions that don't have argIndex are assumed to write to stdout @@ -74,7 +81,7 @@ function makeCallContextQuery(functions: readonly FunctionInfo[], kind: string): } function getResults(data: BasicQueryData, results: CallContextQueryResult, kind: string, functions: FunctionInfo[], makeInfo: (id: NodeId, vertex: DataflowGraphVertexFunctionCall, argument: string | undefined) => T | undefined, additionalAllowedTypes?: RType[]) { - return Object.entries(results.kinds[kind]?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { + return Object.entries(results?.kinds[kind]?.subkinds ?? {}).flatMap(([name, results]) => results.map(({ id }) => { const vertex = data.graph.getVertex(id) as DataflowGraphVertexFunctionCall; const info = functions.find(f => f.name === name) as FunctionInfo; let index = info.argIdx; @@ -105,3 +112,14 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex } return undefined; } + +function getFunctionsToCheck(customFunctions: FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: FunctionInfo[]): FunctionInfo[] { + const functions: FunctionInfo[] = []; + if(!ignoreDefaultFunctions) { + functions.push(...defaultFunctions); + } + if(customFunctions) { + functions.push(...customFunctions); + } + return functions; +} diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 15b44d463b..50bfcde944 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -55,7 +55,12 @@ export interface FunctionInfo { } export interface DependenciesQuery extends BaseQueryFormat { - readonly type: 'dependencies' + readonly type: 'dependencies' + readonly ignoreDefaultFunctions?: boolean + readonly libraryFunctions?: FunctionInfo[] + readonly sourceFunctions?: FunctionInfo[] + readonly readFunctions?: FunctionInfo[] + readonly writeFunctions?: FunctionInfo[] } export interface DependenciesQueryResult extends BaseQueryResult { @@ -94,6 +99,12 @@ function printResultSection(title: string, infos: T[], } } +const functionInfoSchema: Joi.ArraySchema = Joi.array().items(Joi.object({ + name: Joi.string().required().description('The name of the library function.'), + argIdx: Joi.number().optional().description('The index of the argument that contains the library name.'), + argName: Joi.string().optional().description('The name of the argument that contains the library name.'), +})).optional(); + export const DependenciesQueryDefinition = { executor: executeDependenciesQuery, asciiSummarizer: (formatter, _processed, queryResults, result) => { @@ -106,6 +117,11 @@ export const DependenciesQueryDefinition = { return true; }, schema: Joi.object({ - type: Joi.string().valid('dependencies').required().description('The type of the query.'), + type: Joi.string().valid('dependencies').required().description('The type of the query.'), + ignoreDefaultFunctions: Joi.boolean().optional().description('Should the set of functions that are detected by default be ignored/skipped?'), + libraryFunctions: functionInfoSchema.description('The set of library functions to search for.'), + sourceFunctions: functionInfoSchema.description('The set of source functions to search for.'), + readFunctions: functionInfoSchema.description('The set of data reading functions to search for.'), + writeFunctions: functionInfoSchema.description('The set of data writing functions to search for.'), }).description('The dependencies query retrieves and returns the set of all dependencies in the dataflow graph, which includes libraries, sourced files, read data, and written data.') } as const satisfies SupportedQuery<'dependencies'>; diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index a30733a395..0450dc7dd2 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -6,6 +6,7 @@ import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/defa import { requestFromInput } from '../../../src/r-bridge/retriever'; import { deterministicCountingIdGenerator } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; import type { QueryResults, Query, QueryResultsWithoutMeta } from '../../../src/queries/query'; +import { SupportedQueries , executeQueries } from '../../../src/queries/query'; import { assert } from 'chai'; import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual-query/virtual-queries'; import type { TestLabel } from './label'; @@ -14,7 +15,7 @@ import type { VirtualCompoundConstraint } from '../../../src/queries/virtual-que import { log } from '../../../src/util/log'; import { dataflowGraphToMermaidUrl } from '../../../src/core/print/dataflow-printer'; import type { PipelineOutput } from '../../../src/core/steps/pipeline/pipeline'; -import { executeQueries } from '../../../src/queries/query'; + function normalizeResults(result: QueryResults): QueryResultsWithoutMeta { const normalized = {} as QueryResultsWithoutMeta; @@ -42,6 +43,7 @@ function normalizeResults(result: QueryResults)[], - expected: QueryResultsWithoutMeta | ((info: PipelineOutput) => (QueryResultsWithoutMeta | Promise>)) + expected: QueryResultsWithoutMeta | ((info: PipelineOutput) => (QueryResultsWithoutMeta | Promise>)), + validateSchema = true ) { const effectiveName = decorateLabelContext(name, ['query']); it(effectiveName, async() => { + if(validateSchema) { + for(const query of queries) { + if(query.type === 'compound') { + continue; + } + const queryType = SupportedQueries[query.type]; + const queryString = JSON.stringify(query, (_key, value) => { + if(value instanceof RegExp) { + return value.toString(); + } + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return value; + }); + const validationResult = queryType.schema.validate(JSON.parse(queryString)); + if(validationResult.error) { + assert.fail(`Invalid query: ${validationResult.error.message}`); + } + } + } + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, request: requestFromInput(code), diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index 32b3c02603..fa0f4609c3 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -3,6 +3,7 @@ import { assertQuery } from '../../_helper/query'; import { label } from '../../_helper/label'; import { slicingCriterionToId } from '../../../../src/slicing/criterion/parse'; import type { + DependenciesQuery, DependenciesQueryResult, DependencyInfo } from '../../../../src/queries/catalog/dependencies-query/dependencies-query-format'; import type { AstIdMap } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; @@ -30,9 +31,10 @@ describe('Dependencies Query', withShell(shell => { function testQuery( name: string, code: string, - expected: Partial + expected: Partial, + query: Partial = {} ): void { - assertQuery(label(name), shell, code, [{ type: 'dependencies' }], ({ normalize }) => ({ + assertQuery(label(name), shell, code, [{ type: 'dependencies', ...query }], ({ normalize }) => ({ dependencies: { ...emptyDependencies, ...decodeIds(expected, normalize.idMap) @@ -48,20 +50,68 @@ describe('Dependencies Query', withShell(shell => { testQuery('Single library (symbol)', 'library(testLibrary)', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); testQuery('Single library (string)', 'library("testLibrary")', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); testQuery('Single require (string)', 'require("testLibrary")', { libraries: [{ nodeId: '1@require', functionName: 'require', libraryName: 'testLibrary' }] }); + + describe('Custom', () => { + const readCustomFile: Partial = { + libraryFunctions: [{ name: 'custom.library', argIdx: 1, argName: 'file' }] + }; + const expected: Partial = { + libraries: [{ nodeId: '1@custom.library', functionName: 'custom.library', libraryName: 'my-custom-file' }] + }; + testQuery('Custom (by index)', 'custom.library(1, "my-custom-file", 2)', expected, readCustomFile); + testQuery('Custom (by name)', 'custom.library(num1 = 1, num2 = 2, file = "my-custom-file")', expected, readCustomFile); + testQuery('Ignore default', 'library(testLibrary)', {}, { ignoreDefaultFunctions: true }); + }); }); describe('Sourced files', () => { testQuery('Single source', 'source("test/file.R")', { sourcedFiles: [{ nodeId: '1@source', functionName: 'source', file: 'test/file.R' }] }); + + describe('Custom', () => { + const sourceCustomFile: Partial = { + sourceFunctions: [{ name: 'source.custom.file', argIdx: 1, argName: 'file' }] + }; + const expected: Partial = { + sourcedFiles: [{ nodeId: '1@source.custom.file', functionName: 'source.custom.file', file: 'my-custom-file' }] + }; + testQuery('Custom (by index)', 'source.custom.file(1, "my-custom-file", 2)', expected, sourceCustomFile); + testQuery('Custom (by name)', 'source.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, sourceCustomFile); + testQuery('Ignore default', 'source("test/file.R")', {}, { ignoreDefaultFunctions: true }); + }); }); describe('Read Files', () => { testQuery('read.table', "read.table('test.csv')", { readData: [{ nodeId: '1@read.table', functionName: 'read.table', source: 'test.csv' }] }); testQuery('gzfile', 'gzfile("this is my gzip file :)", "test.gz")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] }); testQuery('With Argument', 'gzfile(open="test.gz",description="this is my gzip file :)")', { readData: [{ nodeId: '1@gzfile', functionName: 'gzfile', source: 'test.gz' }] }); + + describe('Custom', () => { + const readCustomFile: Partial = { + readFunctions: [{ name: 'read.custom.file', argIdx: 1, argName: 'file' }] + }; + const expected: Partial = { + readData: [{ nodeId: '1@read.custom.file', functionName: 'read.custom.file', source: 'my-custom-file' }] + }; + testQuery('Custom (by index)', 'read.custom.file(1, "my-custom-file", 2)', expected, readCustomFile); + testQuery('Custom (by name)', 'read.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, readCustomFile); + testQuery('Ignore default', "read.table('test.csv')", {}, { ignoreDefaultFunctions: true }); + }); }); describe('Write Files', () => { testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'MyTextFile.txt' }] }); testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: '1@cat', functionName: 'cat', destination: 'stdout' }] }); + + describe('Custom', () => { + const writeCustomFile: Partial = { + writeFunctions: [{ name: 'write.custom.file', argIdx: 1, argName: 'file' }] + }; + const expected: Partial = { + writtenData: [{ nodeId: '1@write.custom.file', functionName: 'write.custom.file', destination: 'my-custom-file' }] + }; + testQuery('Custom (by index)', 'write.custom.file(1, "my-custom-file", 2)', expected, writeCustomFile); + testQuery('Custom (by name)', 'write.custom.file(num1 = 1, num2 = 2, file = "my-custom-file")', expected, writeCustomFile); + testQuery('Ignore default', 'dump("My text", "MyTextFile.txt")', {}, { ignoreDefaultFunctions: true }); + }); }); })); diff --git a/test/functionality/dataflow/query/lineage-query-tests.ts b/test/functionality/dataflow/query/lineage-query-tests.ts index 1c628df17f..d04d756290 100644 --- a/test/functionality/dataflow/query/lineage-query-tests.ts +++ b/test/functionality/dataflow/query/lineage-query-tests.ts @@ -16,7 +16,7 @@ describe('Lineage Query', withShell(shell => { return acc; }, {} as LineageQueryResult['lineages']) } - })); + }), false); } testQuery('Single Expression', 'x + 1', [{ type: 'lineage', criterion: '1@x' }]); From 41d88026807290a64c0c6ad1af1c39578e969d2c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 15 Oct 2024 16:02:21 +0200 Subject: [PATCH 17/18] refactor: exact name matching --- .../catalog/dependencies-query/dependencies-query-executor.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 8cf5c54a1d..ea5def1e7e 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -73,8 +73,9 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly function makeCallContextQuery(functions: readonly FunctionInfo[], kind: string): CallContextQuery[] { return functions.map(f => ({ type: 'call-context', - callName: `^${f.name}$`, + callName: f.name, includeAliases: true, + callNameExact: true, subkind: f.name, kind })); From 2341cdebe3a02cbeb588c7e8a75eaa4c1ae47e9d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Tue, 15 Oct 2024 17:46:25 +0200 Subject: [PATCH 18/18] refactor(dep-query): general overhaul --- .../call/built-in/built-in-source.ts | 9 +++++ .../dependencies-query-executor.ts | 10 +++--- .../dependencies-query-format.ts | 6 ++-- .../lineage-query/lineage-query-format.ts | 4 +-- test/functionality/_helper/query.ts | 34 ++++++++---------- .../query/dependencies-query-tests.ts | 35 +++++++++++++++++-- .../dataflow/query/lineage-query-tests.ts | 2 +- 7 files changed, 69 insertions(+), 31 deletions(-) diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts index 2909b73e5b..86c606435a 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-source.ts @@ -25,6 +25,7 @@ import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'; import { overwriteEnvironment } from '../../../../../environments/overwrite'; import type { NoInfo } from '../../../../../../r-bridge/lang-4.x/ast/model/model'; import { expensiveTrace } from '../../../../../../util/log'; +import fs from 'fs'; let sourceProvider = requestProviderFromFile(); @@ -76,6 +77,14 @@ export function processSourceCall( } export function sourceRequest(rootId: NodeId, request: RParseRequest, data: DataflowProcessorInformation, information: DataflowInformation, getId: IdGenerator): DataflowInformation { + if(request.request === 'file') { + /* check if the file exists and if not, fail */ + if(!fs.existsSync(request.content)) { + dataflowLogger.warn(`Failed to analyze sourced file ${JSON.stringify(request)}: file does not exist`); + information.graph.markIdForUnknownSideEffects(rootId); + return information; + } + } const executor = new RShellExecutor(); // parse, normalize and dataflow the sourced file diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index ea5def1e7e..818466ba1e 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -20,6 +20,8 @@ import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/nod const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number]; +const Unknown = 'unknown'; + export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { log.warn('Dependencies query expects only up to one query, but got ', queries.length); @@ -43,17 +45,17 @@ export function executeDependenciesQuery(data: BasicQueryData, queries: readonly const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, - libraryName: argument as string + libraryName: argument ?? Unknown }), [RType.Symbol]); const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, - file: argument as string + file: argument ?? Unknown })); const readData: ReadInfo[] = getResults(data, results, 'read', readFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, - source: argument as string + source: argument ?? Unknown })); const writtenData: WriteInfo[] = getResults(data, results, 'write', writeFunctions, (id, vertex, argument) => ({ nodeId: id, @@ -107,7 +109,7 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex } if(valueNode) { const allowedTypes = [...SupportedVertexTypes, ...additionalAllowedTypes ?? []]; - return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : 'unknown'; + return allowedTypes.includes(valueNode.type) ? removeRQuotes(valueNode.lexeme as string) : Unknown; } } } diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 50bfcde944..73a4bf0b47 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -8,8 +8,10 @@ import { executeDependenciesQuery } from './dependencies-query-executor'; // these lists are originally based on https://github.com/duncantl/CodeDepends/blob/7fd96dfee16b252e5f642c77a7ababf48e9326f8/R/codeTypes.R export const LibraryFunctions: FunctionInfo[] = [ - { name: 'library', argIdx: 0, argName: 'package' }, - { name: 'require', argIdx: 0, argName: 'package' } + { name: 'library', argIdx: 0, argName: 'package' }, + { name: 'require', argIdx: 0, argName: 'package' }, + { name: 'loadNamespace', argIdx: 0, argName: 'package' }, + { name: 'attachNamespace', argIdx: 0, argName: 'ns' }, ] as const; export const SourceFunctions: FunctionInfo[] = [ { name: 'source', argIdx: 0, argName: 'file' } diff --git a/src/queries/catalog/lineage-query/lineage-query-format.ts b/src/queries/catalog/lineage-query/lineage-query-format.ts index 78f23e4d02..2f3f514cd0 100644 --- a/src/queries/catalog/lineage-query/lineage-query-format.ts +++ b/src/queries/catalog/lineage-query/lineage-query-format.ts @@ -32,7 +32,7 @@ export const LineageQueryDefinition = { return true; }, schema: Joi.object({ - type: Joi.string().valid('lineage').required().description('The type of the query.'), - id: Joi.string().required().description('The ID of the node to get the lineage of.') + type: Joi.string().valid('lineage').required().description('The type of the query.'), + criterion: Joi.string().required().description('The slicing criterion of the node to get the lineage of.') }).description('Lineage query used to find the lineage of a node in the dataflow graph') } as const satisfies SupportedQuery<'lineage'>; diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 0450dc7dd2..8e0cc9910b 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -43,7 +43,6 @@ function normalizeResults(result: QueryResults)[], - expected: QueryResultsWithoutMeta | ((info: PipelineOutput) => (QueryResultsWithoutMeta | Promise>)), - validateSchema = true + expected: QueryResultsWithoutMeta | ((info: PipelineOutput) => (QueryResultsWithoutMeta | Promise>)) ) { const effectiveName = decorateLabelContext(name, ['query']); it(effectiveName, async() => { - if(validateSchema) { - for(const query of queries) { - if(query.type === 'compound') { - continue; - } - const queryType = SupportedQueries[query.type]; - const queryString = JSON.stringify(query, (_key, value) => { - if(value instanceof RegExp) { - return value.toString(); - } - // eslint-disable-next-line @typescript-eslint/no-unsafe-return - return value; - }); - const validationResult = queryType.schema.validate(JSON.parse(queryString)); - if(validationResult.error) { - assert.fail(`Invalid query: ${validationResult.error.message}`); + for(const query of queries) { + if(query.type === 'compound') { + continue; + } + const queryType = SupportedQueries[query.type]; + const queryString = JSON.stringify(query, (_key, value) => { + if(value instanceof RegExp) { + return value.toString(); } + // eslint-disable-next-line @typescript-eslint/no-unsafe-return + return value; + }); + const validationResult = queryType.schema.validate(JSON.parse(queryString)); + if(validationResult.error) { + assert.fail(`Invalid query: ${validationResult.error.message}`); } } diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index fa0f4609c3..0a7426d559 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -47,9 +47,37 @@ describe('Dependencies Query', withShell(shell => { }); describe('Libraries', () => { - testQuery('Single library (symbol)', 'library(testLibrary)', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); - testQuery('Single library (string)', 'library("testLibrary")', { libraries: [{ nodeId: '1@library', functionName: 'library', libraryName: 'testLibrary' }] }); - testQuery('Single require (string)', 'require("testLibrary")', { libraries: [{ nodeId: '1@require', functionName: 'require', libraryName: 'testLibrary' }] }); + for(const [loadFn, str] of [ + ['library', false], + ['library', true], + ['require', true], + ['loadNamespace', true], + ['attachNamespace', true] + /* support attach, support with, support pacman::p_load and the like? */ + ] as const) { + testQuery(`${loadFn} (${str ? 'string' : 'symbol'})`, `${loadFn}(${str ? '"a"' : 'a'})`, { + libraries: [{ nodeId: '1@' + loadFn, functionName: loadFn, libraryName: 'a' }] + }); + } + + testQuery('Multiple Libraries', 'library(a)\nlibrary(b)\nrequire(c)', { libraries: [ + { nodeId: '1@library', functionName: 'library', libraryName: 'a' }, + { nodeId: '2@library', functionName: 'library', libraryName: 'b' }, + { nodeId: '3@require', functionName: 'require', libraryName: 'c' } + ] }); + + testQuery('Call with Alias', 'foo <- library\nfoo(x)', { libraries: [ + { nodeId: '2@foo', functionName: 'foo', libraryName: 'x' } + ] }); + + + /* currently not supported */ + testQuery('Using a vector to load', 'lapply(c("a", "b", "c"), library, character.only = TRUE)', { libraries: [ + /* { nodeId: '1@library', functionName: 'library', libraryName: 'a' }, + { nodeId: '1@library', functionName: 'library', libraryName: 'b' }, + { nodeId: '1@library', functionName: 'library', libraryName: 'c' } */ + { nodeId: '1@library', functionName: 'library', libraryName: 'unknown' } + ] }); describe('Custom', () => { const readCustomFile: Partial = { @@ -100,6 +128,7 @@ describe('Dependencies Query', withShell(shell => { describe('Write Files', () => { testQuery('dump', 'dump("My text", "MyTextFile.txt")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'MyTextFile.txt' }] }); + testQuery('dump (argument)', 'dump(file="foo.txt", "foo")', { writtenData: [{ nodeId: '1@dump', functionName: 'dump', destination: 'foo.txt' }] }); testQuery('cat', 'cat("Hello!")', { writtenData: [{ nodeId: '1@cat', functionName: 'cat', destination: 'stdout' }] }); describe('Custom', () => { diff --git a/test/functionality/dataflow/query/lineage-query-tests.ts b/test/functionality/dataflow/query/lineage-query-tests.ts index d04d756290..1c628df17f 100644 --- a/test/functionality/dataflow/query/lineage-query-tests.ts +++ b/test/functionality/dataflow/query/lineage-query-tests.ts @@ -16,7 +16,7 @@ describe('Lineage Query', withShell(shell => { return acc; }, {} as LineageQueryResult['lineages']) } - }), false); + })); } testQuery('Single Expression', 'x + 1', [{ type: 'lineage', criterion: '1@x' }]);