doc(dep-query): and refinements for the wiki pages (#1090)

flowr-analysis · Oct 17, 2024 · 55f75c6 · 55f75c6 · github-actions · Oct 17, 2024
1 parent 3e73aa5
commit 55f75c6
Show file tree

Hide file tree

Showing 6 changed files with 659 additions and 374 deletions.
diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts
@@ -10,19 +10,20 @@ import { FlowrWikiBaseRef, getFilePathMd } from './doc-files';
 import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries';
 import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query';
 import { printDfGraphForCode } from './doc-dfg';
-import { jsonWithLimit } from './doc-code';
+import { codeBlock, jsonWithLimit } from './doc-code';
 import { printAsMs } from '../../util/time';
 import { asciiSummaryOfQueryResult } from '../../queries/query-print';
 
 export interface ShowQueryOptions {
 	readonly showCode?:       boolean;
 	readonly collapseResult?: boolean;
+	readonly collapseQuery?:  boolean;
 }
 
 export async function showQuery<
 	Base extends SupportedQueryTypes,
 	VirtualArguments extends VirtualCompoundConstraint<Base> = VirtualCompoundConstraint<Base>
->(shell: RShell, code: string, queries: Queries<Base, VirtualArguments>, { showCode, collapseResult }: ShowQueryOptions = {}): Promise<string> {
+>(shell: RShell, code: string, queries: Queries<Base, VirtualArguments>, { showCode, collapseResult, collapseQuery }: ShowQueryOptions = {}): Promise<string> {
 	const now = performance.now();
 	const analysis = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
 		shell,
@@ -35,11 +36,10 @@ export async function showQuery<
 The analysis required _${printAsMs(duration)}_ (including parsing and normalization and the query) within the generation environment.
 	`.trim();
 
+	const str = JSON.stringify(queries, jsonReplacer, collapseQuery ? ' ' : 2);
 	return `
 
-\`\`\`json
-${JSON.stringify(queries, jsonReplacer, 2)}
-\`\`\`
+${codeBlock('json', collapseQuery ? str.split('\n').join(' ').replace(/([{[])\s{2,}/g,'$1 ').replace(/\s{2,}([\]}])/g,' $1') : str)}
 
 ${collapseResult ? ' <details> <summary style="color:gray">Show Results</summary>' : ''}
 
@@ -129,10 +129,10 @@ Responsible for the execution of the ${name} query is \`${functionName}\` in ${g
 }
 
 export async function explainQueries(shell: RShell, type: 'active' | 'virtual'): Promise<string> {
-	const queries = RegisteredQueries[type];
+	const queries = [...RegisteredQueries[type].entries()].sort(([,{ name: a }], [, { name: b }]) => a.localeCompare(b));
 	const result: string[] = [];
-	for(const doc of queries.values()) {
+	for(const [,doc] of queries) {
 		result.push(await explainQuery(shell, doc));
 	}
-	return result.join('\n\n\n');
+	return result.join(`\n${'-'.repeat(5)}\n\n`);
 }
diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts
@@ -21,6 +21,7 @@ import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-que
 import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor';
 import { executeStaticSliceClusterQuery } from '../queries/catalog/static-slice-query/static-slice-query-executor';
 import { executeLineageQuery } from '../queries/catalog/lineage-query/lineage-query-executor';
+import { executeDependenciesQuery } from '../queries/catalog/dependencies-query/dependencies-query-executor';
 
 
 registerQueryDocumentation('call-context', {
@@ -105,7 +106,7 @@ Using the example code \`${exampleCode}\`, the following query returns the dataf
 ${
 	await showQuery(shell, exampleCode, [{
 		type: 'dataflow'
-	}], { showCode: true })
+	}], { showCode: true, collapseQuery: true })
 }
 		`;
 	}
@@ -127,7 +128,7 @@ Using the example code \`${exampleCode}\`, the following query returns the norma
 ${
 	await showQuery(shell, exampleCode, [{
 		type: 'normalized-ast'
-	}], { showCode: true })
+	}], { showCode: true, collapseQuery: true })
 }
 		`;
 	}
@@ -194,7 +195,7 @@ Using the example code from above, the following query returns all clusters:
 ${
 	await showQuery(shell, exampleQueryCode, [{
 		type: 'dataflow-cluster'
-	}], { showCode: false })
+	}], { showCode: false, collapseQuery: true })
 }
 		`;
 	}
@@ -215,7 +216,7 @@ Using the example code \`${exampleCode}\`, the following query returns all nodes
 ${
 	await showQuery(shell, exampleCode, [{
 		type: 'id-map'
-	}], { showCode: true })
+	}], { showCode: true, collapseQuery: true })
 }
 		`;
 	}
@@ -327,6 +328,52 @@ This query replaces the old [\`request-slice\`](${FlowrWikiBaseRef}/Interface#me
 	}
 });
 
+registerQueryDocumentation('dependencies', {
+	name:             'Dependencies Query',
+	type:             'active',
+	shortDescription: 'Returns all direct dependencies (in- and outputs) of a given R script',
+	functionName:     executeDependenciesQuery.name,
+	functionFile:     '../queries/catalog/dependencies-query/dependencies-query-executor.ts',
+	buildExplanation: async(shell: RShell) => {
+		const exampleCode = 'library(x)';
+		const longerCode = `
+source("sample.R")
+foo <- loadNamespace("bar")
+
+data <- read.csv("data.csv")
+
+#' @importFrom ggplot2 ggplot geom_point aes
+ggplot(data, aes(x=x, y=y)) + geom_point()
+
+better::write.csv(data, "data2.csv")
+print("hello world!")
+		`;
+		return `
+This query extracts all dependencies from an R script, using a combination of [Call-Context Queries](#call-context-query)
+and more advanced tracking in the [Dataflow Graph](${FlowrWikiBaseRef}/Dataflow%20Graph).  
+
+In other words, if you have a script simply reading: \`${exampleCode}\`, the following query returns the loaded library:
+${
+	await showQuery(shell, exampleCode, [{
+		type: 'dependencies'
+	}], { showCode: false, collapseQuery: true })
+}
+
+Of course, this works for more complicated scripts too. The query offers information on the loaded _libraries_, _sourced_ files, data which is _read_ and data which is _written_.
+For example, consider the following script:
+${codeBlock('r', longerCode)}
+The following query returns the dependencies of the script:
+${
+	await showQuery(shell, longerCode, [{
+		type: 'dependencies'
+	}], { showCode: false, collapseQuery: true })
+}
+
+		`;
+	}
+});
+
+
 
 async function getText(shell: RShell) {
 	const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';

diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts
@@ -1,5 +1,5 @@
 import type { BasicQueryData } from '../../query';
-import { executeQueries } from '../../query';
+import { executeQueriesOfSameType  } from '../../query';
 import type {
 	DependenciesQuery,
 	DependenciesQueryResult, DependencyInfo,
@@ -17,36 +17,53 @@ import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
 import { removeRQuotes } from '../../../r-bridge/retriever';
 import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
 import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
+import { visitAst } from '../../../r-bridge/lang-4.x/ast/model/processing/visitor';
 
 const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number];
 
 const Unknown = 'unknown';
 
 export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult {
 	if(queries.length !== 1) {
-		log.warn('Dependencies query expects only up to one query, but got ', queries.length);
+		log.warn('Dependencies query expects only up to one query, but got ', queries.length, 'only using the first query');
 	}
 	const now = Date.now();
 
-	const query = queries[0];
+	const [query] = queries;
 	const ignoreDefault = query.ignoreDefaultFunctions ?? false;
 	const libraryFunctions = getFunctionsToCheck(query.libraryFunctions, ignoreDefault, LibraryFunctions);
 	const sourceFunctions = getFunctionsToCheck(query.sourceFunctions, ignoreDefault, SourceFunctions);
 	const readFunctions = getFunctionsToCheck(query.readFunctions, ignoreDefault, ReadFunctions);
 	const writeFunctions = getFunctionsToCheck(query.writeFunctions, ignoreDefault, WriteFunctions);
 
-	const results = executeQueries(data, [
+	const numberOfFunctions = libraryFunctions.length + sourceFunctions.length + readFunctions.length + writeFunctions.length;
+
+	const results = numberOfFunctions === 0 ? { kinds: {}, '.meta': { timing: 0 } } : executeQueriesOfSameType<CallContextQuery>(data,
 		...makeCallContextQuery(libraryFunctions, 'library'),
 		...makeCallContextQuery(sourceFunctions, 'source'),
 		...makeCallContextQuery(readFunctions, 'read'),
 		...makeCallContextQuery(writeFunctions, 'write')
-	])['call-context'];
+	);
 
 	const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({
 		nodeId:       id,
 		functionName: vertex.name,
 		libraryName:  argument ?? Unknown
 	}), [RType.Symbol]);
+
+	/* for libraries, we additionally have to track all uses of `::` and `:::`; for this, we currently simply traverse the whole AST */
+	visitAst(data.ast.ast, n => {
+		if(n.type === RType.Symbol && n.namespace) {
+			/* we should improve the identification of ':::' */
+			libraries.push({
+				nodeId:       n.info.id,
+				functionName: (n.info.fullLexeme ?? n.lexeme).includes(':::') ? ':::' : '::',
+				libraryName:  n.namespace
+			});
+		}
+	});
+
+
 	const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({
 		nodeId:       id,
 		functionName: vertex.name,
@@ -116,11 +133,8 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex
 	return undefined;
 }
 
-function getFunctionsToCheck(customFunctions: FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: FunctionInfo[]): FunctionInfo[] {
-	const functions: FunctionInfo[] = [];
-	if(!ignoreDefaultFunctions) {
-		functions.push(...defaultFunctions);
-	}
+function getFunctionsToCheck(customFunctions: readonly FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: readonly FunctionInfo[]): FunctionInfo[] {
+	const functions: FunctionInfo[] = ignoreDefaultFunctions ? [] : [...defaultFunctions];
 	if(customFunctions) {
 		functions.push(...customFunctions);
 	}

diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts
@@ -112,10 +112,10 @@ export const DependenciesQueryDefinition = {
 	asciiSummarizer: (formatter, _processed, queryResults, result) => {
 		const out = queryResults as QueryResults<'dependencies'>['dependencies'];
 		result.push(`Query: ${bold('dependencies', formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
-		printResultSection('Libraries', out.libraries, result, l => `Library Name: ${l.libraryName}`);
-		printResultSection('Sourced Files', out.sourcedFiles, result, s => `Sourced File: ${s.file}`);
-		printResultSection('Read Data', out.readData, result, r => `Source: ${r.source}`);
-		printResultSection('Written Data', out.writtenData, result, w => `Destination: ${w.destination}`);
+		printResultSection('Libraries', out.libraries, result, l => `\`${l.libraryName}\``);
+		printResultSection('Sourced Files', out.sourcedFiles, result, s => `\`${s.file}\``);
+		printResultSection('Read Data', out.readData, result, r => `\`${r.source}\``);
+		printResultSection('Written Data', out.writtenData, result, w => `\`${w.destination}\``);
 		return true;
 	},
 	schema: Joi.object({

diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts
@@ -70,6 +70,11 @@ describe('Dependencies Query', withShell(shell => {
 			{ nodeId: '2@foo', functionName: 'foo', libraryName: 'x' }
 		] });
 
+		testQuery('Load implicitly', 'foo::x\nbar:::y()', { libraries: [
+			{ nodeId: '1@x', functionName: '::', libraryName: 'foo' },
+			{ nodeId: '2@y', functionName: ':::', libraryName: 'bar' }
+		] });
+
 
 		/* currently not supported */
 		testQuery('Using a vector to load', 'lapply(c("a", "b", "c"), library, character.only = TRUE)', { libraries: [
Benchmark suite	Current: `55f75c6`	Previous: `96e933e`	Ratio
`Retrieve AST from R code`	`245.17328972727273` ms (`105.63788513414127`)	`246.50067822727272` ms (`108.0778534921102`)	`0.99`
`Normalize R AST`	`17.59354604545455` ms (`31.64964928197199`)	`17.822801272727272` ms (`32.47501104567212`)	`0.99`
`Produce dataflow information`	`39.99429668181818` ms (`86.12085923237936`)	`40.41399131818182` ms (`86.99830114822697`)	`0.99`
`Total per-file`	`834.6713325454544` ms (`1502.1360410007476`)	`829.6500495454545` ms (`1498.0426737610057`)	`1.01`
`Static slicing`	`2.1316294589831637` ms (`1.2851271028934697`)	`2.0971564864344034` ms (`1.2085738336418979`)	`1.02`
`Reconstruct code`	`0.25509202973551137` ms (`0.20270278709779085`)	`0.24421041798721546` ms (`0.1931884846808244`)	`1.04`
`Total per-slice`	`2.401651389986461` ms (`1.359103836138158`)	`2.356082671207842` ms (`1.2841644653561477`)	`1.02`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.7869360165281424` #	`0.7869360165281424` #	`1`
`reduction (normalized tokens)`	`0.7639690077689504` #	`0.7639690077689504` #	`1`
`memory (df-graph)`	`95.46617542613636` KiB (`244.77619956879823`)	`95.46617542613636` KiB (`244.77619956879823`)	`1`
Benchmark suite	Current: `55f75c6`	Previous: `96e933e`	Ratio
`Retrieve AST from R code`	`248.57031174000002` ms (`47.26702537797175`)	`239.76002830000002` ms (`44.476154875177215`)	`1.04`
`Normalize R AST`	`19.03586858` ms (`14.430348444018906`)	`19.04045506` ms (`14.770405721401682`)	`1.00`
`Produce dataflow information`	`59.39528194` ms (`59.312101449462354`)	`74.39786466` ms (`87.80796950166253`)	`0.80`
`Total per-file`	`7736.6692878` ms (`28755.081385215915`)	`7666.33215246` ms (`28737.408915639426`)	`1.01`
`Static slicing`	`16.021224336455454` ms (`43.92909521631927`)	`15.907723437298863` ms (`43.83669809749617`)	`1.01`
`Reconstruct code`	`0.2969120907621507` ms (`0.16643504024013106`)	`0.2509487217116593` ms (`0.14943631432024615`)	`1.18`
`Total per-slice`	`16.32676231235041` ms (`43.95680436984275`)	`16.166379499642126` ms (`43.873427530614464`)	`1.01`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.8712997340230448` #	`0.8712997340230448` #	`1`
`reduction (normalized tokens)`	`0.8102441553774778` #	`0.8102441553774778` #	`1`
`memory (df-graph)`	`99.8990234375` KiB (`113.72812769327498`)	`99.8990234375` KiB (`113.72812769327498`)	`1`