Skip to content

Commit

Permalink
doc(dep-query): and refinements for the wiki pages (#1090)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce authored Oct 17, 2024
1 parent 3e73aa5 commit 55f75c6
Show file tree
Hide file tree
Showing 6 changed files with 659 additions and 374 deletions.
16 changes: 8 additions & 8 deletions src/documentation/doc-util/doc-query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,20 @@ import { FlowrWikiBaseRef, getFilePathMd } from './doc-files';
import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries';
import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query';
import { printDfGraphForCode } from './doc-dfg';
import { jsonWithLimit } from './doc-code';
import { codeBlock, jsonWithLimit } from './doc-code';
import { printAsMs } from '../../util/time';
import { asciiSummaryOfQueryResult } from '../../queries/query-print';

/**
 * Presentation switches accepted by `showQuery`.
 * Every field is optional; `showQuery` defaults the whole options object to `{}`.
 */
export interface ShowQueryOptions {
/** NOTE(review): presumably toggles whether the analyzed code is rendered next to the query; its use is not visible in this excerpt, please confirm. */
readonly showCode?: boolean;
/** If set, `showQuery` emits a `<details>` element with the summary "Show Results" ahead of the results. */
readonly collapseResult?: boolean;
/** If set, `showQuery` flattens the JSON rendering of the query onto a single line instead of pretty-printing it with a two-space indent. */
readonly collapseQuery?: boolean;
}

export async function showQuery<
Base extends SupportedQueryTypes,
VirtualArguments extends VirtualCompoundConstraint<Base> = VirtualCompoundConstraint<Base>
>(shell: RShell, code: string, queries: Queries<Base, VirtualArguments>, { showCode, collapseResult }: ShowQueryOptions = {}): Promise<string> {
>(shell: RShell, code: string, queries: Queries<Base, VirtualArguments>, { showCode, collapseResult, collapseQuery }: ShowQueryOptions = {}): Promise<string> {
const now = performance.now();
const analysis = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
shell,
Expand All @@ -35,11 +36,10 @@ export async function showQuery<
The analysis required _${printAsMs(duration)}_ (including parsing and normalization and the query) within the generation environment.
`.trim();

const str = JSON.stringify(queries, jsonReplacer, collapseQuery ? ' ' : 2);
return `
\`\`\`json
${JSON.stringify(queries, jsonReplacer, 2)}
\`\`\`
${codeBlock('json', collapseQuery ? str.split('\n').join(' ').replace(/([{[])\s{2,}/g,'$1 ').replace(/\s{2,}([\]}])/g,' $1') : str)}
${collapseResult ? ' <details> <summary style="color:gray">Show Results</summary>' : ''}
Expand Down Expand Up @@ -129,10 +129,10 @@ Responsible for the execution of the ${name} query is \`${functionName}\` in ${g
}

export async function explainQueries(shell: RShell, type: 'active' | 'virtual'): Promise<string> {
const queries = RegisteredQueries[type];
const queries = [...RegisteredQueries[type].entries()].sort(([,{ name: a }], [, { name: b }]) => a.localeCompare(b));
const result: string[] = [];
for(const doc of queries.values()) {
for(const [,doc] of queries) {
result.push(await explainQuery(shell, doc));
}
return result.join('\n\n\n');
return result.join(`\n${'-'.repeat(5)}\n\n`);
}
55 changes: 51 additions & 4 deletions src/documentation/print-query-wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-que
import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor';
import { executeStaticSliceClusterQuery } from '../queries/catalog/static-slice-query/static-slice-query-executor';
import { executeLineageQuery } from '../queries/catalog/lineage-query/lineage-query-executor';
import { executeDependenciesQuery } from '../queries/catalog/dependencies-query/dependencies-query-executor';


registerQueryDocumentation('call-context', {
Expand Down Expand Up @@ -105,7 +106,7 @@ Using the example code \`${exampleCode}\`, the following query returns the dataf
${
await showQuery(shell, exampleCode, [{
type: 'dataflow'
}], { showCode: true })
}], { showCode: true, collapseQuery: true })
}
`;
}
Expand All @@ -127,7 +128,7 @@ Using the example code \`${exampleCode}\`, the following query returns the norma
${
await showQuery(shell, exampleCode, [{
type: 'normalized-ast'
}], { showCode: true })
}], { showCode: true, collapseQuery: true })
}
`;
}
Expand Down Expand Up @@ -194,7 +195,7 @@ Using the example code from above, the following query returns all clusters:
${
await showQuery(shell, exampleQueryCode, [{
type: 'dataflow-cluster'
}], { showCode: false })
}], { showCode: false, collapseQuery: true })
}
`;
}
Expand All @@ -215,7 +216,7 @@ Using the example code \`${exampleCode}\`, the following query returns all nodes
${
await showQuery(shell, exampleCode, [{
type: 'id-map'
}], { showCode: true })
}], { showCode: true, collapseQuery: true })
}
`;
}
Expand Down Expand Up @@ -327,6 +328,52 @@ This query replaces the old [\`request-slice\`](${FlowrWikiBaseRef}/Interface#me
}
});

/*
 * Wiki documentation entry for the 'dependencies' query.
 * It registers the display name, the query kind, a one-line summary, the executor that is
 * credited as responsible for the query, and a builder that renders the explanation text
 * with two worked examples produced by `showQuery`.
 */
registerQueryDocumentation('dependencies', {
	name:             'Dependencies Query',
	type:             'active',
	/* plain space on purpose: a LaTeX-style `~` tie is not a non-breaking space in Markdown and would be printed literally */
	shortDescription: 'Returns all direct dependencies (in- and outputs) of a given R script',
	functionName:     executeDependenciesQuery.name,
	functionFile:     '../queries/catalog/dependencies-query/dependencies-query-executor.ts',
	/**
	 * Builds the Markdown explanation of the query.
	 *
	 * @param shell - the R shell that is handed to `showQuery` to analyze the example scripts
	 * @returns the explanation text, including the rendered output of both example queries
	 */
	buildExplanation: async(shell: RShell) => {
		/* minimal example: a single library load */
		const exampleCode = 'library(x)';
		/* larger example covering sourced files, loaded namespaces, read and written data (the content is emitted verbatim, hence no indentation) */
		const longerCode = `
source("sample.R")
foo <- loadNamespace("bar")
data <- read.csv("data.csv")
#' @importFrom ggplot2 ggplot geom_point aes
ggplot(data, aes(x=x, y=y)) + geom_point()
better::write.csv(data, "data2.csv")
print("hello world!")
`;
		/* the template content below is part of the generated page, so it stays at column zero */
		return `
This query extracts all dependencies from an R script, using a combination of [Call-Context Queries](#call-context-query)
and more advanced tracking in the [Dataflow Graph](${FlowrWikiBaseRef}/Dataflow%20Graph).
In other words, if you have a script simply reading: \`${exampleCode}\`, the following query returns the loaded library:
${
	await showQuery(shell, exampleCode, [{
		type: 'dependencies'
	}], { showCode: false, collapseQuery: true })
}
Of course, this works for more complicated scripts too. The query offers information on the loaded _libraries_, _sourced_ files, data which is _read_ and data which is _written_.
For example, consider the following script:
${codeBlock('r', longerCode)}
The following query returns the dependencies of the script:
${
	await showQuery(shell, longerCode, [{
		type: 'dependencies'
	}], { showCode: false, collapseQuery: true })
}
`;
	}
});



async function getText(shell: RShell) {
const rversion = (await shell.usedRVersion())?.format() ?? 'unknown';
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { BasicQueryData } from '../../query';
import { executeQueries } from '../../query';
import { executeQueriesOfSameType } from '../../query';
import type {
DependenciesQuery,
DependenciesQueryResult, DependencyInfo,
Expand All @@ -17,36 +17,53 @@ import { RType } from '../../../r-bridge/lang-4.x/ast/model/type';
import { removeRQuotes } from '../../../r-bridge/retriever';
import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { visitAst } from '../../../r-bridge/lang-4.x/ast/model/processing/visitor';

const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number];

const Unknown = 'unknown';

export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult {
if(queries.length !== 1) {
log.warn('Dependencies query expects only up to one query, but got ', queries.length);
log.warn('Dependencies query expects only up to one query, but got ', queries.length, 'only using the first query');
}
const now = Date.now();

const query = queries[0];
const [query] = queries;
const ignoreDefault = query.ignoreDefaultFunctions ?? false;
const libraryFunctions = getFunctionsToCheck(query.libraryFunctions, ignoreDefault, LibraryFunctions);
const sourceFunctions = getFunctionsToCheck(query.sourceFunctions, ignoreDefault, SourceFunctions);
const readFunctions = getFunctionsToCheck(query.readFunctions, ignoreDefault, ReadFunctions);
const writeFunctions = getFunctionsToCheck(query.writeFunctions, ignoreDefault, WriteFunctions);

const results = executeQueries(data, [
const numberOfFunctions = libraryFunctions.length + sourceFunctions.length + readFunctions.length + writeFunctions.length;

const results = numberOfFunctions === 0 ? { kinds: {}, '.meta': { timing: 0 } } : executeQueriesOfSameType<CallContextQuery>(data,
...makeCallContextQuery(libraryFunctions, 'library'),
...makeCallContextQuery(sourceFunctions, 'source'),
...makeCallContextQuery(readFunctions, 'read'),
...makeCallContextQuery(writeFunctions, 'write')
])['call-context'];
);

const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({
nodeId: id,
functionName: vertex.name,
libraryName: argument ?? Unknown
}), [RType.Symbol]);

/* for libraries, we have to additionally track all uses of `::` and `:::`, for this we currently simply traverse all uses */
visitAst(data.ast.ast, n => {
if(n.type === RType.Symbol && n.namespace) {
/* we should improve the identification of ':::' */
libraries.push({
nodeId: n.info.id,
functionName: (n.info.fullLexeme ?? n.lexeme).includes(':::') ? ':::' : '::',
libraryName: n.namespace
});
}
});


const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({
nodeId: id,
functionName: vertex.name,
Expand Down Expand Up @@ -116,11 +133,8 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex
return undefined;
}

function getFunctionsToCheck(customFunctions: FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: FunctionInfo[]): FunctionInfo[] {
const functions: FunctionInfo[] = [];
if(!ignoreDefaultFunctions) {
functions.push(...defaultFunctions);
}
function getFunctionsToCheck(customFunctions: readonly FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: readonly FunctionInfo[]): FunctionInfo[] {
const functions: FunctionInfo[] = ignoreDefaultFunctions ? [] : [...defaultFunctions];
if(customFunctions) {
functions.push(...customFunctions);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ export const DependenciesQueryDefinition = {
asciiSummarizer: (formatter, _processed, queryResults, result) => {
const out = queryResults as QueryResults<'dependencies'>['dependencies'];
result.push(`Query: ${bold('dependencies', formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
printResultSection('Libraries', out.libraries, result, l => `Library Name: ${l.libraryName}`);
printResultSection('Sourced Files', out.sourcedFiles, result, s => `Sourced File: ${s.file}`);
printResultSection('Read Data', out.readData, result, r => `Source: ${r.source}`);
printResultSection('Written Data', out.writtenData, result, w => `Destination: ${w.destination}`);
printResultSection('Libraries', out.libraries, result, l => `\`${l.libraryName}\``);
printResultSection('Sourced Files', out.sourcedFiles, result, s => `\`${s.file}\``);
printResultSection('Read Data', out.readData, result, r => `\`${r.source}\``);
printResultSection('Written Data', out.writtenData, result, w => `\`${w.destination}\``);
return true;
},
schema: Joi.object({
Expand Down
5 changes: 5 additions & 0 deletions test/functionality/dataflow/query/dependencies-query-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ describe('Dependencies Query', withShell(shell => {
{ nodeId: '2@foo', functionName: 'foo', libraryName: 'x' }
] });

testQuery('Load implicitly', 'foo::x\nbar:::y()', { libraries: [
{ nodeId: '1@x', functionName: '::', libraryName: 'foo' },
{ nodeId: '2@y', functionName: ':::', libraryName: 'bar' }
] });


/* currently not supported */
testQuery('Using a vector to load', 'lapply(c("a", "b", "c"), library, character.only = TRUE)', { libraries: [
Expand Down
Loading

2 comments on commit 55f75c6

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 55f75c6 Previous: 96e933e Ratio
Retrieve AST from R code 245.17328972727273 ms (105.63788513414127) 246.50067822727272 ms (108.0778534921102) 0.99
Normalize R AST 17.59354604545455 ms (31.64964928197199) 17.822801272727272 ms (32.47501104567212) 0.99
Produce dataflow information 39.99429668181818 ms (86.12085923237936) 40.41399131818182 ms (86.99830114822697) 0.99
Total per-file 834.6713325454544 ms (1502.1360410007476) 829.6500495454545 ms (1498.0426737610057) 1.01
Static slicing 2.1316294589831637 ms (1.2851271028934697) 2.0971564864344034 ms (1.2085738336418979) 1.02
Reconstruct code 0.25509202973551137 ms (0.20270278709779085) 0.24421041798721546 ms (0.1931884846808244) 1.04
Total per-slice 2.401651389986461 ms (1.359103836138158) 2.356082671207842 ms (1.2841644653561477) 1.02
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869360165281424 # 0.7869360165281424 # 1
reduction (normalized tokens) 0.7639690077689504 # 0.7639690077689504 # 1
memory (df-graph) 95.46617542613636 KiB (244.77619956879823) 95.46617542613636 KiB (244.77619956879823) 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 55f75c6 Previous: 96e933e Ratio
Retrieve AST from R code 248.57031174000002 ms (47.26702537797175) 239.76002830000002 ms (44.476154875177215) 1.04
Normalize R AST 19.03586858 ms (14.430348444018906) 19.04045506 ms (14.770405721401682) 1.00
Produce dataflow information 59.39528194 ms (59.312101449462354) 74.39786466 ms (87.80796950166253) 0.80
Total per-file 7736.6692878 ms (28755.081385215915) 7666.33215246 ms (28737.408915639426) 1.01
Static slicing 16.021224336455454 ms (43.92909521631927) 15.907723437298863 ms (43.83669809749617) 1.01
Reconstruct code 0.2969120907621507 ms (0.16643504024013106) 0.2509487217116593 ms (0.14943631432024615) 1.18
Total per-slice 16.32676231235041 ms (43.95680436984275) 16.166379499642126 ms (43.873427530614464) 1.01
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8712997340230448 # 0.8712997340230448 # 1
reduction (normalized tokens) 0.8102441553774778 # 0.8102441553774778 # 1
memory (df-graph) 99.8990234375 KiB (113.72812769327498) 99.8990234375 KiB (113.72812769327498) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.