From 55f75c6c1accf6d62e96feff147cb309a14c7549 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Thu, 17 Oct 2024 11:16:30 +0200 Subject: [PATCH] doc(dep-query): and refinements for the wiki pages (#1090) --- src/documentation/doc-util/doc-query.ts | 16 +- src/documentation/print-query-wiki.ts | 55 +- .../dependencies-query-executor.ts | 34 +- .../dependencies-query-format.ts | 8 +- .../query/dependencies-query-tests.ts | 5 + wiki/Query API.md | 915 +++++++++++------- 6 files changed, 659 insertions(+), 374 deletions(-) diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts index 4727f6e127..25f87d3ae3 100644 --- a/src/documentation/doc-util/doc-query.ts +++ b/src/documentation/doc-util/doc-query.ts @@ -10,19 +10,20 @@ import { FlowrWikiBaseRef, getFilePathMd } from './doc-files'; import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries'; import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query'; import { printDfGraphForCode } from './doc-dfg'; -import { jsonWithLimit } from './doc-code'; +import { codeBlock, jsonWithLimit } from './doc-code'; import { printAsMs } from '../../util/time'; import { asciiSummaryOfQueryResult } from '../../queries/query-print'; export interface ShowQueryOptions { readonly showCode?: boolean; readonly collapseResult?: boolean; + readonly collapseQuery?: boolean; } export async function showQuery< Base extends SupportedQueryTypes, VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint ->(shell: RShell, code: string, queries: Queries, { showCode, collapseResult }: ShowQueryOptions = {}): Promise { +>(shell: RShell, code: string, queries: Queries, { showCode, collapseResult, collapseQuery }: ShowQueryOptions = {}): Promise { const now = performance.now(); const analysis = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, @@ -35,11 +36,10 @@ export async function showQuery< The analysis required 
_${printAsMs(duration)}_ (including parsing and normalization and the query) within the generation environment. `.trim(); + const str = JSON.stringify(queries, jsonReplacer, collapseQuery ? ' ' : 2); return ` -\`\`\`json -${JSON.stringify(queries, jsonReplacer, 2)} -\`\`\` +${codeBlock('json', collapseQuery ? str.split('\n').join(' ').replace(/([{[])\s{2,}/g,'$1 ').replace(/\s{2,}([\]}])/g,' $1') : str)} ${collapseResult ? '
Show Results' : ''} @@ -129,10 +129,10 @@ Responsible for the execution of the ${name} query is \`${functionName}\` in ${g } export async function explainQueries(shell: RShell, type: 'active' | 'virtual'): Promise { - const queries = RegisteredQueries[type]; + const queries = [...RegisteredQueries[type].entries()].sort(([,{ name: a }], [, { name: b }]) => a.localeCompare(b)); const result: string[] = []; - for(const doc of queries.values()) { + for(const [,doc] of queries) { result.push(await explainQuery(shell, doc)); } - return result.join('\n\n\n'); + return result.join(`\n${'-'.repeat(5)}\n\n`); } diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts index 5734283f6d..bc0bd86924 100644 --- a/src/documentation/print-query-wiki.ts +++ b/src/documentation/print-query-wiki.ts @@ -21,6 +21,7 @@ import { executeNormalizedAstQuery } from '../queries/catalog/normalized-ast-que import { executeDataflowClusterQuery } from '../queries/catalog/cluster-query/cluster-query-executor'; import { executeStaticSliceClusterQuery } from '../queries/catalog/static-slice-query/static-slice-query-executor'; import { executeLineageQuery } from '../queries/catalog/lineage-query/lineage-query-executor'; +import { executeDependenciesQuery } from '../queries/catalog/dependencies-query/dependencies-query-executor'; registerQueryDocumentation('call-context', { @@ -105,7 +106,7 @@ Using the example code \`${exampleCode}\`, the following query returns the dataf ${ await showQuery(shell, exampleCode, [{ type: 'dataflow' - }], { showCode: true }) + }], { showCode: true, collapseQuery: true }) } `; } @@ -127,7 +128,7 @@ Using the example code \`${exampleCode}\`, the following query returns the norma ${ await showQuery(shell, exampleCode, [{ type: 'normalized-ast' - }], { showCode: true }) + }], { showCode: true, collapseQuery: true }) } `; } @@ -194,7 +195,7 @@ Using the example code from above, the following query returns all clusters: ${ await 
showQuery(shell, exampleQueryCode, [{ type: 'dataflow-cluster' - }], { showCode: false }) + }], { showCode: false, collapseQuery: true }) } `; } @@ -215,7 +216,7 @@ Using the example code \`${exampleCode}\`, the following query returns all nodes ${ await showQuery(shell, exampleCode, [{ type: 'id-map' - }], { showCode: true }) + }], { showCode: true, collapseQuery: true }) } `; } @@ -327,6 +328,52 @@ This query replaces the old [\`request-slice\`](${FlowrWikiBaseRef}/Interface#me } }); +registerQueryDocumentation('dependencies', { + name: 'Dependencies Query', + type: 'active', + shortDescription: 'Returns all direct dependencies (in- and outputs) of a given R~script', + functionName: executeDependenciesQuery.name, + functionFile: '../queries/catalog/dependencies-query/dependencies-query-executor.ts', + buildExplanation: async(shell: RShell) => { + const exampleCode = 'library(x)'; + const longerCode = ` +source("sample.R") +foo <- loadNamespace("bar") + +data <- read.csv("data.csv") + +#' @importFrom ggplot2 ggplot geom_point aes +ggplot(data, aes(x=x, y=y)) + geom_point() + +better::write.csv(data, "data2.csv") +print("hello world!") + `; + return ` +This query extracts all dependencies from an R script, using a combination of [Call-Context Queries](#call-context-query) +and more advanced tracking in the [Dataflow Graph](${FlowrWikiBaseRef}/Dataflow%20Graph). + +In other words, if you have a script simply reading: \`${exampleCode}\`, the following query returns the loaded library: +${ + await showQuery(shell, exampleCode, [{ + type: 'dependencies' + }], { showCode: false, collapseQuery: true }) +} + +Of course, this works for more complicated scripts too. The query offers information on the loaded _libraries_, _sourced_ files, data which is _read_ and data which is _written_. 
+For example, consider the following script: +${codeBlock('r', longerCode)} +The following query returns the dependencies of the script: +${ + await showQuery(shell, longerCode, [{ + type: 'dependencies' + }], { showCode: false, collapseQuery: true }) +} + + `; + } +}); + + async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 'unknown'; diff --git a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts index 818466ba1e..d9cd34eb1f 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-executor.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-executor.ts @@ -1,5 +1,5 @@ import type { BasicQueryData } from '../../query'; -import { executeQueries } from '../../query'; +import { executeQueriesOfSameType } from '../../query'; import type { DependenciesQuery, DependenciesQueryResult, DependencyInfo, @@ -17,6 +17,7 @@ import { RType } from '../../../r-bridge/lang-4.x/ast/model/type'; import { removeRQuotes } from '../../../r-bridge/retriever'; import { EmptyArgument } from '../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import { visitAst } from '../../../r-bridge/lang-4.x/ast/model/processing/visitor'; const SupportedVertexTypes = [RType.String, RType.Logical, RType.Number]; @@ -24,29 +25,45 @@ const Unknown = 'unknown'; export function executeDependenciesQuery(data: BasicQueryData, queries: readonly DependenciesQuery[]): DependenciesQueryResult { if(queries.length !== 1) { - log.warn('Dependencies query expects only up to one query, but got ', queries.length); + log.warn('Dependencies query expects only up to one query, but got ', queries.length, 'only using the first query'); } const now = Date.now(); - const query = queries[0]; + const [query] = queries; const ignoreDefault = query.ignoreDefaultFunctions 
?? false; const libraryFunctions = getFunctionsToCheck(query.libraryFunctions, ignoreDefault, LibraryFunctions); const sourceFunctions = getFunctionsToCheck(query.sourceFunctions, ignoreDefault, SourceFunctions); const readFunctions = getFunctionsToCheck(query.readFunctions, ignoreDefault, ReadFunctions); const writeFunctions = getFunctionsToCheck(query.writeFunctions, ignoreDefault, WriteFunctions); - const results = executeQueries(data, [ + const numberOfFunctions = libraryFunctions.length + sourceFunctions.length + readFunctions.length + writeFunctions.length; + + const results = numberOfFunctions === 0 ? { kinds: {}, '.meta': { timing: 0 } } : executeQueriesOfSameType(data, ...makeCallContextQuery(libraryFunctions, 'library'), ...makeCallContextQuery(sourceFunctions, 'source'), ...makeCallContextQuery(readFunctions, 'read'), ...makeCallContextQuery(writeFunctions, 'write') - ])['call-context']; + ); const libraries: LibraryInfo[] = getResults(data, results, 'library', libraryFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, libraryName: argument ?? Unknown }), [RType.Symbol]); + + /* for libraries, we have to additionally track all uses of `::` and `:::`, for this we currently simply traverse all uses */ + visitAst(data.ast.ast, n => { + if(n.type === RType.Symbol && n.namespace) { + /* we should improve the identification of ':::' */ + libraries.push({ + nodeId: n.info.id, + functionName: (n.info.fullLexeme ?? n.lexeme).includes(':::') ? 
':::' : '::', + libraryName: n.namespace + }); + } + }); + + const sourcedFiles: SourceInfo[] = getResults(data, results, 'source', sourceFunctions, (id, vertex, argument) => ({ nodeId: id, functionName: vertex.name, @@ -116,11 +133,8 @@ function getArgumentValue({ graph }: BasicQueryData, vertex: DataflowGraphVertex return undefined; } -function getFunctionsToCheck(customFunctions: FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: FunctionInfo[]): FunctionInfo[] { - const functions: FunctionInfo[] = []; - if(!ignoreDefaultFunctions) { - functions.push(...defaultFunctions); - } +function getFunctionsToCheck(customFunctions: readonly FunctionInfo[] | undefined, ignoreDefaultFunctions: boolean, defaultFunctions: readonly FunctionInfo[]): FunctionInfo[] { + const functions: FunctionInfo[] = ignoreDefaultFunctions ? [] : [...defaultFunctions]; if(customFunctions) { functions.push(...customFunctions); } diff --git a/src/queries/catalog/dependencies-query/dependencies-query-format.ts b/src/queries/catalog/dependencies-query/dependencies-query-format.ts index 73a4bf0b47..a189b081e2 100644 --- a/src/queries/catalog/dependencies-query/dependencies-query-format.ts +++ b/src/queries/catalog/dependencies-query/dependencies-query-format.ts @@ -112,10 +112,10 @@ export const DependenciesQueryDefinition = { asciiSummarizer: (formatter, _processed, queryResults, result) => { const out = queryResults as QueryResults<'dependencies'>['dependencies']; result.push(`Query: ${bold('dependencies', formatter)} (${printAsMs(out['.meta'].timing, 0)})`); - printResultSection('Libraries', out.libraries, result, l => `Library Name: ${l.libraryName}`); - printResultSection('Sourced Files', out.sourcedFiles, result, s => `Sourced File: ${s.file}`); - printResultSection('Read Data', out.readData, result, r => `Source: ${r.source}`); - printResultSection('Written Data', out.writtenData, result, w => `Destination: ${w.destination}`); + printResultSection('Libraries', 
out.libraries, result, l => `\`${l.libraryName}\``); + printResultSection('Sourced Files', out.sourcedFiles, result, s => `\`${s.file}\``); + printResultSection('Read Data', out.readData, result, r => `\`${r.source}\``); + printResultSection('Written Data', out.writtenData, result, w => `\`${w.destination}\``); return true; }, schema: Joi.object({ diff --git a/test/functionality/dataflow/query/dependencies-query-tests.ts b/test/functionality/dataflow/query/dependencies-query-tests.ts index 0a7426d559..cc6596567f 100644 --- a/test/functionality/dataflow/query/dependencies-query-tests.ts +++ b/test/functionality/dataflow/query/dependencies-query-tests.ts @@ -70,6 +70,11 @@ describe('Dependencies Query', withShell(shell => { { nodeId: '2@foo', functionName: 'foo', libraryName: 'x' } ] }); + testQuery('Load implicitly', 'foo::x\nbar:::y()', { libraries: [ + { nodeId: '1@x', functionName: '::', libraryName: 'foo' }, + { nodeId: '2@y', functionName: ':::', libraryName: 'bar' } + ] }); + /* currently not supported */ testQuery('Using a vector to load', 'lapply(c("a", "b", "c"), library, character.only = TRUE)', { libraries: [ diff --git a/wiki/Query API.md b/wiki/Query API.md index 928339879f..e97d05f44c 100644 --- a/wiki/Query API.md +++ b/wiki/Query API.md @@ -1,4 +1,4 @@ -_This document was generated from 'src/documentation/print-query-wiki.ts' on 2024-10-15, 19:02:54 UTC presenting an overview of flowR's query API (v2.1.3, using R v4.4.0)._ +_This document was generated from 'src/documentation/print-query-wiki.ts' on 2024-10-17, 08:18:49 UTC presenting an overview of flowR's query API (v2.1.3, using R v4.4.1)._ This page briefly summarizes flowR's query API, represented by the executeQueries function in [`./src/queries/query.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/query.ts). Please see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to access this API. 
@@ -20,6 +20,8 @@ For now, we support the following **active** queries (which we will refer to sim Calculates and returns all the clusters present in the dataflow graph. 1. [Dataflow Query](#dataflow-query) (`dataflow`):\ Returns the dataflow graph of the given code. +1. [Dependencies Query](#dependencies-query) (`dependencies`):\ + Returns all direct dependencies (in- and outputs) of a given R~script 1. [Id-Map Query](#id-map-query) (`id-map`):\ Returns the id-map of the normalized AST of the given code. 1. [Lineage Query](#lineage-query) (`lineage`):\ @@ -479,7 +481,7 @@ flowchart LR 89 -->|"reads, returns, argument"| 87 ``` -(The analysis required _21.42 ms_ (incl. parse and normalize) within the generation environment.) +(The analysis required _28.17 ms_ (incl. parse and normalize) within the generation environment.) @@ -502,6 +504,7 @@ Just as an example, the following [Call-Context Query](#call-context-query) find + ```json [ { @@ -516,16 +519,17 @@ Just as an example, the following [Call-Context Query](#call-context-query) find + _Results (prettified and summarized):_ Query: **call-context** (1 ms)\    ╰ **input**\      ╰ **csv-file**: _`read_csv`_ (L.6), _`read_csv`_ (L.7)\ -_All queries together required ≈1 ms (1ms accuracy, total 9 ms)_ +_All queries together required ≈1 ms (1ms accuracy, total 11 ms)_
Show Detailed Results as Json -The analysis required _8.96 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _10.90 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -595,6 +599,7 @@ all calls that start with `read_` to the kind `input` but only if they are not l + ```json [ { @@ -625,6 +630,7 @@ all calls that start with `read_` to the kind `input` but only if they are not l + _Results (prettified and summarized):_ Query: **call-context** (2 ms)\ @@ -633,11 +639,11 @@ Query: **call-context** (2 ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`mean`_ (L.19)\      ╰ **plot**: _`points`_ (L.17) with 1 link (_`plot`_ (L.16))\ -_All queries together required ≈2 ms (1ms accuracy, total 14 ms)_ +_All queries together required ≈2 ms (1ms accuracy, total 13 ms)_
Show Detailed Results as Json -The analysis required _13.84 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _12.64 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -722,6 +728,7 @@ my_test_function() Now let's say we want to query _all_ uses of the `my_test_function`: + ```json [ { @@ -734,16 +741,17 @@ Now let's say we want to query _all_ uses of the `my_test_function`: + _Results (prettified and summarized):_ Query: **call-context** (0 ms)\    ╰ **.**\      ╰ **.**: _`foo`_ (L.2) with 1 alias root (_`my_test_function`_ (L.1)), _`bar`_ (L.4) with 1 alias root (_`my_test_function`_ (L.1)), _`my_test_function`_ (L.5)\ -_All queries together required ≈0 ms (1ms accuracy, total 5 ms)_ +_All queries together required ≈0 ms (1ms accuracy, total 4 ms)_
Show Detailed Results as Json -The analysis required _4.92 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _4.29 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -810,46 +818,76 @@ Responsible for the execution of the Call-Context Query query is `executeCallCon
+----- +### Dataflow Cluster Query -### Dataflow Query +This query automatically calculates clusters in flowR's dataflow graph +and returns a list of all clusters found. +Clusters are to be interpreted as literal clusters on the graph traversing +edges in both directions. From this perspective, +the code `x <- 1; x` has one cluster (given that all code is related), +while the code `x <- 1; y` has two clusters (given that the `y` has no relation to the previous definition). + + +
Example x <- 1; x -Maybe you want to handle only the result of the query execution, or you just need the [dataflow graph](https://github.com/flowr-analysis/flowr/wiki//Dataflow%20Graph) again. -This query type does exactly that! -Using the example code `x + 1`, the following query returns the dataflow graph of the code: ```json [ { - "type": "dataflow" + "type": "dataflow-cluster" } ] ``` + _Results (prettified and summarized):_ -Query: **dataflow** (0 ms)\ -   ╰ [Dataflow Graph](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMChbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgwKVxuICAgICAgKjEuMSpgXCJdKVxuICAgIDF7e1wiYCM5MTtSTnVtYmVyIzkzOyAxXG4gICAgICAoMSlcbiAgICAgICoxLjUqYFwifX1cbiAgICAyW1tcImAjOTE7UkJpbmFyeU9wIzkzOyAjNDM7XG4gICAgICAoMilcbiAgICAgICoxLjEtNSpcbiAgICAoMCwgMSlgXCJdXVxuICAgIDIgLS0+fFwicmVhZHMsIGFyZ3VtZW50XCJ8IDBcbiAgICAyIC0tPnxcInJlYWRzLCBhcmd1bWVudFwifCAxIiwibWVybWFpZCI6eyJhdXRvU3luYyI6dHJ1ZX19)\ -_All queries together required ≈0 ms (1ms accuracy, total 2 ms)_ +Query: **dataflow-cluster** (0ms)\ +   ╰ Found 1 cluster\ +      ╰ {3, 0, 1, 2} 
([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIHN0eWxlIDEgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMFtcImAjOTE7UlN5bWJvbCM5MzsgeFxuICAgICAgKDApXG4gICAgICAqMS4xKmBcIl1cbiAgICBzdHlsZSAwIHN0cm9rZTp0ZWFsLHN0cm9rZS13aWR0aDo3cHgsc3Ryb2tlLW9wYWNpdHk6Ljg7IFxuICAgIDJbW1wiYCM5MTtSQmluYXJ5T3AjOTM7ICM2MDsjNDU7XG4gICAgICAoMilcbiAgICAgICoxLjEtNipcbiAgICAoMCwgMSlgXCJdXVxuICAgIHN0eWxlIDIgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIHN0eWxlIDMgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDFcbiAgICAwIC0tPnxcImRlZmluZWQtYnlcInwgMlxuICAgIDIgLS0+fFwiYXJndW1lbnRcInwgMVxuICAgIDIgLS0+fFwicmV0dXJucywgYXJndW1lbnRcInwgMFxuICAgIDMgLS0+fFwicmVhZHNcInwgMCIsIm1lcm1haWQiOnsiYXV0b1N5bmMiOnRydWV9fQ==))\ +_All queries together required ≈0 ms (1ms accuracy, total 3 ms)_
Show Detailed Results as Json -The analysis required _1.89 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _2.51 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. -_As the code is pretty long, we inhibit pretty printing and syntax highlighting (JSON):_ -```text -{"dataflow":{".meta":{"timing":0},"graph":{"_idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}},"_unknownSideEffects":[],"rootVertices":[0,1,2],"vertexInformation":[[0,{"tag":"use","id":0}],[1,{"tag":"value","id":1}],[2,{"tag":"function-call","id":2,"name":"+","onlyBuiltin":true,"args":[{"nodeId":0,"type":32},{"nodeId":1,"type":32}]}]],"edgeInformation":[[2,[[0,{"types":65}],[1,{"types":65}]]]]}},".meta":{"timing":0}} + +```json +{ + "dataflow-cluster": { + ".meta": { + "timing": 0 + }, + "clusters": [ + { + "startNode": 3, + "members": [ + 3, + 0, + 1, + 2 + ], + "hasUnknownSideEffects": false + } + ] + }, + ".meta": { + "timing": 0 + } +} ``` @@ -857,66 +895,225 @@ _As the code is pretty long, we inhibit pretty printing and syntax highlighting
-
Original Code -```r -x + 1 -``` -
+ -Dataflow Graph of the R Code +
+ -The analysis required _1.35 ms_ (incl. parse and normalize) within the generation environment. -We encountered no unknown side effects during the analysis. +
Example x <- 1; y -```mermaid -flowchart LR - 0(["`#91;RSymbol#93; x - (0) - *1.1*`"]) - 1{{"`#91;RNumber#93; 1 - (1) - *1.5*`"}} - 2[["`#91;RBinaryOp#93; #43; - (2) - *1.1-5* - (0, 1)`"]] - 2 -->|"reads, argument"| 0 - 2 -->|"reads, argument"| 1 + + +```json +[ + { + "type": "dataflow-cluster" + } +] ``` - -
-Mermaid Code + +_Results (prettified and summarized):_ + +Query: **dataflow-cluster** (0ms)\ +   ╰ Found 2 clusters\ +      ╰ {3} ([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIDBbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgwKVxuICAgICAgKjEuMSpgXCJdXG4gICAgMltbXCJgIzkxO1JCaW5hcnlPcCM5MzsgIzYwOyM0NTtcbiAgICAgICgyKVxuICAgICAgKjEuMS02KlxuICAgICgwLCAxKWBcIl1dXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHlcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIHN0eWxlIDMgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDFcbiAgICAwIC0tPnxcImRlZmluZWQtYnlcInwgMlxuICAgIDIgLS0+fFwiYXJndW1lbnRcInwgMVxuICAgIDIgLS0+fFwicmV0dXJucywgYXJndW1lbnRcInwgMCIsIm1lcm1haWQiOnsiYXV0b1N5bmMiOnRydWV9fQ==))\ +      ╰ {2, 1, 0} ([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIHN0eWxlIDEgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMFtcImAjOTE7UlN5bWJvbCM5MzsgeFxuICAgICAgKDApXG4gICAgICAqMS4xKmBcIl1cbiAgICBzdHlsZSAwIHN0cm9rZTp0ZWFsLHN0cm9rZS13aWR0aDo3cHgsc3Ryb2tlLW9wYWNpdHk6Ljg7IFxuICAgIDJbW1wiYCM5MTtSQmluYXJ5T3AjOTM7ICM2MDsjNDU7XG4gICAgICAoMilcbiAgICAgICoxLjEtNipcbiAgICAoMCwgMSlgXCJdXVxuICAgIHN0eWxlIDIgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHlcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIDAgLS0+fFwiZGVmaW5lZC1ieVwifCAxXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDJcbiAgICAyIC0tPnxcImFyZ3VtZW50XCJ8IDFcbiAgICAyIC0tPnxcInJldHVybnMsIGFyZ3VtZW50XCJ8IDAiLCJtZXJtYWlkIjp7ImF1dG9TeW5jIjp0cnVlfX0=))\ +_All queries together required ≈0 ms (1ms accuracy, total 2 ms)_ + +
Show Detailed Results as Json + +The analysis required _2.15 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + + + + +```json +{ + "dataflow-cluster": { + ".meta": { + "timing": 0 + }, + "clusters": [ + { + "startNode": 3, + "members": [ + 3 + ], + "hasUnknownSideEffects": false + }, + { + "startNode": 2, + "members": [ + 2, + 1, + 0 + ], + "hasUnknownSideEffects": false + } + ] + }, + ".meta": { + "timing": 0 + } +} ``` -flowchart LR - 0(["`#91;RSymbol#93; x - (0) - *1.1*`"]) - 1{{"`#91;RNumber#93; 1 - (1) - *1.5*`"}} - 2[["`#91;RBinaryOp#93; #43; - (2) - *1.1-5* - (0, 1)`"]] - 2 -->|"reads, argument"| 0 - 2 -->|"reads, argument"| 1 -``` + +
+ + + + + +
+ + +Using the example code from above, the following query returns all clusters: + + + +```json +[ { "type": "dataflow-cluster" } ] +``` + + + + +_Results (prettified and summarized):_ + +Query: **dataflow-cluster** (0ms)\ +   ╰ Found 5 clusters\ +      ╰ {89, 87, 85, 82, 18, 22, ... (see JSON below)} ([marked](https://mermaid.live/view#base64:))\ +      ╰ {55, 52, 38, 12, 16, 14, ... (see JSON below)} ([marked](https://mermaid.live/view#base64:))\ +      ╰ (has unknown side effect) {11, 9} ([marked](https://mermaid.live/view#base64:))\ +      ╰ (has unknown side effect) {7, 5} ([marked](https://mermaid.live/view#base64:))\ +      ╰ (has unknown side effect) {3, 1} ([marked](https://mermaid.live/view#base64:))\ +_All queries together required ≈1 ms (1ms accuracy, total 8 ms)_ + +
Show Detailed Results as Json + +The analysis required _8.32 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + + + + +```json +{ + "dataflow-cluster": { + ".meta": { + "timing": 0 + }, + "clusters": [ + { + "startNode": 89, + "members": [ + 89, + 87, + 85, + 82, + 18, + 22, + 20, + 23, + 57, + 60, + 58, + 67, + 65, + 62, + 63, + 69, + 72, + 70, + 79, + 77, + 74, + 75, + 83 + ], + "hasUnknownSideEffects": false + }, + { + "startNode": 55, + "members": [ + 55, + 52, + 38, + 12, + 16, + 14, + 17, + 26, + 29, + 27, + 31, + 32, + 24, + 34, + 36, + 50, + 48, + 43, + 44, + 46, + 47, + 54 + ], + "hasUnknownSideEffects": false + }, + { + "startNode": 11, + "members": [ + 11, + 9 + ], + "hasUnknownSideEffects": true + }, + { + "startNode": 7, + "members": [ + 7, + 5 + ], + "hasUnknownSideEffects": true + }, + { + "startNode": 3, + "members": [ + 3, + 1 + ], + "hasUnknownSideEffects": true + } + ] + }, + ".meta": { + "timing": 1 + } +} +```
- + + @@ -927,42 +1124,40 @@ flowchart LR Implementation Details -Responsible for the execution of the Dataflow Query query is `executeDataflowQuery` in [`./src/queries/catalog/dataflow-query/dataflow-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/dataflow-query/dataflow-query-executor.ts). +Responsible for the execution of the Dataflow Cluster Query query is `executeDataflowClusterQuery` in [`./src/queries/catalog/cluster-query/cluster-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/cluster-query/cluster-query-executor.ts).
+----- - -### Normalized AST Query +### Dataflow Query -Maybe you want to handle only the result of the query execution, or you just need the [normalized AST](https://github.com/flowr-analysis/flowr/wiki//Normalized%20AST) again. +Maybe you want to handle only the result of the query execution, or you just need the [dataflow graph](https://github.com/flowr-analysis/flowr/wiki//Dataflow%20Graph) again. This query type does exactly that! -Using the example code `x + 1`, the following query returns the normalized AST of the code: +Using the example code `x + 1`, the following query returns the dataflow graph of the code: + ```json -[ - { - "type": "normalized-ast" - } -] +[ { "type": "dataflow" } ] ``` + _Results (prettified and summarized):_ -Query: **normalized-ast** (0 ms)\ -   ╰ [Normalized AST](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgbjMoW1wiUkV4cHJlc3Npb25MaXN0ICgzKVxuIFwiXSlcbiAgICBuMihbXCJSQmluYXJ5T3AgKDIpXG4jNDM7XCJdKVxuICAgIG4zIC0tPnxcImV4cHItbGlzdC1jaGlsZC0wXCJ8IG4yXG4gICAgbjAoW1wiUlN5bWJvbCAoMClcbnhcIl0pXG4gICAgbjIgLS0+fFwiYmlub3AtbGhzXCJ8IG4wXG4gICAgbjEoW1wiUk51bWJlciAoMSlcbjFcIl0pXG4gICAgbjIgLS0+fFwiYmlub3AtcmhzXCJ8IG4xXG4iLCJtZXJtYWlkIjp7ImF1dG9TeW5jIjp0cnVlfX0=)\ -_All queries together required ≈0 ms (1ms accuracy, total 1 ms)_ +Query: **dataflow** (0 ms)\ +   ╰ [Dataflow Graph](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMChbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgwKVxuICAgICAgKjEuMSpgXCJdKVxuICAgIDF7e1wiYCM5MTtSTnVtYmVyIzkzOyAxXG4gICAgICAoMSlcbiAgICAgICoxLjUqYFwifX1cbiAgICAyW1tcImAjOTE7UkJpbmFyeU9wIzkzOyAjNDM7XG4gICAgICAoMilcbiAgICAgICoxLjEtNSpcbiAgICAoMCwgMSlgXCJdXVxuICAgIDIgLS0+fFwicmVhZHMsIGFyZ3VtZW50XCJ8IDBcbiAgICAyIC0tPnxcInJlYWRzLCBhcmd1bWVudFwifCAxIiwibWVybWFpZCI6eyJhdXRvU3luYyI6dHJ1ZX19)\ +_All queries together required ≈0 ms (1ms accuracy, total 2 ms)_
Show Detailed Results as Json -The analysis required _1.25 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _1.94 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -971,7 +1166,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int _As the code is pretty long, we inhibit pretty printing and syntax highlighting (JSON):_ ```text -{"normalized-ast":{".meta":{"timing":0},"normalized":{"ast":{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}},"idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}},".meta":{"timing":0}}},".meta":{"timing":0}} 
+{"dataflow":{".meta":{"timing":0},"graph":{"_idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}},"_unknownSideEffects":[],"rootVertices":[0,1,2],"vertexInformation":[[0,{"tag":"use","id":0}],[1,{"tag":"value","id":1}],[2,{"tag":"function-call","id":2,"name":"+","onlyBuiltin":true,"args":[{"nodeId":0,"type":32},{"nodeId":1,"type":32}]}]],"edgeInformation":[[2,[[0,{"types":65}],[1,{"types":65}]]]]}},".meta":{"timing":0}} ``` @@ -990,7 +1185,7 @@ x + 1 Dataflow Graph of the R Code -The analysis required _1.36 ms_ (incl. parse and normalize) within the generation environment. +The analysis required _1.92 ms_ (incl. parse and normalize) within the generation environment. We encountered no unknown side effects during the analysis. 
@@ -1049,52 +1244,133 @@ flowchart LR Implementation Details -Responsible for the execution of the Normalized AST Query query is `executeNormalizedAstQuery` in [`./src/queries/catalog/normalized-ast-query/normalized-ast-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/normalized-ast-query/normalized-ast-query-executor.ts). +Responsible for the execution of the Dataflow Query query is `executeDataflowQuery` in [`./src/queries/catalog/dataflow-query/dataflow-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/dataflow-query/dataflow-query-executor.ts).
+----- +### Dependencies Query -### Lineage Query +This query extracts all dependencies from an R script, using a combination of [Call-Context Queries](#call-context-query) +and more advanced tracking in the [Dataflow Graph](https://github.com/flowr-analysis/flowr/wiki//Dataflow%20Graph). -This query calculates the _lineage_ of a given slicing criterion. The lineage traces back all parts that the -respective variables stems from given the reads, definitions, and returns in the dataflow graph. +In other words, if you have a script simply reading: `library(x)`, the following query returns the loaded library: -To understand this, let's start with a simple example query, to get the lineage of the second use of `x` in the following code: -```r -x <- 1 -x + +```json +[ { "type": "dependencies" } ] ``` - -For this, we use the criterion `2@x` (which is the first use of `x` in the second line). - + + + +_Results (prettified and summarized):_ + +Query: **dependencies** (1 ms)\ +   ╰ Libraries\ +       ╰ library\ +           ╰ Node Id: 3, `x`\ +_All queries together required ≈1 ms (1ms accuracy, total 4 ms)_ + +
Show Detailed Results as Json + +The analysis required _3.70 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + + ```json -[ - { - "type": "lineage", - "criterion": "2@x" +{ + "dependencies": { + ".meta": { + "timing": 1 + }, + "libraries": [ + { + "nodeId": 3, + "functionName": "library", + "libraryName": "x" + } + ], + "sourcedFiles": [], + "readData": [], + "writtenData": [] + }, + ".meta": { + "timing": 1 } -] +} +``` + + + +
+ + + + + + + +Of course, this works for more complicated scripts too. The query offers information on the loaded _libraries_, _sourced_ files, data which is _read_ and data which is _written_. +For example, consider the following script: + +```r +source("sample.R") +foo <- loadNamespace("bar") + +data <- read.csv("data.csv") + +#' @importFrom ggplot2 ggplot geom_point aes +ggplot(data, aes(x=x, y=y)) + geom_point() + +better::write.csv(data, "data2.csv") +print("hello world!") +``` + +The following query returns the dependencies of the script: + + + +```json +[ { "type": "dependencies" } ] ``` + _Results (prettified and summarized):_ -Query: **lineage** (0 ms)\ -   ╰ 2@x: {3, 0, 1, 2}\ -_All queries together required ≈0 ms (1ms accuracy, total 2 ms)_ +Query: **dependencies** (2 ms)\ +   ╰ Libraries\ +       ╰ loadNamespace\ +           ╰ Node Id: 8, `bar`\ +       ╰ ::\ +           ╰ Node Id: 32, `better`\ +   ╰ Sourced Files\ +       ╰ source\ +           ╰ Node Id: 3, `sample.R`\ +   ╰ Read Data\ +       ╰ read.csv\ +           ╰ Node Id: 14, `data.csv`\ +   ╰ Written Data\ +       ╰ write.csv\ +           ╰ Node Id: 37, `data2.csv`\ +       ╰ print\ +           ╰ Node Id: 41, `stdout`\ +_All queries together required ≈2 ms (1ms accuracy, total 7 ms)_
Show Detailed Results as Json -The analysis required _2.08 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _7.08 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1104,21 +1380,51 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int ```json { - "lineage": { + "dependencies": { ".meta": { - "timing": 0 + "timing": 2 }, - "lineages": { - "2@x": [ - 3, - 0, - 1, - 2 - ] - } + "libraries": [ + { + "nodeId": 8, + "functionName": "loadNamespace", + "libraryName": "bar" + }, + { + "nodeId": 32, + "functionName": "::", + "libraryName": "better" + } + ], + "sourcedFiles": [ + { + "nodeId": 3, + "functionName": "source", + "file": "sample.R" + } + ], + "readData": [ + { + "nodeId": 14, + "functionName": "read.csv", + "source": "data.csv" + } + ], + "writtenData": [ + { + "nodeId": 37, + "functionName": "write.csv", + "destination": "data2.csv" + }, + { + "nodeId": 41, + "functionName": "print", + "destination": "stdout" + } + ] }, ".meta": { - "timing": 0 + "timing": 2 } } ``` @@ -1133,209 +1439,181 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int -In this simple scenario, the _lineage_ is equivalent to the slice (and in-fact the complete code). -In general the lineage is smaller and makes no executability guarantees. -It is just a quick and neither complete nor sound way to get information on where the variable originates from. - -This query replaces the old [`request-lineage`](https://github.com/flowr-analysis/flowr/wiki//Interface#message-request-lineage) message. +TODO: support a::b, TODO: union on ids if re-loaded?
Implementation Details -Responsible for the execution of the Lineage Query query is `executeLineageQuery` in [`./src/queries/catalog/lineage-query/lineage-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/lineage-query/lineage-query-executor.ts). +Responsible for the execution of the Dependencies Query query is `executeDependenciesQuery` in [`./src/queries/catalog/dependencies-query/dependencies-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/dependencies-query/dependencies-query-executor.ts).
+----- +### Id-Map Query -### Dataflow Cluster Query - - -This query automatically calculates clusters in flowR's dataflow graph -and returns a list of all clusters found. -Clusters are to be interpreted as literal clusters on the graph traversing -edges in both directions. From this perspective, -the code `x <- 1; x` has one cluster (given that all code is related), -while the code `x <- 1; y` has two clusters (given that the `y` has no relation to the previous definition). +This query provides access to all nodes in the [normalized AST](https://github.com/flowr-analysis/flowr/wiki//Normalized%20AST) as a mapping from their id to the node itself. -
Example x <- 1; x +Using the example code `x + 1`, the following query returns all nodes from the code: ```json -[ - { - "type": "dataflow-cluster" - } -] +[ { "type": "id-map" } ] ``` + _Results (prettified and summarized):_ -Query: **dataflow-cluster** (0ms)\ -   ╰ Found 1 cluster\ -      ╰ {3, 0, 1, 2} ([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIHN0eWxlIDEgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMFtcImAjOTE7UlN5bWJvbCM5MzsgeFxuICAgICAgKDApXG4gICAgICAqMS4xKmBcIl1cbiAgICBzdHlsZSAwIHN0cm9rZTp0ZWFsLHN0cm9rZS13aWR0aDo3cHgsc3Ryb2tlLW9wYWNpdHk6Ljg7IFxuICAgIDJbW1wiYCM5MTtSQmluYXJ5T3AjOTM7ICM2MDsjNDU7XG4gICAgICAoMilcbiAgICAgICoxLjEtNipcbiAgICAoMCwgMSlgXCJdXVxuICAgIHN0eWxlIDIgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIHN0eWxlIDMgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDFcbiAgICAwIC0tPnxcImRlZmluZWQtYnlcInwgMlxuICAgIDIgLS0+fFwiYXJndW1lbnRcInwgMVxuICAgIDIgLS0+fFwicmV0dXJucywgYXJndW1lbnRcInwgMFxuICAgIDMgLS0+fFwicmVhZHNcInwgMCIsIm1lcm1haWQiOnsiYXV0b1N5bmMiOnRydWV9fQ==))\ +Query: **id-map** (0 ms)\ +   ╰ Id List: {0, 1, 2, 3, 2-arg, 0-arg, ... (see JSON below)}\ _All queries together required ≈0 ms (1ms accuracy, total 2 ms)_
Show Detailed Results as Json -The analysis required _1.91 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _2.02 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. +_As the code is pretty long, we inhibit pretty printing and syntax highlighting (JSON):_ +```text +{"id-map":{".meta":{"timing":0},"idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}}},".meta":{"timing":0}} +``` -```json -{ - "dataflow-cluster": { - ".meta": { - "timing": 0 - }, - "clusters": [ - { - "startNode": 3, - "members": [ - 3, - 0, - 1, - 2 - ], - "hasUnknownSideEffects": false - } - ] - }, - ".meta": { - "timing": 0 - } -} + + +
+ + +
Original Code + + + +```r +x + 1 ``` +
+ +Dataflow Graph of the R Code + +The analysis required _1.44 ms_ (incl. parse and normalize) within the generation environment. +We encountered no unknown side effects during the analysis. + + +```mermaid +flowchart LR + 0(["`#91;RSymbol#93; x + (0) + *1.1*`"]) + 1{{"`#91;RNumber#93; 1 + (1) + *1.5*`"}} + 2[["`#91;RBinaryOp#93; #43; + (2) + *1.1-5* + (0, 1)`"]] + 2 -->|"reads, argument"| 0 + 2 -->|"reads, argument"| 1 +``` + + +
+Mermaid Code +``` +flowchart LR + 0(["`#91;RSymbol#93; x + (0) + *1.1*`"]) + 1{{"`#91;RNumber#93; 1 + (1) + *1.5*`"}} + 2[["`#91;RBinaryOp#93; #43; + (2) + *1.1-5* + (0, 1)`"]] + 2 -->|"reads, argument"| 0 + 2 -->|"reads, argument"| 1 +```
+
- - -
- + -
Example x <- 1; y + + -```json -[ - { - "type": "dataflow-cluster" - } -] -``` +
+Implementation Details +Responsible for the execution of the Id-Map Query query is `executeIdMapQuery` in [`./src/queries/catalog/id-map-query/id-map-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/id-map-query/id-map-query-executor.ts). -_Results (prettified and summarized):_ +
-Query: **dataflow-cluster** (0ms)\ -   ╰ Found 2 clusters\ -      ╰ {3} ([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIDBbXCJgIzkxO1JTeW1ib2wjOTM7IHhcbiAgICAgICgwKVxuICAgICAgKjEuMSpgXCJdXG4gICAgMltbXCJgIzkxO1JCaW5hcnlPcCM5MzsgIzYwOyM0NTtcbiAgICAgICgyKVxuICAgICAgKjEuMS02KlxuICAgICgwLCAxKWBcIl1dXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHlcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIHN0eWxlIDMgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDFcbiAgICAwIC0tPnxcImRlZmluZWQtYnlcInwgMlxuICAgIDIgLS0+fFwiYXJndW1lbnRcInwgMVxuICAgIDIgLS0+fFwicmV0dXJucywgYXJndW1lbnRcInwgMCIsIm1lcm1haWQiOnsiYXV0b1N5bmMiOnRydWV9fQ==))\ -      ╰ {2, 1, 0} ([marked](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgMXt7XCJgIzkxO1JOdW1iZXIjOTM7IDFcbiAgICAgICgxKVxuICAgICAgKjEuNipgXCJ9fVxuICAgIHN0eWxlIDEgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMFtcImAjOTE7UlN5bWJvbCM5MzsgeFxuICAgICAgKDApXG4gICAgICAqMS4xKmBcIl1cbiAgICBzdHlsZSAwIHN0cm9rZTp0ZWFsLHN0cm9rZS13aWR0aDo3cHgsc3Ryb2tlLW9wYWNpdHk6Ljg7IFxuICAgIDJbW1wiYCM5MTtSQmluYXJ5T3AjOTM7ICM2MDsjNDU7XG4gICAgICAoMilcbiAgICAgICoxLjEtNipcbiAgICAoMCwgMSlgXCJdXVxuICAgIHN0eWxlIDIgc3Ryb2tlOnRlYWwsc3Ryb2tlLXdpZHRoOjdweCxzdHJva2Utb3BhY2l0eTouODsgXG4gICAgMyhbXCJgIzkxO1JTeW1ib2wjOTM7IHlcbiAgICAgICgzKVxuICAgICAgKjEuOSpgXCJdKVxuICAgIDAgLS0+fFwiZGVmaW5lZC1ieVwifCAxXG4gICAgMCAtLT58XCJkZWZpbmVkLWJ5XCJ8IDJcbiAgICAyIC0tPnxcImFyZ3VtZW50XCJ8IDFcbiAgICAyIC0tPnxcInJldHVybnMsIGFyZ3VtZW50XCJ8IDAiLCJtZXJtYWlkIjp7ImF1dG9TeW5jIjp0cnVlfX0=))\ -_All queries together required ≈0 ms (1ms accuracy, total 1 ms)_ -
Show Detailed Results as Json +----- -The analysis required _1.30 ms_ (including parsing and normalization and the query) within the generation environment. -In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. -Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. +### Lineage Query +This query calculates the _lineage_ of a given slicing criterion. The lineage traces back all parts that the +respective variable stems from, given the reads, definitions, and returns in the dataflow graph. +To understand this, let's start with a simple example query, to get the lineage of the second use of `x` in the following code: -```json -{ - "dataflow-cluster": { - ".meta": { - "timing": 0 - }, - "clusters": [ - { - "startNode": 3, - "members": [ - 3 - ], - "hasUnknownSideEffects": false - }, - { - "startNode": 2, - "members": [ - 2, - 1, - 0 - ], - "hasUnknownSideEffects": false - } - ] - }, - ".meta": { - "timing": 0 - } -} +```r +x <- 1 +x ``` + +For this, we use the criterion `2@x` (which is the first use of `x` in the second line). + -
- - - - - - - -
- - -Using the example code from above, the following query returns all clusters: - ```json [ { - "type": "dataflow-cluster" + "type": "lineage", + "criterion": "2@x" } ] ``` + _Results (prettified and summarized):_ -Query: **dataflow-cluster** (0ms)\ -   ╰ Found 5 clusters\ -      ╰ {89, 87, 85, 82, 18, 22, ... (see JSON below)} ([marked](https://mermaid.live/view#base64:))\ -      ╰ {55, 52, 38, 12, 16, 14, ... (see JSON below)} ([marked](https://mermaid.live/view#base64:))\ -      ╰ (has unknown side effect) {11, 9} ([marked](https://mermaid.live/view#base64:))\ -      ╰ (has unknown side effect) {7, 5} ([marked](https://mermaid.live/view#base64:))\ -      ╰ (has unknown side effect) {3, 1} ([marked](https://mermaid.live/view#base64:))\ -_All queries together required ≈0 ms (1ms accuracy, total 5 ms)_ +Query: **lineage** (1 ms)\ +   ╰ 2@x: {3, 0, 1, 2}\ +_All queries together required ≈1 ms (1ms accuracy, total 2 ms)_
Show Detailed Results as Json -The analysis required _5.41 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _2.17 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1345,96 +1623,21 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int ```json { - "dataflow-cluster": { + "lineage": { ".meta": { - "timing": 0 + "timing": 1 }, - "clusters": [ - { - "startNode": 89, - "members": [ - 89, - 87, - 85, - 82, - 18, - 22, - 20, - 23, - 57, - 60, - 58, - 67, - 65, - 62, - 63, - 69, - 72, - 70, - 79, - 77, - 74, - 75, - 83 - ], - "hasUnknownSideEffects": false - }, - { - "startNode": 55, - "members": [ - 55, - 52, - 38, - 12, - 16, - 14, - 17, - 26, - 29, - 27, - 31, - 32, - 24, - 34, - 36, - 50, - 48, - 43, - 44, - 46, - 47, - 54 - ], - "hasUnknownSideEffects": false - }, - { - "startNode": 11, - "members": [ - 11, - 9 - ], - "hasUnknownSideEffects": true - }, - { - "startNode": 7, - "members": [ - 7, - 5 - ], - "hasUnknownSideEffects": true - }, - { - "startNode": 3, - "members": [ - 3, - 1 - ], - "hasUnknownSideEffects": true - } - ] + "lineages": { + "2@x": [ + 3, + 0, + 1, + 2 + ] + } }, ".meta": { - "timing": 0 + "timing": 1 } } ``` @@ -1448,47 +1651,53 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int + +In this simple scenario, the _lineage_ is equivalent to the slice (and, in fact, the complete code). +In general, the lineage is smaller and makes no executability guarantees. +It is just a quick way, neither complete nor sound, to get information on where the variable originates from.
+ +This query replaces the old [`request-lineage`](https://github.com/flowr-analysis/flowr/wiki//Interface#message-request-lineage) message. +
Implementation Details -Responsible for the execution of the Dataflow Cluster Query query is `executeDataflowClusterQuery` in [`./src/queries/catalog/cluster-query/cluster-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/cluster-query/cluster-query-executor.ts). +Responsible for the execution of the Lineage Query query is `executeLineageQuery` in [`./src/queries/catalog/lineage-query/lineage-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/lineage-query/lineage-query-executor.ts).
+----- +### Normalized AST Query -### Id-Map Query +Maybe you want to handle only the result of the query execution, or you just need the [normalized AST](https://github.com/flowr-analysis/flowr/wiki//Normalized%20AST) again. +This query type does exactly that! -This query provides access to all nodes in the [normalized AST](https://github.com/flowr-analysis/flowr/wiki//Normalized%20AST) as a mapping from their id to the node itself. +Using the example code `x + 1`, the following query returns the normalized AST of the code: -Using the example code `x + 1`, the following query returns all nodes from the code: ```json -[ - { - "type": "id-map" - } -] +[ { "type": "normalized-ast" } ] ``` + _Results (prettified and summarized):_ -Query: **id-map** (0 ms)\ -   ╰ Id List: {0, 1, 2, 3, 2-arg, 0-arg, ... (see JSON below)}\ +Query: **normalized-ast** (0 ms)\ +   ╰ [Normalized AST](https://mermaid.live/view#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgbjMoW1wiUkV4cHJlc3Npb25MaXN0ICgzKVxuIFwiXSlcbiAgICBuMihbXCJSQmluYXJ5T3AgKDIpXG4jNDM7XCJdKVxuICAgIG4zIC0tPnxcImV4cHItbGlzdC1jaGlsZC0wXCJ8IG4yXG4gICAgbjAoW1wiUlN5bWJvbCAoMClcbnhcIl0pXG4gICAgbjIgLS0+fFwiYmlub3AtbGhzXCJ8IG4wXG4gICAgbjEoW1wiUk51bWJlciAoMSlcbjFcIl0pXG4gICAgbjIgLS0+fFwiYmlub3AtcmhzXCJ8IG4xXG4iLCJtZXJtYWlkIjp7ImF1dG9TeW5jIjp0cnVlfX0=)\ _All queries together required ≈0 ms (1ms accuracy, total 2 ms)_
Show Detailed Results as Json -The analysis required _2.08 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _1.58 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1497,7 +1706,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int _As the code is pretty long, we inhibit pretty printing and syntax highlighting (JSON):_ ```text -{"id-map":{".meta":{"timing":0},"idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}}},".meta":{"timing":0}} +{"normalized-ast":{".meta":{"timing":0},"normalized":{"ast":{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}},"idMap":{"size":7,"k2v":[[0,{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],[1,{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}],[2,{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],[3,{"type":"RExpressionList","children":[{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 
1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],"info":{"additionalTokens":[],"id":3,"nesting":0,"role":"root","index":0}}],["2-arg",{"type":"RBinaryOp","location":[1,3,1,3],"lhs":{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}},"rhs":{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}},"operator":"+","lexeme":"+","info":{"fullRange":[1,1,1,5],"additionalTokens":[],"fullLexeme":"x + 1","id":2,"parent":3,"nesting":0,"index":0,"role":"expr-list-child"}}],["0-arg",{"type":"RSymbol","location":[1,1,1,1],"content":"x","lexeme":"x","info":{"fullRange":[1,1,1,1],"additionalTokens":[],"fullLexeme":"x","id":0,"parent":2,"role":"binop-lhs","index":0,"nesting":0}}],["1-arg",{"location":[1,5,1,5],"lexeme":"1","info":{"fullRange":[1,5,1,5],"additionalTokens":[],"fullLexeme":"1","id":1,"parent":2,"role":"binop-rhs","index":1,"nesting":0},"type":"RNumber","content":{"num":1,"complexNumber":false,"markedAsInt":false}}]],"v2k":{}},".meta":{"timing":0}}},".meta":{"timing":0}} ``` @@ -1516,7 +1725,7 @@ x + 1 Dataflow Graph of the R Code -The analysis required _1.23 ms_ (incl. parse and normalize) within the generation environment. +The analysis required _1.48 ms_ (incl. parse and normalize) within the generation environment. We encountered no unknown side effects during the analysis. @@ -1575,12 +1784,12 @@ flowchart LR Implementation Details -Responsible for the execution of the Id-Map Query query is `executeIdMapQuery` in [`./src/queries/catalog/id-map-query/id-map-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/id-map-query/id-map-query-executor.ts). 
+Responsible for the execution of the Normalized AST Query query is `executeNormalizedAstQuery` in [`./src/queries/catalog/normalized-ast-query/normalized-ast-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/catalog/normalized-ast-query/normalized-ast-query-executor.ts).
- +----- ### Static Slice Query @@ -1603,6 +1812,7 @@ If you are interested in the parts required for the use of `x` in the last line, + ```json [ { @@ -1616,16 +1826,17 @@ If you are interested in the parts required for the use of `x` in the last line, + _Results (prettified and summarized):_ -Query: **static-slice** (2 ms)\ +Query: **static-slice** (3 ms)\    ╰ Slice for {3@x} \      ╰ Code (newline as \n): x <- 1\\nx\ -_All queries together required ≈2 ms (1ms accuracy, total 4 ms)_ +_All queries together required ≈3 ms (1ms accuracy, total 5 ms)_
Show Detailed Results as Json -The analysis required _3.74 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _5.03 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1637,7 +1848,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int { "static-slice": { ".meta": { - "timing": 2 + "timing": 3 }, "results": { "{\"type\":\"static-slice\",\"criteria\":[\"3@x\"]}": { @@ -1663,14 +1874,14 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int "code": "x <- 1\nx", "linesWithAutoSelected": 0, ".meta": { - "timing": 0 + "timing": 1 } } } } }, ".meta": { - "timing": 2 + "timing": 3 } } ``` @@ -1693,6 +1904,7 @@ you can use the `noReconstruction` flag. + ```json [ { @@ -1707,16 +1919,17 @@ you can use the `noReconstruction` flag. + _Results (prettified and summarized):_ Query: **static-slice** (1 ms)\    ╰ Slice for {3@x} no reconstruction\      ╰ Id List: {6, 0, 1, 2}\ -_All queries together required ≈1 ms (1ms accuracy, total 2 ms)_ +_All queries together required ≈1 ms (1ms accuracy, total 4 ms)_
Show Detailed Results as Json -The analysis required _2.46 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _3.69 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1802,6 +2015,7 @@ assigned to the kind `visualize` and the subkind `text` (using the example code + ```json [ { @@ -1825,16 +2039,17 @@ assigned to the kind `visualize` and the subkind `text` (using the example code + _Results (prettified and summarized):_ Query: **call-context** (0 ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ -_All queries together required ≈0 ms (1ms accuracy, total 5 ms)_ +_All queries together required ≈0 ms (1ms accuracy, total 10 ms)_
Show Detailed Results as Json -The analysis required _5.18 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _9.52 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1889,6 +2104,7 @@ Of course, in this specific scenario, the following query would be equivalent: + ```json [ { @@ -1900,18 +2116,19 @@ Of course, in this specific scenario, the following query would be equivalent: ] ``` +
Show Results _Results (prettified and summarized):_ -Query: **call-context** (0 ms)\ +Query: **call-context** (1 ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ -_All queries together required ≈0 ms (1ms accuracy, total 5 ms)_ +_All queries together required ≈1 ms (1ms accuracy, total 7 ms)_
Show Detailed Results as Json -The analysis required _5.17 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _7.42 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -1923,7 +2140,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int { "call-context": { ".meta": { - "timing": 0 + "timing": 1 }, "kinds": { "visualize": { @@ -1947,7 +2164,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int } }, ".meta": { - "timing": 0 + "timing": 1 } } ``` @@ -1969,6 +2186,7 @@ want to resolve to a local definition: + ```json [ { @@ -1994,16 +2212,17 @@ want to resolve to a local definition: + _Results (prettified and summarized):_ Query: **call-context** (0 ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9) with 1 call (_built-in_), _`mean`_ (L.19) with 1 call (_built-in_)\ -_All queries together required ≈0 ms (1ms accuracy, total 5 ms)_ +_All queries together required ≈0 ms (1ms accuracy, total 8 ms)_
Show Detailed Results as Json -The analysis required _5.44 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _7.87 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those.