Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not track default values for parameters if given as argument #783

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/dataflow/internal/linker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ export function produceNameSharedIdMap(references: IdentifierReference[]): NameI
return nameIdShares
}

// TODO: pmatch + identify given arguments, mark them with the call id

export function linkArgumentsOnCall(args: FunctionArgument[], params: RParameter<ParentInformation>[], graph: DataflowGraph): void {
const nameArgMap = new Map<string, IdentifierReference>(args.filter(isNamedArgument).map(a => [a.name, a] as const))
const nameParamMap = new Map<string, RParameter<ParentInformation>>(params.map(p => [p.name.content, p]))
Expand Down
37 changes: 25 additions & 12 deletions src/slicing/static/slice-call.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ export function sliceForCall(current: NodeToSlice, callerInfo: DataflowGraphVert
const activeEnvironment = retrieveActiveEnvironment(callerInfo, baseEnvironment)
const activeEnvironmentFingerprint = envFingerprint(activeEnvironment)

const name = callerInfo.name
guard(name !== undefined, () => `name of id: ${callerInfo.id} can not be found in id map`)
const functionCallDefs = resolveByName(name, activeEnvironment)?.filter(d => d.definedAt !== BuiltIn)?.map(d => d.nodeId) ?? []
const functionCallDefs = resolveByName(callerInfo.name, activeEnvironment)?.filter(d => d.definedAt !== BuiltIn)?.map(d => d.nodeId) ?? []

for(const [target, outgoingEdge] of outgoingEdges[1].entries()) {
if(edgeIncludesType(outgoingEdge.types, EdgeType.Calls)) {
Expand All @@ -56,26 +54,38 @@ export function sliceForCall(current: NodeToSlice, callerInfo: DataflowGraphVert

const functionCallTargets = getAllLinkedFunctionDefinitions(new Set(functionCallDefs), dataflowGraph)

for(const [_, functionCallTarget] of functionCallTargets) {
const callStack = [...current.callStack ?? [], callerInfo.id]

for(const [, functionCallTarget] of functionCallTargets) {
// all those linked within the scopes of other functions are already linked when exiting a function definition
for(const openIn of (functionCallTarget as DataflowGraphVertexFunctionDefinition).subflow.in) {
const defs = openIn.name ? resolveByName(openIn.name, activeEnvironment) : undefined
if(defs === undefined) {
continue
}
for(const def of defs.filter(d => d.nodeId !== BuiltIn)) {
queue.add(def.nodeId, baseEnvironment, baseEnvPrint, current.onlyForSideEffects)
queue.add({
id: def.nodeId,
baseEnvironment,
onlyForSideEffects: current.onlyForSideEffects,
callStack
}, baseEnvPrint)
}
}

for(const exitPoint of (functionCallTarget as DataflowGraphVertexFunctionDefinition).exitPoints) {
queue.add(exitPoint, activeEnvironment, activeEnvironmentFingerprint, current.onlyForSideEffects)
queue.add({
id: exitPoint,
baseEnvironment: activeEnvironment,
onlyForSideEffects: current.onlyForSideEffects,
callStack
}, activeEnvironmentFingerprint)
}
}
}

/** Returns true if we found at least one return edge */
export function handleReturns(queue: VisitingQueue, currentEdges: OutgoingEdges, baseEnvFingerprint: Fingerprint, baseEnvironment: REnvironmentInformation): boolean {
/** Returns true if we found at least one return edge, applies all of them to the queue */
export function handleReturns(current: NodeToSlice, queue: VisitingQueue, currentEdges: OutgoingEdges, baseEnvFingerprint: Fingerprint, baseEnvironment: REnvironmentInformation): boolean {
let found = false
for(const [, edge] of currentEdges) {
if(edgeIncludesType(edge.types, EdgeType.Returns)) {
Expand All @@ -87,10 +97,13 @@ export function handleReturns(queue: VisitingQueue, currentEdges: OutgoingEdges,
return false
}
for(const [target, edge] of currentEdges) {
if(edgeIncludesType(edge.types, EdgeType.Returns)) {
queue.add(target, baseEnvironment, baseEnvFingerprint, false)
} else if(edgeIncludesType(edge.types, EdgeType.Reads)) {
queue.add(target, baseEnvironment, baseEnvFingerprint, false)
if(edgeIncludesType(edge.types, EdgeType.Returns | EdgeType.Reads)) {
queue.add({
id: target,
baseEnvironment,
onlyForSideEffects: false,
callStack: current.callStack
}, baseEnvFingerprint)
} else if(edgeIncludesType(edge.types, EdgeType.Argument)) {
queue.potentialArguments.add(target)
}
Expand Down
12 changes: 12 additions & 0 deletions src/slicing/static/slicer-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ import type { DecodedCriteria } from '../criterion/parse'
*/
export interface NodeToSlice {
readonly id: NodeId
/**
* The call stack the node is visited in.
* Please note that this is explicitly not part of the node-fingerprint. The same call stack
* can have different consequences (e.g., in a loop in which the iteration does not push a new call
* frame but may have implications on the values of variables etc.).
* Such changes are always implied by the (base)Environment.
* On the other hand, the call stack is _not_ a set as it is easily possible to have the same node
* within it multiple times (e.g., in the case of recursion).
*
* Call stacks may be undefined if we do not need/want to know it
*/
readonly callStack: readonly NodeId[] | undefined
/** used for calling context, etc. */
readonly baseEnvironment: REnvironmentInformation
/** if we add a function call, we may need it only for its side effects (e.g., a redefinition of a global variable), if so, 'returns' links will not be traced */
Expand Down
37 changes: 31 additions & 6 deletions src/slicing/static/static-slicer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ export function staticSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria
const emptyEnv = initializeCleanEnvironments()
const basePrint = envFingerprint(emptyEnv)
for(const startId of decodedCriteria) {
queue.add(startId.id, emptyEnv, basePrint, false)
queue.add({
id: startId.id,
callStack: [],
baseEnvironment: emptyEnv,
onlyForSideEffects: false
}, basePrint)
// retrieve the minimum depth of all nodes to only add control dependencies if they are "part" of the current execution
minDepth = Math.min(minDepth, ast.idMap.get(startId.id)?.info.depth ?? minDepth)
sliceSeedIds.add(startId.id)
Expand All @@ -59,7 +64,12 @@ export function staticSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria
const topLevel = graph.isRoot(id) || sliceSeedIds.has(id)
for(const cd of currentVertex.controlDependencies) {
if(!topLevel || (ast.idMap.get(cd.id)?.info.depth ?? 0) <= minDepth) {
queue.add(cd.id, baseEnvironment, baseEnvFingerprint, false)
queue.add({
id: cd.id,
baseEnvironment,
callStack: [],
onlyForSideEffects: false
}, baseEnvFingerprint)
}
}
}
Expand All @@ -69,7 +79,7 @@ export function staticSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria
sliceForCall(current, currentVertex, graph, queue)
}

const ret = handleReturns(queue, currentEdges, baseEnvFingerprint, baseEnvironment)
const ret = handleReturns(current, queue, currentEdges, baseEnvFingerprint, baseEnvironment)
if(ret) {
continue
}
Expand All @@ -81,12 +91,27 @@ export function staticSlicing(graph: DataflowGraph, ast: NormalizedAst, criteria
}
const t = shouldTraverseEdge(types)
if(t === TraverseEdge.Always) {
queue.add(target, baseEnvironment, baseEnvFingerprint, false)
queue.add({
id: target,
baseEnvironment,
callStack: current.callStack,
onlyForSideEffects: false
}, baseEnvFingerprint)
} else if(t === TraverseEdge.DefinedByOnCall && queue.potentialArguments.has(target)) {
queue.add(target, baseEnvironment, baseEnvFingerprint, false)
queue.add({
id: target,
baseEnvironment,
callStack: current.callStack,
onlyForSideEffects: false
}, baseEnvFingerprint)
queue.potentialArguments.delete(target)
} else if(t === TraverseEdge.SideEffect) {
queue.add(target, baseEnvironment, baseEnvFingerprint, true)
queue.add({
id: target,
baseEnvironment,
callStack: current.callStack,
onlyForSideEffects: true
}, baseEnvFingerprint)
}
}
}
Expand Down
22 changes: 10 additions & 12 deletions src/slicing/static/visiting-queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import type { Fingerprint } from './fingerprint'
import { fingerprint } from './fingerprint'
import type { NodeToSlice, SliceResult } from './slicer-types'
import { slicerLogger } from './static-slicer'
import type { REnvironmentInformation } from '../../dataflow/environments/environment'
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'

export class VisitingQueue {
Expand All @@ -20,26 +19,25 @@ export class VisitingQueue {

/**
* Adds a node to the queue if it has not been seen before.
* @param target - the node to add
* @param env - the environment the node is traversed in
* @param envFingerprint - the fingerprint of the environment
* @param onlyForSideEffects - whether the node is only used for its side effects
* @param node - the {@link NodeToSlice|node} to add
* @param envFingerprint - The environment fingerprint is passed separately to encourage external caching.
*/
public add(target: NodeId, env: REnvironmentInformation, envFingerprint: string, onlyForSideEffects: boolean): void {
const idCounter = this.idThreshold.get(target) ?? 0
public add(node: NodeToSlice, envFingerprint: string): void {
const { id, onlyForSideEffects } = node
const idCounter = this.idThreshold.get(id) ?? 0
if(idCounter > this.threshold) {
slicerLogger.warn(`id: ${target} has been visited ${idCounter} times, skipping`)
slicerLogger.warn(`id: ${id} has been visited ${idCounter} times, skipping`)
this.timesHitThreshold++
return
}

/* we do not include the in call part in the fingerprint as it is 'deterministic' from the source position */
const print = fingerprint(target, envFingerprint, onlyForSideEffects)
const print = fingerprint(id, envFingerprint, onlyForSideEffects)

if(!this.seen.has(print)) {
this.idThreshold.set(target, idCounter + 1)
this.seen.set(print, target)
this.queue.push({ id: target, baseEnvironment: env, onlyForSideEffects })
this.idThreshold.set(id, idCounter + 1)
this.seen.set(print, id)
this.queue.push(node)
}
}

Expand Down
13 changes: 9 additions & 4 deletions test/functionality/slicing/static-program-slices/calls-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,15 @@ x <- 2
b()`)
})
describe('Functions with named arguments', () => {
const code = `a <- function(x=4) { x }
a(x = 3)`
assertSliced(label('Must include function definition', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'named-arguments','resolve-arguments', 'numbers']),
shell, code, ['2@a'], code)
/* if we give an explicit value to the parameter, we do not need the default value */
assertSliced(label('Must include function definition (named arg)', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'named-arguments','resolve-arguments', 'numbers']),
shell, 'a <- function(x=4) { x }\na(x = 3)', ['2@a'], 'a <- function(x) { x }\na(x = 3)')
assertSliced(label('Must include function definition (unnamed arg)', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'unnamed-arguments','resolve-arguments', 'numbers']),
shell, 'a <- function(x=4) { x }\na(3)', ['2@a'], 'a <- function(x) { x }\na(3)')
assertSliced(label('Must include function definition (no arg)', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines','resolve-arguments', 'numbers']),
shell, 'a <- function(x=4) { x }\na()', ['2@a'], 'a <- function(x=4) { x }\na()')
assertSliced(label('Must include function definition (combined)', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'implicit-return', 'newlines', 'named-arguments', 'unnamed-arguments','resolve-arguments', 'numbers', ...OperatorDatabase['+'].capabilities]),
shell, 'a <- function(x=4) { x }\ny <- a() + a(x = 3)', ['2@y'], 'a <- function(x=4) { x }\ny <- a() + a(x = 3)')

assertSliced(label('Must work for same named arguments too', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'named-arguments', 'newlines']),
shell, 'a <- 3\nb <- foo(a=a)', ['2@b'], 'a <- 3\nb <- foo(a=a)')
Expand Down
Loading