Skip to content

Commit

Permalink
[Bug]: Linking Overshadowed Built-Ins in Loops (#957)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce authored Sep 9, 2024
2 parents 57df8b4 + 5825043 commit adb9748
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 130 deletions.
24 changes: 12 additions & 12 deletions src/dataflow/environments/built-in.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,24 +174,24 @@ registerSimpleFunctions(
'do.call', 'rbind', 'nrow', 'ncol', 'tryCatch', 'expression', 'factor',
'missing', 'as.data.frame', 'data.frame', 'na.omit', 'rownames', 'names', 'order', 'length', 'any', 'dim', 'matrix', 'cbind', 'nchar', 't'
)
registerBuiltInFunctions(true, ['lapply', 'sapply', 'vapply', 'mapply'], processApply, { indexOfFunction: 1, nameOfFunctionArgument: 'FUN' } )
registerBuiltInFunctions(false, ['lapply', 'sapply', 'vapply', 'mapply'], processApply, { indexOfFunction: 1, nameOfFunctionArgument: 'FUN' } )
/* functool wrappers */
registerBuiltInFunctions(true, ['Lapply', 'Sapply', 'Vapply', 'Mapply'], processApply, { indexOfFunction: 1, nameOfFunctionArgument: 'FUN' } )
registerBuiltInFunctions(true, ['apply', 'tapply', 'Tapply'], processApply, { indexOfFunction: 2, nameOfFunctionArgument: 'FUN' } )
registerBuiltInFunctions(true, ['print'], defaultBuiltInProcessor, { returnsNthArgument: 0, forceArgs: 'all' as const } )
registerBuiltInFunctions(false, ['Lapply', 'Sapply', 'Vapply', 'Mapply'], processApply, { indexOfFunction: 1, nameOfFunctionArgument: 'FUN' } )
registerBuiltInFunctions(false, ['apply', 'tapply', 'Tapply'], processApply, { indexOfFunction: 2, nameOfFunctionArgument: 'FUN' } )
registerBuiltInFunctions(false, ['print'], defaultBuiltInProcessor, { returnsNthArgument: 0, forceArgs: 'all' as const } )
registerBuiltInFunctions(true, ['('], defaultBuiltInProcessor, { returnsNthArgument: 0 } )
registerBuiltInFunctions(true, ['load', 'load_all', 'setwd', 'set.seed'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: [true] } )
registerBuiltInFunctions(false, ['load', 'load_all', 'setwd', 'set.seed'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: [true] } )
registerBuiltInFunctions(false, ['cat'], defaultBuiltInProcessor, { forceArgs: 'all' as const } ) /* returns null */
registerBuiltInFunctions(false, ['switch'], defaultBuiltInProcessor, {} ) /* returns null */
registerBuiltInFunctions(true, ['return'], defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return } )
registerBuiltInFunctions(true, ['break'], defaultBuiltInProcessor, { cfg: ExitPointType.Break } )
registerBuiltInFunctions(true, ['next'], defaultBuiltInProcessor, { cfg: ExitPointType.Next } )
registerBuiltInFunctions(true, ['{'], processExpressionList, {} )
registerBuiltInFunctions(true, ['source'], processSourceCall, { includeFunctionCall: true, forceFollow: false } )
registerBuiltInFunctions(false, ['source'], processSourceCall, { includeFunctionCall: true, forceFollow: false } )
registerBuiltInFunctions(true, ['[', '[['], processAccess, { treatIndicesAsString: false } )
registerBuiltInFunctions(true, ['$', '@'], processAccess, { treatIndicesAsString: true } )
registerBuiltInFunctions(true, ['if', 'ifelse'], processIfThenElse, {} )
registerBuiltInFunctions(true, ['get'], processGet, {} )
registerBuiltInFunctions(false, ['get'], processGet, {} )
registerBuiltInFunctions(false, ['library', 'require'], processLibrary, {} )
registerBuiltInFunctions(true, ['<-', '='], processAssignment, { canBeReplacement: true } )
registerBuiltInFunctions(true, [':=', 'assign'], processAssignment, {} )
Expand All @@ -207,14 +207,14 @@ registerBuiltInFunctions(true, ['quote', 'substitute', 'bquote'],
registerBuiltInFunctions(true, ['for'], processForLoop, {} )
registerBuiltInFunctions(true, ['repeat'], processRepeatLoop, {} )
registerBuiltInFunctions(true, ['while'], processWhileLoop, {} )
registerBuiltInFunctions(true, ['options'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: 'all' as const } )
registerBuiltInFunctions(true, ['on.exit', 'sys.on.exit'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['options'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: 'all' as const } )
registerBuiltInFunctions(false, ['on.exit', 'sys.on.exit'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* library and require is handled above */
registerBuiltInFunctions(true, ['requireNamespace', 'loadNamespace', 'attachNamespace', 'asNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['requireNamespace', 'loadNamespace', 'attachNamespace', 'asNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* downloader and installer functions (R, devtools, BiocManager) */
registerBuiltInFunctions(true, ['library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* weird env attachments */
registerBuiltInFunctions(true, ['attach'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['attach'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )

/* they are all mapped to `<-` but we separate super assignments */
registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames')
32 changes: 31 additions & 1 deletion src/dataflow/internal/linker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,36 @@ import type { REnvironmentInformation } from '../environments/environment'

export type NameIdMap = DefaultMap<string, IdentifierReference[]>

export function findNonLocalReads(graph: DataflowGraph): IdentifierReference[] {
const ids = new Set(
[...graph.vertices(true)]
.filter(([_, info]) => info.tag === VertexType.Use || info.tag === VertexType.FunctionCall)
.map(([id, _]) => id)
)
/* find all variable use ids which do not link to a given id */
const nonLocalReads: IdentifierReference[] = []
for(const id of ids) {
const outgoing = graph.outgoingEdges(id)
if(outgoing === undefined) {
continue
}
for(const [target, { types }] of outgoing) {
if(edgeIncludesType(types, EdgeType.Reads) && !ids.has(target)) {
const name = recoverName(id, graph.idMap)
if(!name) {
dataflowLogger.warn('found non-local read without name for id ' + id)
}
nonLocalReads.push({
name: recoverName(id, graph.idMap),
nodeId: id,
controlDependencies: undefined
})
}
}
}
return nonLocalReads
}

export function produceNameSharedIdMap(references: IdentifierReference[]): NameIdMap {
const nameIdShares = new DefaultMap<string, IdentifierReference[]>(() => [])
for(const reference of references) {
Expand Down Expand Up @@ -270,7 +300,7 @@ export function linkInputs(referencesToLinkAgainstEnvironment: readonly Identifi
* `x_2` must get a read marker to `x_1` as `x_1` is the active redefinition in the second loop iteration.
*/
export function linkCircularRedefinitionsWithinALoop(graph: DataflowGraph, openIns: NameIdMap, outgoing: readonly IdentifierReference[]): void {
// first we preprocess out so that only the last definition of a given identifier survives
// first, we preprocess out so that only the last definition of a given identifier survives
// this implicitly assumes that the outgoing references are ordered
const lastOutgoing = new Map<string, IdentifierReference>()
for(const out of outgoing) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ export function processExpressionList<OtherInfo>(
out,
environment: environment,
graph: nextGraph,
/* if we have no group we take the last evaluated expr */
/* if we have no group, we take the last evaluated expr */
entryPoint: meId,
exitPoints: withGroup ? [{ nodeId: rootId, type: ExitPointType.Default, controlDependencies: data.controlDependencies }]
: exitPoints
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { processDataflowFor } from '../../../../../processor'
import type { DataflowInformation } from '../../../../../info'
import { filterOutLoopExitPoints, alwaysExits } from '../../../../../info'
import {
findNonLocalReads,
linkCircularRedefinitionsWithinALoop,
produceNameSharedIdMap
} from '../../../../linker'
Expand All @@ -17,10 +18,9 @@ import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/proce
import { overwriteEnvironment } from '../../../../../environments/overwrite'
import { define } from '../../../../../environments/define'
import { appendEnvironment } from '../../../../../environments/append'
import { initializeCleanEnvironments, makeAllMaybe } from '../../../../../environments/environment'
import { makeAllMaybe } from '../../../../../environments/environment'
import { EdgeType } from '../../../../../graph/edge'
import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol'
import { pushLocalEnvironment } from '../../../../../environments/scoping'

export function processForLoop<OtherInfo>(
name: RSymbol<OtherInfo & ParentInformation>,
Expand Down Expand Up @@ -56,41 +56,19 @@ export function processForLoop<OtherInfo>(
headEnvironments = define({ ...write, definedAt: name.info.id, kind: 'variable' }, false, headEnvironments)
}
data = { ...data, environment: headEnvironments }
/* process the body without any environment first, to retrieve all open references */
let environment = initializeCleanEnvironments(false)
while(headEnvironments.level > environment.level) {
environment = pushLocalEnvironment(environment)
}
const body = processDataflowFor(bodyArg, { ...data, environment })

const body = processDataflowFor(bodyArg, data)

const nextGraph = headGraph.mergeWith(body.graph)
const outEnvironment = appendEnvironment(headEnvironments, body.environment)

// again within an if-then-else we consider all actives to be read
// currently I add it at the end, but is this correct?
const ingoing = [
...vector.in,
...makeAllMaybe(body.in, nextGraph, outEnvironment, false),
...vector.unknownReferences,
...makeAllMaybe(body.unknownReferences, nextGraph, outEnvironment, false)
]

// now we have to bind all open reads with the given name to the locally defined writtenVariable!
const nameIdShares = produceNameSharedIdMap(ingoing)
// now we have to identify all reads that may be effected by a circular redefinition
// for this, we search for all reads with a non-local read resolve!
const nameIdShares = produceNameSharedIdMap(findNonLocalReads(nextGraph))

for(const write of writtenVariable) {
nextGraph.addEdge(write.nodeId, vector.entryPoint, { type: EdgeType.DefinedBy })

const name = write.name
if(name) {
const readIdsToLink = nameIdShares.get(name)
for(const readId of readIdsToLink) {
nextGraph.addEdge(readId.nodeId, write.nodeId, { type: EdgeType.Reads })
}
// now, we remove the name from the id shares as they are no longer necessary
nameIdShares.delete(name)
nextGraph.setDefinitionOfVertex(write)
}
nextGraph.setDefinitionOfVertex(write)
}

const outgoing = [...variable.out, ...writtenVariable, ...makeAllMaybe(body.out, nextGraph, outEnvironment, true)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export function processFunctionDefinition<OtherInfo>(
/* we remove the last argument, as it is the body */
const parameters = args.slice(0, -1)
const bodyArg = unpackArgument(args[args.length - 1])
guard(bodyArg !== undefined, () => `Function Definition ${JSON.stringify(args)} has missing body! Bad!`)
guard(bodyArg !== undefined, () => `Function Definition ${JSON.stringify(args)} has missing body! This is bad!`)

const originalEnvironment = data.environment
// within a function def we do not pass on the outer binds as they could be overwritten when called
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { DataflowProcessorInformation } from '../../../../../processor'
import type { DataflowInformation } from '../../../../../info'
import { filterOutLoopExitPoints } from '../../../../../info'
import {
findNonLocalReads,
linkCircularRedefinitionsWithinALoop,
produceNameSharedIdMap
} from '../../../../linker'
Expand Down Expand Up @@ -32,14 +33,19 @@ export function processRepeatLoop<OtherInfo>(
args: unpacked ? [unpacked] : args,
rootId,
data,
patchData: (d, i) => {
if(i === 0) {
return { ...d, controlDependencies: [...d.controlDependencies ?? [], { id: name.info.id }] }
}
return d
},
markAsNSE: [0]
})

const body = processedArguments[0]
guard(body !== undefined, () => `Repeat-Loop ${name.content} has no body, impossible!`)

const namedIdShares = produceNameSharedIdMap([...body.in, ...body.unknownReferences])
linkCircularRedefinitionsWithinALoop(information.graph, namedIdShares, body.out)
linkCircularRedefinitionsWithinALoop(information.graph, produceNameSharedIdMap(findNonLocalReads(information.graph)), body.out)

information.exitPoints = filterOutLoopExitPoints(information.exitPoints)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { DataflowProcessorInformation } from '../../../../../processor'
import type { DataflowInformation } from '../../../../../info'
import { alwaysExits , filterOutLoopExitPoints } from '../../../../../info'
import {
findNonLocalReads,
linkCircularRedefinitionsWithinALoop, linkInputs,
produceNameSharedIdMap
} from '../../../../linker'
Expand Down Expand Up @@ -63,7 +64,7 @@ export function processWhileLoop<OtherInfo>(
...makeAllMaybe(body.unknownReferences, information.graph, information.environment, false),
...makeAllMaybe(body.in, information.graph, information.environment, false)
], information.environment, [...condition.in, ...condition.unknownReferences], information.graph, true)
linkCircularRedefinitionsWithinALoop(information.graph, produceNameSharedIdMap(remainingInputs), body.out)
linkCircularRedefinitionsWithinALoop(information.graph, produceNameSharedIdMap(findNonLocalReads(information.graph)), body.out)

// as the while-loop always evaluates its condition
information.graph.addEdge(name.info.id, condition.entryPoint, { type: EdgeType.Reads })
Expand Down
1 change: 0 additions & 1 deletion src/dataflow/internal/process/functions/call/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ export function processAllArguments<OtherInfo>(
// resolve reads within argument, we resolve before adding the `processed.environment` to avoid cyclic dependencies
for(const ingoing of [...processed.in, ...processed.unknownReferences]) {
const tryToResolve = ingoing.name ? resolveByName(ingoing.name, argEnv) : undefined

if(tryToResolve === undefined) {
remainingReadInArgs.push(ingoing)
} else {
Expand Down
Loading

3 comments on commit adb9748

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: adb9748 Previous: 2ba4d77 Ratio
Retrieve AST from R code 240.402911 ms (103.58868658601637) 234.1387735909091 ms (95.70717517929441) 1.03
Normalize R AST 20.398689318181816 ms (36.58397788979928) 19.71587190909091 ms (34.16826339270419) 1.03
Produce dataflow information 38.31187881818182 ms (82.12522809135105) 37.632453727272726 ms (80.25944650490707) 1.02
Total per-file 815.8510385 ms (1446.3353884130195) 793.4031634545455 ms (1405.318772592594) 1.03
Static slicing 2.3326031917532277 ms (1.4829504519403889) 2.1778708430615645 ms (1.2107167287047964) 1.07
Reconstruct code 0.2363838266932331 ms (0.19572801348147872) 0.2263642556384633 ms (0.18405690719686232) 1.04
Total per-slice 2.586178172299328 ms (1.5633985983946312) 2.4207134978896474 ms (1.2838157423981884) 1.07
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869360165281424 # 0.7869724682442361 # 1.00
reduction (normalized tokens) 0.7639690077689504 # 0.7640044233283717 # 1.00
memory (df-graph) 147.42458274147728 KiB (358.6827375397903) 147.58589311079547 KiB (359.2574768951678) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: adb9748 Previous: 2ba4d77 Ratio
Retrieve AST from R code 244.33700608 ms (47.263138478663016) 243.98377468 ms (44.65454208073405) 1.00
Normalize R AST 22.66417048 ms (17.741750338567194) 22.16135236 ms (16.672667480590608) 1.02
Produce dataflow information 74.45763856 ms (88.63591678615389) 72.23525608 ms (86.60187300046161) 1.03
Total per-file 10852.28108126 ms (52460.15553229867) 4031.8785642 ms (8285.555073707617) 2.69
Static slicing 21.098400537014303 ms (79.3760139888159) 8.512326865448639 ms (20.852034679315373) 2.48
Reconstruct code 0.24356118807030852 ms (0.1492490018677543) 0.23621496079457663 ms (0.15042289859405333) 1.03
Total per-slice 21.35006521971492 ms (79.3986937110879) 8.756647337180182 ms (20.88345286926803) 2.44
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8944619525615458 # 0.9038287698156573 # 0.99
reduction (normalized tokens) 0.8534320485134076 # 0.8715309581660212 # 0.98
memory (df-graph) 146.770703125 KiB (154.0029022815246) 142.5441796875 KiB (146.66042171252732) 1.03

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark '"social-science" Benchmark Suite'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 2.

Benchmark suite Current: adb9748 Previous: 2ba4d77 Ratio
Total per-file 10852.28108126 ms (52460.15553229867) 4031.8785642 ms (8285.555073707617) 2.69
Static slicing 21.098400537014303 ms (79.3760139888159) 8.512326865448639 ms (20.852034679315373) 2.48
Total per-slice 21.35006521971492 ms (79.3986937110879) 8.756647337180182 ms (20.88345286926803) 2.44

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.