Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for named list arguments #1151

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/dataflow/environments/built-in.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { processApply } from '../internal/process/functions/call/built-in/built-
import { registerBuiltInDefinitions } from './built-in-config';
import { DefaultBuiltinConfig } from './default-builtin-config';
import type { LinkTo } from '../../queries/catalog/call-context-query/call-context-query-format';
import { processList } from '../internal/process/functions/call/built-in/built-in-list';



Expand Down Expand Up @@ -76,6 +77,7 @@ function defaultBuiltInProcessor<OtherInfo>(
data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
config: DefaultBuiltInProcessorConfiguration
): DataflowInformation {
console.log('processing default:', name.content);
const { information: res, processedArguments } = processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs });
if(config.returnsNthArgument !== undefined) {
const arg = config.returnsNthArgument === 'last' ? processedArguments[args.length - 1] : processedArguments[config.returnsNthArgument];
Expand Down Expand Up @@ -147,6 +149,7 @@ export const BuiltInProcessorMapper = {
'builtin:repeat-loop': processRepeatLoop,
'builtin:while-loop': processWhileLoop,
'builtin:replacement': processReplacementFunction,
'builtin:list': processList,
} as const satisfies Record<`builtin:${string}`, BuiltInIdentifierProcessorWithConfig<never>>;

export type BuiltInMappingName = keyof typeof BuiltInProcessorMapper;
Expand Down
3 changes: 2 additions & 1 deletion src/dataflow/environments/default-builtin-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
{
type: 'function',
names: [
'~', '+', '-', '*', '/', '^', '!', '?', '**', '==', '!=', '>', '<', '>=', '<=', '%%', '%/%', '%*%', '%in%', ':', 'list',
'~', '+', '-', '*', '/', '^', '!', '?', '**', '==', '!=', '>', '<', '>=', '<=', '%%', '%/%', '%*%', '%in%', ':',
'rep', 'seq', 'seq_len', 'seq_along', 'seq.int', 'gsub', 'which', 'class', 'dimnames', 'min', 'max',
'intersect', 'subset', 'match', 'sqrt', 'abs', 'round', 'floor', 'ceiling', 'signif', 'trunc', 'log', 'log10', 'log2', 'sum', 'mean',
'unique', 'paste', 'paste0', 'read.csv', 'stop', 'is.null', 'numeric', 'as.character', 'as.integer', 'as.logical', 'as.numeric', 'as.matrix',
Expand Down Expand Up @@ -74,6 +74,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
{ type: 'function', names: ['repeat'], processor: 'builtin:repeat-loop', config: {}, assumePrimitive: true },
{ type: 'function', names: ['while'], processor: 'builtin:while-loop', config: {}, assumePrimitive: true },
{ type: 'function', names: ['do.call'], processor: 'builtin:apply', config: { indexOfFunction: 0, unquoteFunction: true }, assumePrimitive: true },
{ type: 'function', names: ['list'], processor: 'builtin:list', config: {}, assumePrimitive: true },
{
type: 'function',
names: [
Expand Down
5 changes: 5 additions & 0 deletions src/dataflow/environments/define.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ import type { IdentifierDefinition } from './identifier';
/**
 * Stores `definition` under `name` in the memory of `newEnvironments`.
 *
 * If definitions for `name` already exist and the new definition carries control dependencies,
 * it is appended to them; in every other case it replaces whatever was stored before.
 */
function defInEnv(newEnvironments: IEnvironment, name: string, definition: IdentifierDefinition) {
	const known = newEnvironments.memory.get(name);
	// TODO: check if indices are already defined regarding overwrite
	if(known !== undefined && definition.controlDependencies !== undefined) {
		// the new definition has control dependencies, so keep the earlier definitions alongside it
		known.push(definition);
		return;
	}
	newEnvironments.memory.set(name, [definition]);
}

/**
Expand All @@ -21,6 +24,8 @@ function defInEnv(newEnvironments: IEnvironment, name: string, definition: Ident
*/
export function define(definition: IdentifierDefinition, superAssign: boolean | undefined, environment: REnvironmentInformation): REnvironmentInformation {
const name = definition.name;
// console.log('defining:', name);
// console.log('definition:', definition);
guard(name !== undefined, () => `Name must be defined, but isn't for ${JSON.stringify(definition)}`);
let newEnvironment;
if(superAssign) {
Expand Down
5 changes: 4 additions & 1 deletion src/dataflow/environments/identifier.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import type { BuiltInIdentifierConstant, BuiltInIdentifierDefinition } from './built-in';
import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { ControlDependency } from '../info';
import type { ContainerIndices } from '../graph/vertex';

export type Identifier = string & { __brand?: 'identifier' }

Expand Down Expand Up @@ -59,10 +60,12 @@ export interface IdentifierReference {
}


interface InGraphIdentifierDefinition extends IdentifierReference {
/**
 * An {@link IdentifierReference} that additionally knows where it was defined
 * and, optionally, which container indices are associated with it.
 */
export interface InGraphIdentifierDefinition extends IdentifierReference {
	readonly type: InGraphReferenceType
	/** The assignment (or whatever, like `assign` function call) node which ultimately defined this identifier */
	readonly definedAt: NodeId
	/**
	 * Container indices known for this definition, if any.
	 * Not `readonly`, as it is filled in after construction when the definition is marked as an assignment.
	 */
	indices?: ContainerIndices
}

/**
Expand Down
1 change: 1 addition & 0 deletions src/dataflow/environments/resolve-by-name.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const TargetTypePredicate = {
* @returns A list of possible definitions of the identifier (one if the definition location is exactly and always known), or `undefined` if the identifier is undefined in the current scope/with the current environment information.
*/
export function resolveByName(name: Identifier, environment: REnvironmentInformation, target: ReferenceType = ReferenceType.Unknown): IdentifierDefinition[] | undefined {
// console.log('resolving:', name);
let current: IEnvironment = environment.current;
let definitions: IdentifierDefinition[] | undefined = undefined;
const wantedType = TargetTypePredicate[target];
Expand Down
8 changes: 8 additions & 0 deletions src/dataflow/graph/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ export enum VertexType {
FunctionDefinition = 'function-definition'
}

/**
 * A single named entry of a container.
 * For example, the list processor records one index per named argument of a `list` call.
 */
export interface ContainerIndex {
	/** Name of the entry; accesses are matched against this lexeme. */
	readonly lexeme: string;
	/** Id of the node that provides the entry (for `list`, the id of the named argument). */
	readonly nodeId: NodeId;
}

/** All indices known for a container, or `undefined` if no indices are tracked. */
export type ContainerIndices = ContainerIndex[] | undefined;

/**
* Arguments required to construct a vertex in the dataflow graph.
*
Expand All @@ -39,6 +45,8 @@ interface DataflowGraphVertexBase extends MergeableRecord {
* See {@link IdentifierReference}
*/
controlDependencies: ControlDependency[] | undefined

indices?: ContainerIndices
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ import type { ForceArguments } from '../common';
import { BuiltIn } from '../../../../../environments/built-in';
import { markAsAssignment } from './built-in-assignment';
import { ReferenceType } from '../../../../../environments/identifier';
import type { InGraphIdentifierDefinition } from '../../../../../environments/identifier';
import { resolveByName } from '../../../../../environments/resolve-by-name';
import type { ContainerIndex, ContainerIndices } from '../../../../../graph/vertex';

interface TableAssignmentProcessorMarker {
definitionRootNodes: NodeId[]
Expand All @@ -32,6 +35,17 @@ function tableAssignmentProcessor<OtherInfo>(
return processKnownFunctionCall({ name, args, rootId, data }).information;
}

/**
* Processes different types of access operations.
*
* Example:
* ```r
* a[i]
* a$foo
* a[[i]]
* a@foo
* ```
*/
export function processAccess<OtherInfo>(
name: RSymbol<OtherInfo & ParentInformation>,
args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
Expand All @@ -43,6 +57,7 @@ export function processAccess<OtherInfo>(
dataflowLogger.warn(`Access ${name.content} has less than 2 arguments, skipping`);
return processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs }).information;
}
console.log('processing access:', name.content);
const head = args[0];
guard(head !== EmptyArgument, () => `Access ${name.content} has no source, impossible!`);

Expand Down Expand Up @@ -93,7 +108,39 @@ export function processAccess<OtherInfo>(
};
}
}
fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data, forceArgs: config.forceArgs });
// a$foo a@foo
let accessedArgument: ContainerIndex | undefined;
let resolvedFirstParameterIndices: ContainerIndices;
if(newArgs[0] !== EmptyArgument) {
const accessArg = newArgs[1] === EmptyArgument ? 'all' : newArgs[1].lexeme;
const resolvedFirstParameter = resolveByName(newArgs[0].lexeme ?? '', data.environment);
// console.log('requesting to access', accessArg);
// resolvedFirstParameterIndices = resolvedFirstParameter?.flatMap(param => (param as InGraphIdentifierDefinition)?.indices ?? []);
resolvedFirstParameter?.forEach(param => {
const definition = param as InGraphIdentifierDefinition;
if(definition.indices) {
if(resolvedFirstParameterIndices) {
resolvedFirstParameterIndices = resolvedFirstParameterIndices.concat(definition.indices);
} else {
resolvedFirstParameterIndices = definition.indices;
}
// console.log('pushed indices', definition.indices);
} else {
// console.log('no indices found for', definition);
}
});
console.log('resolved', newArgs[0].lexeme, 'to', resolvedFirstParameterIndices);
accessedArgument = resolvedFirstParameterIndices?.find(index => index.lexeme === accessArg);
}

const indices = accessedArgument === undefined ? undefined : [accessedArgument];
fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data, forceArgs: config.forceArgs }, indices);
if(accessedArgument !== undefined) {
// console.log('Accessing known index');
fnCall.information.graph.addEdge(name.info.id, accessedArgument.nodeId, EdgeType.Reads);
} else {
// console.log('Accessing unknown index');
}
}

const info = fnCall.information;
Expand All @@ -111,16 +158,16 @@ export function processAccess<OtherInfo>(
return {
...info,
/*
* Keep active nodes in case of assignments etc.
* We make them maybe as a kind of hack.
* This way when using
* ```ts
* a[[1]] <- 3
* a[[2]] <- 4
* a
* ```
* the read for a will use both accesses as potential definitions and not just the last one!
*/
* Keep active nodes in case of assignments etc.
* We make them maybe as a kind of hack.
* This way when using
* ```ts
* a[[1]] <- 3
* a[[2]] <- 4
* a
* ```
* the read for a will use both accesses as potential definitions and not just the last one!
*/
unknownReferences: makeAllMaybe(info.unknownReferences, info.graph, info.environment, false),
entryPoint: rootId,
/** it is, to be precise, the accessed element we want to map to maybe */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ import type {
import type { Base, Location, RNode } from '../../../../../../r-bridge/lang-4.x/ast/model/model';
import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol';
import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type';
import type { RFunctionArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import { EmptyArgument, type RFunctionArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import { dataflowLogger } from '../../../../../logger';
import type {
IdentifierDefinition,
IdentifierReference,
InGraphIdentifierDefinition,
InGraphReferenceType } from '../../../../../environments/identifier';
import { ReferenceType
} from '../../../../../environments/identifier';
Expand All @@ -27,6 +27,7 @@ import type { RString } from '../../../../../../r-bridge/lang-4.x/ast/model/node
import { removeRQuotes } from '../../../../../../r-bridge/retriever';
import type { RUnnamedArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-argument';
import { VertexType } from '../../../../../graph/vertex';
import type { ContainerIndices } from '../../../../../graph/vertex';
import { define } from '../../../../../environments/define';
import { EdgeType } from '../../../../../graph/edge';
import type { ForceArguments } from '../common';
Expand Down Expand Up @@ -85,6 +86,7 @@ export function processAssignment<OtherInfo>(
data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
config: AssignmentConfiguration
): DataflowInformation {
console.log('processing assignment:', name.content, 'with id', name.info.id);
if(args.length != 2) {
dataflowLogger.warn(`Assignment ${name.content} has something else than 2 arguments, skipping`);
return processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs }).information;
Expand All @@ -93,8 +95,18 @@ export function processAssignment<OtherInfo>(
const effectiveArgs = getEffectiveOrder(config, args as [RFunctionArgument<OtherInfo & ParentInformation>, RFunctionArgument<OtherInfo & ParentInformation>]);
const { target, source } = extractSourceAndTarget(effectiveArgs, name);
const { type, named } = target;
// console.log('target', target);
// console.log('source:', source);
// for(const arg of effectiveArgs) {
// if(arg === EmptyArgument) {
// continue;
// }
// const resolved = resolveByName(arg.lexeme ?? '', data.environment);
// console.log('arg:', arg.lexeme, 'resolved: ', resolved?.map((r) => `{ ${r.name}, [${(r as InGraphIdentifierDefinition).indices?.map((r1 => `{ ${r1.lexeme}, ${r1.nodeId} }`)).join('; ')}]}`).join(','));
// }

if(type === RType.Symbol) {
console.log('Symbol type');
const res = processKnownFunctionCall({ name, args, rootId, data, reverseOrder: !config.swapSourceAndTarget, forceArgs: config.forceArgs });
return processAssignmentToSymbol<OtherInfo & ParentInformation>({
...config,
Expand All @@ -107,15 +119,20 @@ export function processAssignment<OtherInfo>(
information: res.information,
});
} else if(config.canBeReplacement && type === RType.FunctionCall && named) {
console.log('Function call type');
/* as replacement functions take precedence over the lhs fn-call (i.e., `names(x) <- ...` is independent from the definition of `names`), we do not have to process the call */
dataflowLogger.debug(`Assignment ${name.content} has a function call as target => replacement function ${target.lexeme}`);
const replacement = toReplacementSymbol(target, target.functionName.content, config.superAssignment ?? false);
return processAsNamedCall(replacement, data, replacement.content, [...target.arguments, source]);
} else if(config.canBeReplacement && type === RType.Access) {
console.log('Access type, canbeReplacement');
const acces = target.access?.map((a) => a === EmptyArgument ? '' : a.lexeme).join(' ');
console.log(`'${acces}' of '${target.accessed.lexeme}' is accessed and replaced by '${source.lexeme}'`);
dataflowLogger.debug(`Assignment ${name.content} has an access as target => replacement function ${target.lexeme}`);
const replacement = toReplacementSymbol(target, target.operator, config.superAssignment ?? false);
return processAsNamedCall(replacement, data, replacement.content, [toUnnamedArgument(target.accessed, data.completeAst.idMap), ...target.access, source]);
} else if(type === RType.Access) {
console.log('Access type');
const rootArg = findRootAccess(target);
if(rootArg) {
const res = processKnownFunctionCall({
Expand Down Expand Up @@ -156,7 +173,7 @@ function extractSourceAndTarget<OtherInfo>(args: readonly RFunctionArgument<Othe
return { source, target };
}

function produceWrittenNodes<OtherInfo>(rootId: NodeId, target: DataflowInformation, referenceType: InGraphReferenceType, data: DataflowProcessorInformation<OtherInfo>, makeMaybe: boolean): IdentifierDefinition[] {
function produceWrittenNodes<OtherInfo>(rootId: NodeId, target: DataflowInformation, referenceType: InGraphReferenceType, data: DataflowProcessorInformation<OtherInfo>, makeMaybe: boolean): InGraphIdentifierDefinition[] {
return [...target.in, ...target.unknownReferences].map(ref => ({
...ref,
type: referenceType,
Expand Down Expand Up @@ -246,12 +263,22 @@ export function markAsAssignment(
environment: REnvironmentInformation,
graph: DataflowGraph
},
nodeToDefine: IdentifierDefinition,
nodeToDefine: InGraphIdentifierDefinition,
sourceIds: readonly NodeId[],
rootIdOfAssignment: NodeId,
quoteSource?: boolean,
superAssignment?: boolean,
) {
let indices: ContainerIndices;
if(sourceIds.length === 1) {
// support for tracking indices
indices = information.graph.getVertex(sourceIds[0])?.indices;
if(indices) {
console.log(`Defining indices ${indices.map((index) => `{ lexeme: ${index.lexeme}, nodeId: ${index.nodeId} }`).join(',')} for ${nodeToDefine.name}`);
}
}
nodeToDefine.indices ??= indices;

information.environment = define(nodeToDefine, superAssignment, information.environment);
information.graph.setDefinitionOfVertex(nodeToDefine);
if(!quoteSource) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import type { RFunctionArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import { EmptyArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call';
import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol';
import type { ParentInformation } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/node-id';
import type { ContainerIndex } from '../../../../../graph/vertex';
import type { DataflowInformation } from '../../../../../info';
import type { DataflowProcessorInformation } from '../../../../../processor';
import { processKnownFunctionCall } from '../known-call-handling';

/**
 * Processes a call to `list` and hands its named arguments on as container indices.
 *
 * Example:
 * ```r
 * list(a = 1, b = 2)
 * ```
 *
 * @param name   - Symbol of the called function
 * @param args   - Arguments of the call; empty and unnamed arguments do not contribute an index
 * @param rootId - Id of the root node of the call
 * @param data   - Processor information that is passed on to the known-function-call handling
 * @returns The dataflow information produced by {@link processKnownFunctionCall}
 */
export function processList<OtherInfo>(
	name: RSymbol<OtherInfo & ParentInformation>,
	args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
	rootId: NodeId,
	data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
): DataflowInformation {
	const namedArguments: ContainerIndex[] = [];
	for(const arg of args) {
		// only named arguments provide a lexeme that can be matched by a later access
		if(arg === EmptyArgument || arg.type !== 'RArgument' || arg.name === undefined) {
			continue;
		}

		namedArguments.push({ lexeme: arg.name.content, nodeId: arg.info.id });
	}

	return processKnownFunctionCall({ name, args, rootId, data }, namedArguments).information;
}
3 changes: 2 additions & 1 deletion src/dataflow/internal/process/functions/call/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ function forceVertexArgumentValueReferences(rootId: NodeId, value: DataflowInfor


export function processAllArguments<OtherInfo>(
{ functionName, args, data, finalGraph, functionRootId, forceArgs = [], patchData = d => d }: ProcessAllArgumentInput<OtherInfo>
{ functionName, args, data, finalGraph, functionRootId, forceArgs = [], patchData = d => d }: ProcessAllArgumentInput<OtherInfo>,
// indices: ContainerIndices = undefined,
): ProcessAllArgumentResult {
let finalEnv = functionName.environment;
// arg env contains the environments with other args defined
Expand Down
Loading