Skip to content

Commit

Permalink
Extend the API of the validation registry for more performant custom …
Browse files Browse the repository at this point in the history
…validations (#1614)

* new API for registering set-up/tear-down logic which is executed once before/after validating an AST
* test cases for the new validation API
* improvements according to the review: comments regarding stateful validation, typed and simplified test cases
* use get instead of getter, according to the review
  • Loading branch information
JohannesMeierSE authored Oct 25, 2024
1 parent 489e12b commit b2b1cbe
Show file tree
Hide file tree
Showing 3 changed files with 272 additions and 21 deletions.
25 changes: 24 additions & 1 deletion packages/langium/src/validation/document-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,37 @@ export class DefaultDocumentValidator implements DocumentValidator {
validationItems.push(this.toDiagnostic(severity, message, info));
};

await this.validateAstBefore(rootNode, options, acceptor, cancelToken);
await this.validateAstNodes(rootNode, options, acceptor, cancelToken);
await this.validateAstAfter(rootNode, options, acceptor, cancelToken);

return validationItems;
}

protected async validateAstBefore(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
const checksBefore = this.validationRegistry.checksBefore;
for (const checkBefore of checksBefore) {
await interruptAndCheck(cancelToken);
await checkBefore(rootNode, acceptor, options.categories ?? [], cancelToken);
}
}

protected async validateAstNodes(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
await Promise.all(streamAst(rootNode).map(async node => {
await interruptAndCheck(cancelToken);
const checks = this.validationRegistry.getChecks(node.$type, options.categories);
for (const check of checks) {
await check(node, acceptor, cancelToken);
}
}));
return validationItems;
}

protected async validateAstAfter(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
const checksAfter = this.validationRegistry.checksAfter;
for (const checkAfter of checksAfter) {
await interruptAndCheck(cancelToken);
await checkAfter(rootNode, acceptor, options.categories ?? [], cancelToken);
}
}

protected toDiagnostic<N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N, string>): Diagnostic {
Expand Down
112 changes: 95 additions & 17 deletions packages/langium/src/validation/validation-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
******************************************************************************/

import type { CodeDescription, DiagnosticRelatedInformation, DiagnosticTag, integer, Range } from 'vscode-languageserver-types';
import type { CancellationToken } from '../utils/cancellation.js';
import { assertUnreachable } from '../index.js';
import type { LangiumCoreServices } from '../services.js';
import type { AstNode, AstReflection, Properties } from '../syntax-tree.js';
import type { MaybePromise } from '../utils/promise-utils.js';
import type { Stream } from '../utils/stream.js';
import type { DocumentSegment } from '../workspace/documents.js';
import type { CancellationToken } from '../utils/cancellation.js';
import { MultiMap } from '../utils/collections.js';
import type { MaybePromise } from '../utils/promise-utils.js';
import { isOperationCancelled } from '../utils/promise-utils.js';
import type { Stream } from '../utils/stream.js';
import { stream } from '../utils/stream.js';
import type { DocumentSegment } from '../workspace/documents.js';

export type DiagnosticInfo<N extends AstNode, P extends string = Properties<N>> = {
/** The AST node to which the diagnostic is attached. */
Expand Down Expand Up @@ -63,6 +64,20 @@ export type ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverit

export type ValidationCheck<T extends AstNode = AstNode> = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise<void>;

/**
* A utility type for describing functions which will be called once before or after all the AstNodes of an AST/Langium document are validated.
*
* The AST is represented by its root AstNode.
*
* The given validation acceptor helps to report some early or lately detected issues.
*
* The 'categories' indicate, which validation categories are executed for all the AstNodes.
* This helps to tailor the preparations/tear-down logic to the actually executed checks on the nodes.
*
* It is recommended to support interrupts during long-running logic with 'interruptAndCheck(cancelToken)'.
*/
export type ValidationPreparation = (rootNode: AstNode, accept: ValidationAcceptor, categories: ValidationCategory[], cancelToken: CancellationToken) => MaybePromise<void>;

/**
* A utility type for associating non-primitive AST types to corresponding validation checks. For example:
*
Expand Down Expand Up @@ -110,6 +125,9 @@ export class ValidationRegistry {
private readonly entries = new MultiMap<string, ValidationCheckEntry>();
private readonly reflection: AstReflection;

private entriesBefore: ValidationPreparation[] = [];
private entriesAfter: ValidationPreparation[] = [];

constructor(services: LangiumCoreServices) {
this.reflection = services.shared.AstReflection;
}
Expand Down Expand Up @@ -142,28 +160,34 @@ export class ValidationRegistry {
category
};
this.addEntry(type, entry);
} else {
assertUnreachable(callbacks);
}
}
}

protected wrapValidationException(check: ValidationCheck, thisObj: unknown): ValidationCheck {
return async (node, accept, cancelToken) => {
try {
await check.call(thisObj, node, accept, cancelToken);
} catch (err) {
if (isOperationCancelled(err)) {
throw err;
}
console.error('An error occurred during validation:', err);
const message = err instanceof Error ? err.message : String(err);
if (err instanceof Error && err.stack) {
console.error(err.stack);
}
accept('error', 'An error occurred during validation: ' + message, { node });
}
await this.handleException(() => check.call(thisObj, node, accept, cancelToken), 'An error occurred during validation', accept, node);
};
}

protected async handleException(functionality: () => MaybePromise<void>, messageContext: string, accept: ValidationAcceptor, node: AstNode): Promise<void> {
try {
await functionality();
} catch (err) {
if (isOperationCancelled(err)) {
throw err;
}
console.error(`${messageContext}:`, err);
if (err instanceof Error && err.stack) {
console.error(err.stack);
}
const messageDetails = err instanceof Error ? err.message : String(err);
accept('error', `${messageContext}: ${messageDetails}`, { node });
}
}

protected addEntry(type: string, entry: ValidationCheckEntry): void {
if (type === 'AstNode') {
this.entries.add('AstNode', entry);
Expand All @@ -183,4 +207,58 @@ export class ValidationRegistry {
return checks.map(entry => entry.check);
}

/**
* Register logic which will be executed once before validating all the nodes of an AST/Langium document.
* This helps to prepare or initialize some information which are required or reusable for the following checks on the AstNodes.
*
* As an example, for validating unique fully-qualified names of nodes in the AST,
* here the map for mapping names to nodes could be established.
* During the usual checks on the nodes, they are put into this map with their name.
*
* Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation.
* Therefore it is recommended to clear stored information
* _before_ validating an AST to validate each AST unaffected from other ASTs
* AND _after_ validating the AST to free memory by information which are no longer used.
*
* @param checkBefore a set-up function which will be called once before actually validating an AST
* @param thisObj Optional object to be used as `this` when calling the validation check functions.
*/
registerBeforeDocument(checkBefore: ValidationPreparation, thisObj: ThisParameterType<unknown> = this): void {
this.entriesBefore.push(this.wrapPreparationException(checkBefore, 'An error occurred during set-up of the validation', thisObj));
}

/**
* Register logic which will be executed once after validating all the nodes of an AST/Langium document.
* This helps to finally evaluate information which are collected during the checks on the AstNodes.
*
* As an example, for validating unique fully-qualified names of nodes in the AST,
* here the map with all the collected nodes and their names is checked
* and validation hints are created for all nodes with the same name.
*
* Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation.
* Therefore it is recommended to clear stored information
* _before_ validating an AST to validate each AST unaffected from other ASTs
* AND _after_ validating the AST to free memory by information which are no longer used.
*
* @param checkBefore a set-up function which will be called once before actually validating an AST
* @param thisObj Optional object to be used as `this` when calling the validation check functions.
*/
registerAfterDocument(checkAfter: ValidationPreparation, thisObj: ThisParameterType<unknown> = this): void {
this.entriesAfter.push(this.wrapPreparationException(checkAfter, 'An error occurred during tear-down of the validation', thisObj));
}

protected wrapPreparationException(check: ValidationPreparation, messageContext: string, thisObj: unknown): ValidationPreparation {
return async (rootNode, accept, categories, cancelToken) => {
await this.handleException(() => check.call(thisObj, rootNode, accept, categories, cancelToken), messageContext, accept, rootNode);
};
}

get checksBefore(): ValidationPreparation[] {
return this.entriesBefore;
}

get checksAfter(): ValidationPreparation[] {
return this.entriesAfter;
}

}
156 changes: 153 additions & 3 deletions packages/langium/test/validation/document-validator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/

import type { AstNode, ValidationChecks } from 'langium';
import { AstUtils } from 'langium';
import type { AstNode, ValidationAcceptor, ValidationChecks } from 'langium';
import { createServicesForGrammar } from 'langium/grammar';
import type { LangiumServices } from 'langium/lsp';
import type { ValidationResult } from 'langium/test';
import { validationHelper } from 'langium/test';
import { beforeAll, describe, expect, test } from 'vitest';
import { Position, Range } from 'vscode-languageserver';
import { createServicesForGrammar } from 'langium/grammar';
import { validationHelper } from 'langium/test';

// Related to https://github.com/eclipse-langium/langium/issues/571
describe('Parser error is thrown on resynced token with NaN position', () => {
Expand Down Expand Up @@ -110,3 +112,151 @@ describe('Generic `AstNode` validation applies to all nodes', () => {
});

});

describe('Register Before/AfterDocument logic for validations with state', () => {

// the grammar
const grammar = `grammar NestedNamedNodes
entry Model:
(elements+=NamedNode)*;
NamedNode:
name=ID '{' (children+=NamedNode)* '}';
hidden terminal WS: /\\s+/;
terminal ID: /[_a-zA-Z][\\w_]*/;
`;

// AST types derived from the grammar
type NestedNamedNodesAstTypes = {
Model: Model;
NamedNode: NamedNode;
}
interface Model extends AstNode {
$type: 'Model';
elements: NamedNode[];
}
interface NamedNode extends AstNode {
$type: 'NamedNode';
$container: Model | NamedNode;
name: string;
children: NamedNode[];
}

// utilities
let validate: (input: string) => Promise<ValidationResult<AstNode>>;
let services: LangiumServices;

function getFQN(node: AstNode): string {
if ('name' in node && node.$container) {
const parentName = getFQN(node.$container);
if (parentName) {
return parentName + '.' + node.name;
}
return '' + node.name;
}
return '';
}

function rememberNamedNode(child: NamedNode, fqnMap: Map<string, NamedNode[]>): void {
const fqn = getFQN(child);
let list = fqnMap.get(fqn);
if (!list) {
list = [];
fqnMap.set(fqn, list);
}
list.push(child);
}

function checkForDuplicates(fqnMap: Map<string, NamedNode[]>, accept: ValidationAcceptor): void {
for (const [key, value] of fqnMap.entries()) {
if (value.length >= 2) {
value.forEach(child => accept('error', `The FQN '${key}' is not unique.`, { node: child }));
}
}
}

// 1. realize validation in state-less way
function registerValidationForRootElement() {
const validationChecks: ValidationChecks<NestedNamedNodesAstTypes> = {
// do the expensive validation once on the root Model only
Model: (node, accept) => {
const fqnMap = new Map<string, NamedNode[]>();
// collect the FQN of all nodes
// !! This 'streamAllContents' is saved in the alternative version of the validation !!
AstUtils.streamAllContents(node).forEach(child => {
rememberNamedNode(child as NamedNode, fqnMap);
});
// check for duplicates
checkForDuplicates(fqnMap, accept);
},
};
services.validation.ValidationRegistry.register(validationChecks);
}

// 2. realize validation without an additional AST traversal by exploiting the stateful validation approach
function registerValidationBeforeAfter() {
const fqnMap = new Map<string, NamedNode[]>(); // this is the new state: remember all NamedNode nodes, classified by their name
services.validation.ValidationRegistry.registerBeforeDocument((_rootNode, _accept, _categories) => {
fqnMap.clear(); // clear everything to be sure when starting to validate an AST
});
services.validation.ValidationRegistry.register(<ValidationChecks<NestedNamedNodesAstTypes>>{
// register the named nodes in the map (but don't validate/check them now)
NamedNode: (node, _accept) => {
// Streaming the whole AST is not required with this approach, since the streaming is already done by the DocumentValidator!
rememberNamedNode(node, fqnMap);
},
});
services.validation.ValidationRegistry.registerAfterDocument((_rootNode, accept, _categories) => {
// check for duplicates after all checks for the single nodes of the AST are done
checkForDuplicates(fqnMap, accept);
fqnMap.clear(); // free memory afterwards
});
}

// test cases to ensure, that both approaches produce the same validation hints
describe('Using the stateless validation', () => {
beforeAll(async () => {
services = await createServicesForGrammar({
grammar
});
validate = validationHelper(services);
registerValidationForRootElement();
});

test('Children with same name on same level (stateless)', async () => {
const validationResult = await validate('A { B{} C{} B{} }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(2);
});

test('Nested Children with same name (stateless)', async () => {
const validationResult = await validate('A { A{ A{} } }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(0);
});
});

describe('Using the stateful validation', () => {
beforeAll(async () => {
services = await createServicesForGrammar({
grammar
});
validate = validationHelper(services);
registerValidationBeforeAfter();
});

test('Children with same name on same level (with state)', async () => {
const validationResult = await validate('A { B{} C{} B{} }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(2);
});

test('Nested Children with same name (with state)', async () => {
const validationResult = await validate('A { A{ A{} } }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(0);
});
});

});

0 comments on commit b2b1cbe

Please sign in to comment.