Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the API of the validation registry for more performant custom validations #1614

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion packages/langium/src/validation/document-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,37 @@ export class DefaultDocumentValidator implements DocumentValidator {
validationItems.push(this.toDiagnostic(severity, message, info));
};

await this.validateAstBefore(rootNode, options, acceptor, cancelToken);
await this.validateAstNodes(rootNode, options, acceptor, cancelToken);
await this.validateAstAfter(rootNode, options, acceptor, cancelToken);

return validationItems;
}

protected async validateAstBefore(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
const checksBefore = this.validationRegistry.checksBefore;
for (const checkBefore of checksBefore) {
await interruptAndCheck(cancelToken);
await checkBefore(rootNode, acceptor, options.categories ?? [], cancelToken);
}
}

protected async validateAstNodes(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
await Promise.all(streamAst(rootNode).map(async node => {
await interruptAndCheck(cancelToken);
const checks = this.validationRegistry.getChecks(node.$type, options.categories);
for (const check of checks) {
await check(node, acceptor, cancelToken);
}
}));
return validationItems;
}

protected async validateAstAfter(rootNode: AstNode, options: ValidationOptions, acceptor: ValidationAcceptor, cancelToken = CancellationToken.None): Promise<void> {
const checksAfter = this.validationRegistry.checksAfter;
for (const checkAfter of checksAfter) {
await interruptAndCheck(cancelToken);
await checkAfter(rootNode, acceptor, options.categories ?? [], cancelToken);
}
}

protected toDiagnostic<N extends AstNode>(severity: ValidationSeverity, message: string, info: DiagnosticInfo<N, string>): Diagnostic {
Expand Down
112 changes: 95 additions & 17 deletions packages/langium/src/validation/validation-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
******************************************************************************/

import type { CodeDescription, DiagnosticRelatedInformation, DiagnosticTag, integer, Range } from 'vscode-languageserver-types';
import type { CancellationToken } from '../utils/cancellation.js';
import { assertUnreachable } from '../index.js';
import type { LangiumCoreServices } from '../services.js';
import type { AstNode, AstReflection, Properties } from '../syntax-tree.js';
import type { MaybePromise } from '../utils/promise-utils.js';
import type { Stream } from '../utils/stream.js';
import type { DocumentSegment } from '../workspace/documents.js';
import type { CancellationToken } from '../utils/cancellation.js';
import { MultiMap } from '../utils/collections.js';
import type { MaybePromise } from '../utils/promise-utils.js';
import { isOperationCancelled } from '../utils/promise-utils.js';
import type { Stream } from '../utils/stream.js';
import { stream } from '../utils/stream.js';
import type { DocumentSegment } from '../workspace/documents.js';

export type DiagnosticInfo<N extends AstNode, P extends string = Properties<N>> = {
/** The AST node to which the diagnostic is attached. */
Expand Down Expand Up @@ -63,6 +64,20 @@ export type ValidationAcceptor = <N extends AstNode>(severity: ValidationSeverit

export type ValidationCheck<T extends AstNode = AstNode> = (node: T, accept: ValidationAcceptor, cancelToken: CancellationToken) => MaybePromise<void>;

/**
* A utility type for describing functions which will be called once before or after all the AstNodes of an AST/Langium document are validated.
*
* The AST is represented by its root AstNode.
*
* The given validation acceptor helps to report some early or lately detected issues.
*
* The 'categories' indicate, which validation categories are executed for all the AstNodes.
* This helps to tailor the preparations/tear-down logic to the actually executed checks on the nodes.
*
* It is recommended to support interrupts during long-running logic with 'interruptAndCheck(cancelToken)'.
*/
export type ValidationPreparation = (rootNode: AstNode, accept: ValidationAcceptor, categories: ValidationCategory[], cancelToken: CancellationToken) => MaybePromise<void>;

/**
* A utility type for associating non-primitive AST types to corresponding validation checks. For example:
*
Expand Down Expand Up @@ -110,6 +125,9 @@ export class ValidationRegistry {
private readonly entries = new MultiMap<string, ValidationCheckEntry>();
private readonly reflection: AstReflection;

private entriesBefore: ValidationPreparation[] = [];
private entriesAfter: ValidationPreparation[] = [];

constructor(services: LangiumCoreServices) {
this.reflection = services.shared.AstReflection;
}
Expand Down Expand Up @@ -142,28 +160,34 @@ export class ValidationRegistry {
category
};
this.addEntry(type, entry);
} else {
assertUnreachable(callbacks);
}
}
}

protected wrapValidationException(check: ValidationCheck, thisObj: unknown): ValidationCheck {
return async (node, accept, cancelToken) => {
try {
await check.call(thisObj, node, accept, cancelToken);
} catch (err) {
if (isOperationCancelled(err)) {
throw err;
}
console.error('An error occurred during validation:', err);
const message = err instanceof Error ? err.message : String(err);
if (err instanceof Error && err.stack) {
console.error(err.stack);
}
accept('error', 'An error occurred during validation: ' + message, { node });
}
await this.handleException(() => check.call(thisObj, node, accept, cancelToken), 'An error occurred during validation', accept, node);
};
}

protected async handleException(functionality: () => MaybePromise<void>, messageContext: string, accept: ValidationAcceptor, node: AstNode): Promise<void> {
try {
await functionality();
} catch (err) {
if (isOperationCancelled(err)) {
throw err;
}
console.error(`${messageContext}:`, err);
if (err instanceof Error && err.stack) {
console.error(err.stack);
}
const messageDetails = err instanceof Error ? err.message : String(err);
accept('error', `${messageContext}: ${messageDetails}`, { node });
}
}

protected addEntry(type: string, entry: ValidationCheckEntry): void {
if (type === 'AstNode') {
this.entries.add('AstNode', entry);
Expand All @@ -183,4 +207,58 @@ export class ValidationRegistry {
return checks.map(entry => entry.check);
}

/**
* Register logic which will be executed once before validating all the nodes of an AST/Langium document.
* This helps to prepare or initialize some information which are required or reusable for the following checks on the AstNodes.
*
* As an example, for validating unique fully-qualified names of nodes in the AST,
* here the map for mapping names to nodes could be established.
* During the usual checks on the nodes, they are put into this map with their name.
*
* Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation.
* Therefore it is recommended to clear stored information
* _before_ validating an AST to validate each AST unaffected from other ASTs
* AND _after_ validating the AST to free memory by information which are no longer used.
*
* @param checkBefore a set-up function which will be called once before actually validating an AST
* @param thisObj Optional object to be used as `this` when calling the validation check functions.
*/
registerBeforeDocument(checkBefore: ValidationPreparation, thisObj: ThisParameterType<unknown> = this): void {
this.entriesBefore.push(this.wrapPreparationException(checkBefore, 'An error occurred during set-up of the validation', thisObj));
}

/**
* Register logic which will be executed once after validating all the nodes of an AST/Langium document.
* This helps to finally evaluate information which are collected during the checks on the AstNodes.
*
* As an example, for validating unique fully-qualified names of nodes in the AST,
* here the map with all the collected nodes and their names is checked
* and validation hints are created for all nodes with the same name.
*
* Note that this approach makes validations stateful, which is relevant e.g. when cancelling the validation.
* Therefore it is recommended to clear stored information
* _before_ validating an AST to validate each AST unaffected from other ASTs
* AND _after_ validating the AST to free memory by information which are no longer used.
*
* @param checkBefore a set-up function which will be called once before actually validating an AST
* @param thisObj Optional object to be used as `this` when calling the validation check functions.
*/
registerAfterDocument(checkAfter: ValidationPreparation, thisObj: ThisParameterType<unknown> = this): void {
this.entriesAfter.push(this.wrapPreparationException(checkAfter, 'An error occurred during tear-down of the validation', thisObj));
}

protected wrapPreparationException(check: ValidationPreparation, messageContext: string, thisObj: unknown): ValidationPreparation {
return async (rootNode, accept, categories, cancelToken) => {
await this.handleException(() => check.call(thisObj, rootNode, accept, categories, cancelToken), messageContext, accept, rootNode);
};
}

get checksBefore(): ValidationPreparation[] {
return this.entriesBefore;
}

get checksAfter(): ValidationPreparation[] {
return this.entriesAfter;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd declare the properties as public readonly rather than using such getter methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with public properties is, that users could register checks directly in this property and without calling registerBeforeDocument(...), which bypasses our internal handling of exceptions.
Therefore I decided to keep the "simple getters".

Copy link
Contributor

@spoenemann spoenemann Oct 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The getter does not change that: someone could still push elements to the returned array. If you really want to prevent that, you'd need to clone the array. But an easier and faster solution is just to document the property/method saying you shouldn't manipulate the array.

If you want to keep a getter, I suggest using a get property:

    get checksAfter(): ValidationPreparation[] {
        return this.entriesAfter;
    }

This is about code style: methods like getChecksAfter() are common in Java, while in JS/TS either value properties or get properties are used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

someone could still push elements to the returned array. If you really want to prevent that, you'd need to clone the array.

You are right.

I replaced getChecksBefore() by get checksBefore(), since it is important to have a unified code style.

Beyond that, I personally prefer to have the new getChecksBefore() and the already existing getChecks() in the same style, but I am fine with get ... as well.


}
156 changes: 153 additions & 3 deletions packages/langium/test/validation/document-validator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/

import type { AstNode, ValidationChecks } from 'langium';
import { AstUtils } from 'langium';
import type { AstNode, ValidationAcceptor, ValidationChecks } from 'langium';
import { createServicesForGrammar } from 'langium/grammar';
import type { LangiumServices } from 'langium/lsp';
import type { ValidationResult } from 'langium/test';
import { validationHelper } from 'langium/test';
import { beforeAll, describe, expect, test } from 'vitest';
import { Position, Range } from 'vscode-languageserver';
import { createServicesForGrammar } from 'langium/grammar';
import { validationHelper } from 'langium/test';

// Related to https://github.com/eclipse-langium/langium/issues/571
describe('Parser error is thrown on resynced token with NaN position', () => {
Expand Down Expand Up @@ -110,3 +112,151 @@ describe('Generic `AstNode` validation applies to all nodes', () => {
});

});

describe('Register Before/AfterDocument logic for validations with state', () => {

// the grammar
const grammar = `grammar NestedNamedNodes
entry Model:
(elements+=NamedNode)*;

NamedNode:
name=ID '{' (children+=NamedNode)* '}';

hidden terminal WS: /\\s+/;
terminal ID: /[_a-zA-Z][\\w_]*/;
`;

// AST types derived from the grammar
type NestedNamedNodesAstTypes = {
Model: Model;
NamedNode: NamedNode;
}
interface Model extends AstNode {
$type: 'Model';
elements: NamedNode[];
}
interface NamedNode extends AstNode {
$type: 'NamedNode';
$container: Model | NamedNode;
name: string;
children: NamedNode[];
}

// utilities
let validate: (input: string) => Promise<ValidationResult<AstNode>>;
let services: LangiumServices;

function getFQN(node: AstNode): string {
if ('name' in node && node.$container) {
const parentName = getFQN(node.$container);
if (parentName) {
return parentName + '.' + node.name;
}
return '' + node.name;
}
return '';
}

function rememberNamedNode(child: NamedNode, fqnMap: Map<string, NamedNode[]>): void {
const fqn = getFQN(child);
let list = fqnMap.get(fqn);
if (!list) {
list = [];
fqnMap.set(fqn, list);
}
list.push(child);
}

function checkForDuplicates(fqnMap: Map<string, NamedNode[]>, accept: ValidationAcceptor): void {
for (const [key, value] of fqnMap.entries()) {
if (value.length >= 2) {
value.forEach(child => accept('error', `The FQN '${key}' is not unique.`, { node: child }));
}
}
}

// 1. realize validation in state-less way
function registerValidationForRootElement() {
const validationChecks: ValidationChecks<NestedNamedNodesAstTypes> = {
// do the expensive validation once on the root Model only
Model: (node, accept) => {
const fqnMap = new Map<string, NamedNode[]>();
// collect the FQN of all nodes
// !! This 'streamAllContents' is saved in the alternative version of the validation !!
AstUtils.streamAllContents(node).forEach(child => {
rememberNamedNode(child as NamedNode, fqnMap);
});
// check for duplicates
checkForDuplicates(fqnMap, accept);
},
};
services.validation.ValidationRegistry.register(validationChecks);
}

// 2. realize validation without an additional AST traversal by exploiting the stateful validation approach
function registerValidationBeforeAfter() {
const fqnMap = new Map<string, NamedNode[]>(); // this is the new state: remember all NamedNode nodes, classified by their name
services.validation.ValidationRegistry.registerBeforeDocument((_rootNode, _accept, _categories) => {
fqnMap.clear(); // clear everything to be sure when starting to validate an AST
});
services.validation.ValidationRegistry.register(<ValidationChecks<NestedNamedNodesAstTypes>>{
// register the named nodes in the map (but don't validate/check them now)
NamedNode: (node, _accept) => {
// Streaming the whole AST is not required with this approach, since the streaming is already done by the DocumentValidator!
rememberNamedNode(node, fqnMap);
},
});
services.validation.ValidationRegistry.registerAfterDocument((_rootNode, accept, _categories) => {
// check for duplicates after all checks for the single nodes of the AST are done
checkForDuplicates(fqnMap, accept);
fqnMap.clear(); // free memory afterwards
});
}

// test cases to ensure, that both approaches produce the same validation hints
describe('Using the stateless validation', () => {
beforeAll(async () => {
services = await createServicesForGrammar({
grammar
});
validate = validationHelper(services);
registerValidationForRootElement();
});

test('Children with same name on same level (stateless)', async () => {
const validationResult = await validate('A { B{} C{} B{} }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(2);
});

test('Nested Children with same name (stateless)', async () => {
const validationResult = await validate('A { A{ A{} } }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(0);
});
});

describe('Using the stateful validation', () => {
beforeAll(async () => {
services = await createServicesForGrammar({
grammar
});
validate = validationHelper(services);
registerValidationBeforeAfter();
});

test('Children with same name on same level (with state)', async () => {
const validationResult = await validate('A { B{} C{} B{} }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(2);
});

test('Nested Children with same name (with state)', async () => {
const validationResult = await validate('A { A{ A{} } }');
const diagnostics = validationResult.diagnostics;
expect(diagnostics).toHaveLength(0);
});
});

});
Loading