Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keyword tokens type #1647

Merged
merged 4 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions examples/arithmetics/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,23 @@ export const ArithmeticsTerminals = {

export type ArithmeticsTerminalNames = keyof typeof ArithmeticsTerminals;

/**
 * Union of the keyword literals of the Arithmetics language.
 * This alias lives in a generated file; regenerate it instead of editing it by hand.
 */
export type ArithmeticsKeywordNames =
| "%"
| "("
| ")"
| "*"
| "+"
| ","
| "-"
| "/"
| ":"
| ";"
| "^"
| "def"
| "module";

/** Every token name of the Arithmetics language: terminal names plus keyword literals. */
export type ArithmeticsTokenNames = ArithmeticsTerminalNames | ArithmeticsKeywordNames;

export type AbstractDefinition = DeclaredParameter | Definition;

export const AbstractDefinition = 'AbstractDefinition';
Expand Down
13 changes: 13 additions & 0 deletions examples/domainmodel/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ export const DomainModelTerminals = {

export type DomainModelTerminalNames = keyof typeof DomainModelTerminals;

/**
 * Union of the keyword literals of the DomainModel language.
 * This alias lives in a generated file; regenerate it instead of editing it by hand.
 */
export type DomainModelKeywordNames =
| "."
| ":"
| "datatype"
| "entity"
| "extends"
| "many"
| "package"
| "{"
| "}";

/** Every token name of the DomainModel language: terminal names plus keyword literals. */
export type DomainModelTokenNames = DomainModelTerminalNames | DomainModelKeywordNames;

export type AbstractElement = PackageDeclaration | Type;

export const AbstractElement = 'AbstractElement';
Expand Down
15 changes: 15 additions & 0 deletions examples/requirements/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ export const RequirementsAndTestsTerminals = {

export type RequirementsAndTestsTerminalNames = keyof typeof RequirementsAndTestsTerminals;

/**
 * Union of the keyword literals of the RequirementsAndTests language.
 * This alias lives in a generated file; regenerate it instead of editing it by hand.
 */
export type RequirementsAndTestsKeywordNames =
| ","
| ":"
| "="
| "applicable"
| "contact"
| "environment"
| "for"
| "req"
| "testFile"
| "tests"
| "tst";

/** Every token name of the RequirementsAndTests language: terminal names plus keyword literals. */
export type RequirementsAndTestsTokenNames = RequirementsAndTestsTerminalNames | RequirementsAndTestsKeywordNames;

export interface Contact extends AstNode {
readonly $container: RequirementModel | TestModel;
readonly $type: 'Contact';
Expand Down
14 changes: 14 additions & 0 deletions examples/statemachine/src/language-server/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@ export const StatemachineTerminals = {

export type StatemachineTerminalNames = keyof typeof StatemachineTerminals;

/**
 * Union of the keyword literals of the Statemachine language.
 * This alias lives in a generated file; regenerate it instead of editing it by hand.
 */
export type StatemachineKeywordNames =
| "=>"
| "actions"
| "commands"
| "end"
| "events"
| "initialState"
| "state"
| "statemachine"
| "{"
| "}";

/** Every token name of the Statemachine language: terminal names plus keyword literals. */
export type StatemachineTokenNames = StatemachineTerminalNames | StatemachineKeywordNames;

export interface Command extends AstNode {
readonly $container: Statemachine;
readonly $type: 'Command';
Expand Down
14 changes: 12 additions & 2 deletions packages/langium-cli/src/generator/ast-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/
import type { Grammar, LangiumCoreServices } from 'langium';
import { type Generated, expandToNode, joinToNode, toString } from 'langium/generate';
import { EOL, type Generated, expandToNode, joinToNode, toString } from 'langium/generate';
import type { AstTypes, Property, PropertyDefaultValue } from 'langium/grammar';
import type { LangiumConfig } from '../package-types.js';
import { AstUtils, MultiMap, GrammarAST } from 'langium';
import { collectAst, collectTypeHierarchy, findReferenceTypes, isAstType, mergeTypesAndInterfaces, escapeQuotes } from 'langium/grammar';
import { generatedHeader } from './node-util.js';
import { collectTerminalRegexps } from './langium-util.js';
import { collectKeywords, collectTerminalRegexps } from './langium-util.js';

export function generateAst(services: LangiumCoreServices, grammars: Grammar[], config: LangiumConfig): string {
const astTypes = collectAst(grammars, services.shared.workspace.LangiumDocuments);
Expand Down Expand Up @@ -231,16 +231,26 @@ function groupBySupertypes(astTypes: AstTypes): MultiMap<string, string> {

/**
 * Generates the token-related declarations of the generated AST module:
 * the `<projectName>Terminals` constant (terminal name -> regular expression),
 * the `<projectName>TerminalNames` and `<projectName>KeywordNames` types, and
 * their union `<projectName>TokenNames`.
 *
 * @param grammars The grammars whose terminals and keywords are collected.
 * @param config The CLI configuration; only `projectName` is read, as the prefix of every emitted identifier.
 * @returns The generator node containing the declarations, followed by a new line.
 */
function generateTerminalConstants(grammars: Grammar[], config: LangiumConfig): Generated {
let collection: Record<string, RegExp> = {};
// A set is used so that a keyword appearing in several grammars is emitted only once.
const keywordTokens = new Set<string>();
grammars.forEach(grammar => {
const terminalConstants = collectTerminalRegexps(grammar);
// Later grammars override terminals of the same name coming from earlier grammars.
collection = {...collection, ...terminalConstants};
for (const keyword of collectKeywords(grammar)) {
keywordTokens.add(keyword);
}
});

// Sort with the default string ordering and let JSON.stringify quote and escape
// each keyword so that it can be emitted verbatim as a string literal type.
const keywordStrings = Array.from(keywordTokens).sort().map((keyword) => JSON.stringify(keyword));

// If no keywords were collected, the keyword union is emitted as `never`.
return expandToNode`
export const ${config.projectName}Terminals = {
${joinToNode(Object.entries(collection), ([name, regexp]) => `${name}: ${regexp.toString()},`, { appendNewLineIfNotEmpty: true })}
};

export type ${config.projectName}TerminalNames = keyof typeof ${config.projectName}Terminals;

export type ${config.projectName}KeywordNames = ${keywordStrings.length > 0 ? keywordStrings.map(keyword => `${EOL} | ${keyword}`).join('') : 'never'};

export type ${config.projectName}TokenNames = ${config.projectName}TerminalNames | ${config.projectName}KeywordNames;
`.appendNewLine();
}
2 changes: 1 addition & 1 deletion packages/langium-cli/test/generator/ast-generator.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ function testGeneratedInterface(name: string, grammar: string, expected: string)
}

function testGeneratedAst(name: string, grammar: string, expected: string): void {
testGenerated(name, grammar, expected, 'export type', 'export type testAstType', 1);
testGenerated(name, grammar, expected, 'export type', 'export type testAstType', 3);
}

function testTypeMetaData(name: string, grammar: string, expected: string): void {
Expand Down
54 changes: 54 additions & 0 deletions packages/langium/src/languages/generated/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,60 @@ export const LangiumGrammarTerminals = {

export type LangiumGrammarTerminalNames = keyof typeof LangiumGrammarTerminals;

/**
 * Union of the keyword literals of the Langium grammar language.
 * This alias lives in a generated file; regenerate it instead of editing it by hand.
 */
export type LangiumGrammarKeywordNames =
| "!"
| "&"
| "("
| ")"
| "*"
| "+"
| "+="
| ","
| "->"
| "."
| ".."
| ":"
| ";"
| "<"
| "="
| "=>"
| ">"
| "?"
| "?!"
| "?<!"
| "?<="
| "?="
| "@"
| "Date"
| "EOF"
| "["
| "]"
| "bigint"
| "boolean"
| "current"
| "entry"
| "extends"
| "false"
| "fragment"
| "grammar"
| "hidden"
| "import"
| "infer"
| "infers"
| "interface"
| "number"
| "returns"
| "string"
| "terminal"
| "true"
| "type"
| "with"
| "{"
| "|"
| "}";

/** Every token name of the Langium grammar language: terminal names plus keyword literals. */
export type LangiumGrammarTokenNames = LangiumGrammarTerminalNames | LangiumGrammarKeywordNames;

export type AbstractRule = ParserRule | TerminalRule;

export const AbstractRule = 'AbstractRule';
Expand Down
22 changes: 13 additions & 9 deletions packages/langium/src/parser/indentation-aware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import { DefaultLexer, isTokenTypeArray } from './lexer.js';

type IndentationAwareDelimiter<TokenName extends string> = [begin: TokenName, end: TokenName];

export interface IndentationTokenBuilderOptions<TokenName extends string = string> {
export interface IndentationTokenBuilderOptions<TerminalName extends string = string, KeywordName extends string = string> {
/**
* The name of the token used to denote indentation in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -25,7 +25,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'INDENT'
*/
indentTokenName: TokenName;
indentTokenName: TerminalName;
/**
* The name of the token used to denote deindentation in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -35,7 +35,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'DEDENT'
*/
dedentTokenName: TokenName;
dedentTokenName: TerminalName;
/**
* The name of the token used to denote whitespace other than indentation and newlines in the grammar.
* A possible definition in the grammar could look like this:
Expand All @@ -45,7 +45,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default 'WS'
*/
whitespaceTokenName: TokenName;
whitespaceTokenName: TerminalName;
/**
* The delimiter tokens inside of which indentation should be ignored and treated as normal whitespace.
* For example, Python doesn't treat any whitespace between `(` and `)` as significant.
Expand All @@ -54,7 +54,7 @@ export interface IndentationTokenBuilderOptions<TokenName extends string = strin
*
* @default []
*/
ignoreIndentationDelimeters: Array<IndentationAwareDelimiter<TokenName>>
ignoreIndentationDelimeters: Array<IndentationAwareDelimiter<TerminalName | KeywordName>>
}

export const indentationBuilderDefaultOptions: IndentationTokenBuilderOptions = {
Expand All @@ -73,15 +73,19 @@ export enum LexingMode {
* A token builder that is sensitive to indentation in the input text.
* It will generate tokens for indentation and dedentation based on the indentation level.
*
* The first generic parameter corresponds to the names of terminal tokens,
* while the second one corresponds to the names of keyword tokens.
* Both parameters are optional and can be imported from `./generated/ast.js`.
*
* Inspired by https://github.com/chevrotain/chevrotain/blob/master/examples/lexer/python_indentation/python_indentation.js
*/
export class IndentationAwareTokenBuilder<Terminals extends string = string> extends DefaultTokenBuilder {
export class IndentationAwareTokenBuilder<Terminals extends string = string, KeywordName extends string = string> extends DefaultTokenBuilder {
/**
* The stack in which all the previous matched indentation levels are stored
* to understand how deep the next tokens are nested.
*/
protected indentationStack: number[] = [0];
readonly options: IndentationTokenBuilderOptions<Terminals>;
readonly options: IndentationTokenBuilderOptions<Terminals, KeywordName>;

/**
* The token type to be used for indentation tokens
Expand All @@ -99,10 +103,10 @@ export class IndentationAwareTokenBuilder<Terminals extends string = string> ext
*/
protected whitespaceRegExp = /[ \t]+/y;

constructor(options: Partial<IndentationTokenBuilderOptions<NoInfer<Terminals>>> = indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals>) {
constructor(options: Partial<IndentationTokenBuilderOptions<NoInfer<Terminals>, NoInfer<KeywordName>>> = indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals, KeywordName>) {
super();
this.options = {
...indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals>,
...indentationBuilderDefaultOptions as IndentationTokenBuilderOptions<Terminals, KeywordName>,
...options,
};

Expand Down
Loading