From 52dee4535867d589ddb3e4f9a9901fb64e518f61 Mon Sep 17 00:00:00 2001 From: aabounegm Date: Sun, 25 Aug 2024 20:13:10 +0000 Subject: [PATCH] =?UTF-8?q?Deploy=20preview=20for=20PR=20246=20?= =?UTF-8?q?=F0=9F=9B=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pr-previews/pr-246/docs/features/index.html | 4 +- .../docs/introduction/playground/index.html | 4 +- .../docs/introduction/showcases/index.html | 4 +- .../learn/minilogo/customizing_cli/index.html | 4 +- .../docs/learn/minilogo/generation/index.html | 4 +- .../minilogo/generation_in_the_web/index.html | 4 +- .../minilogo/langium_and_monaco/index.html | 4 +- .../docs/learn/minilogo/validation/index.html | 4 +- .../minilogo/writing_a_grammar/index.html | 4 +- .../workflow/create_validations/index.html | 4 +- .../learn/workflow/generate_ast/index.html | 4 +- .../workflow/generate_everything/index.html | 4 +- .../docs/learn/workflow/install/index.html | 4 +- .../resolve_cross_references/index.html | 4 +- .../docs/learn/workflow/scaffold/index.html | 4 +- .../learn/workflow/write_grammar/index.html | 4 +- .../docs/recipes/builtin-library/index.html | 4 +- .../docs/recipes/code-bundling/index.html | 4 +- .../pr-246/docs/recipes/formatting/index.html | 4 +- .../case-insensitive-languages/index.html | 4 +- .../index.html | 111 +++++++++++------- .../recipes/multiple-languages/index.html | 4 +- .../recipes/scoping/class-member/index.html | 4 +- .../recipes/scoping/file-based/index.html | 4 +- .../recipes/scoping/qualified-name/index.html | 4 +- .../configuration-services/index.html | 4 +- .../reference/document-lifecycle/index.html | 4 +- .../pr-246/docs/reference/glossary/index.html | 4 +- .../reference/grammar-language/index.html | 4 +- .../docs/reference/semantic-model/index.html | 4 +- pr-previews/pr-246/en.search-data.min.json | 2 +- pr-previews/pr-246/sitemap.xml | 2 +- 32 files changed, 126 insertions(+), 105 deletions(-) diff --git a/pr-previews/pr-246/docs/features/index.html b/pr-previews/pr-246/docs/features/index.html index 03e64b26..e15146ab 100644 --- a/pr-previews/pr-246/docs/features/index.html +++ b/pr-previews/pr-246/docs/features/index.html @@ -14,7 +14,7 @@ How do I get from a string to a semantic model which I can work with?"> - + +
diff --git a/pr-previews/pr-246/docs/introduction/playground/index.html b/pr-previews/pr-246/docs/introduction/playground/index.html index 3222eb5b..73814c8a 100644 --- a/pr-previews/pr-246/docs/introduction/playground/index.html +++ b/pr-previews/pr-246/docs/introduction/playground/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/introduction/showcases/index.html b/pr-previews/pr-246/docs/introduction/showcases/index.html index b5320c11..6fb98442 100644 --- a/pr-previews/pr-246/docs/introduction/showcases/index.html +++ b/pr-previews/pr-246/docs/introduction/showcases/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/customizing_cli/index.html b/pr-previews/pr-246/docs/learn/minilogo/customizing_cli/index.html index 91f08ffa..6a3d311c 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/customizing_cli/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/customizing_cli/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/generation/index.html b/pr-previews/pr-246/docs/learn/minilogo/generation/index.html index ecb5cdcc..aaab8d9c 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/generation/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/generation/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/generation_in_the_web/index.html b/pr-previews/pr-246/docs/learn/minilogo/generation_in_the_web/index.html index e9176500..b1e2d1c1 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/generation_in_the_web/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/generation_in_the_web/index.html @@ -14,7 +14,7 @@ In this tutorial we’ll be talking about how to perform generation in the web by listening for document builder notifications. There are multiple ways to hook into Langium to utilize the generator, such as by directly exporting the generator API."> - + +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/langium_and_monaco/index.html b/pr-previews/pr-246/docs/learn/minilogo/langium_and_monaco/index.html index 4a6e9f65..0400a480 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/langium_and_monaco/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/langium_and_monaco/index.html @@ -14,7 +14,7 @@ In this tutorial we’ll be talking about running Langium in the web with the Monaco editor. If you’re not familiar with Monaco, it’s the editor that powers VS Code."> - + +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/validation/index.html b/pr-previews/pr-246/docs/learn/minilogo/validation/index.html index 79e2e558..89571784 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/validation/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/validation/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/learn/minilogo/writing_a_grammar/index.html b/pr-previews/pr-246/docs/learn/minilogo/writing_a_grammar/index.html index 91d36d8d..02d9014f 100644 --- a/pr-previews/pr-246/docs/learn/minilogo/writing_a_grammar/index.html +++ b/pr-previews/pr-246/docs/learn/minilogo/writing_a_grammar/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/learn/workflow/create_validations/index.html b/pr-previews/pr-246/docs/learn/workflow/create_validations/index.html index d4b72c6a..2a5f7de6 100644 --- a/pr-previews/pr-246/docs/learn/workflow/create_validations/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/create_validations/index.html @@ -14,7 +14,7 @@ Example Let’s consider the Hello-World example from the Yeoman generator. One semantic of this language could be that each declared person must be greeted at most once."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/generate_ast/index.html b/pr-previews/pr-246/docs/learn/workflow/generate_ast/index.html index f7486ed8..0fc9f8c7 100644 --- a/pr-previews/pr-246/docs/learn/workflow/generate_ast/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/generate_ast/index.html @@ -14,7 +14,7 @@ npm run langium:generate This line will call langium generate on your Langium project. The Langium CLI will generate the files in the src/generated directory."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/generate_everything/index.html b/pr-previews/pr-246/docs/learn/workflow/generate_everything/index.html index 1089f465..ab3a496f 100644 --- a/pr-previews/pr-246/docs/learn/workflow/generate_everything/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/generate_everything/index.html @@ -16,7 +16,7 @@ How to write the generator? The simplest way is to generate text into a string."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/install/index.html b/pr-previews/pr-246/docs/learn/workflow/install/index.html index eff3a581..8bfed69b 100644 --- a/pr-previews/pr-246/docs/learn/workflow/install/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/install/index.html @@ -14,7 +14,7 @@ You have a working Node environment with version 16 or higher. Install Yeoman and the Langium extension generator. npm i -g yo generator-langium For our getting started example, we would also recommend you to install the latest version of vscode."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/resolve_cross_references/index.html b/pr-previews/pr-246/docs/learn/workflow/resolve_cross_references/index.html index 589b59b7..47997f4e 100644 --- a/pr-previews/pr-246/docs/learn/workflow/resolve_cross_references/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/resolve_cross_references/index.html @@ -16,7 +16,7 @@ person John person Jane Hello John! Hello Jane! The following syntax tree is generated by the Langium parser during the runtime."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/scaffold/index.html b/pr-previews/pr-246/docs/learn/workflow/scaffold/index.html index 0e440753..f8d89126 100644 --- a/pr-previews/pr-246/docs/learn/workflow/scaffold/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/scaffold/index.html @@ -14,7 +14,7 @@ > yo langium ┌─────┐ ─┐ ┌───┐ │ ╶─╮ ┌─╮ ╭─╮ ╷ ╷ ╷ ┌─┬─╮ │ ,´ │ ╭─┤ │ │ │ │ │ │ │ │ │ │ │╱ ╰─ ╰─┘ ╵ ╵ ╰─┤ ╵ ╰─╯ ╵ ╵ ╵ ` ╶─╯ Welcome to Langium! This tool generates a VS Code extension with a "Hello World" language to get started quickly. The extension name is an identifier used in the extension marketplace or package registry."> - + +
diff --git a/pr-previews/pr-246/docs/learn/workflow/write_grammar/index.html b/pr-previews/pr-246/docs/learn/workflow/write_grammar/index.html index f5424435..655053d3 100644 --- a/pr-previews/pr-246/docs/learn/workflow/write_grammar/index.html +++ b/pr-previews/pr-246/docs/learn/workflow/write_grammar/index.html @@ -14,7 +14,7 @@ The grammar is defined in a .langium file. Make sure that you have installed the VS Code extension for Langium. This extension provides syntax highlighting and code completion for ."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/builtin-library/index.html b/pr-previews/pr-246/docs/recipes/builtin-library/index.html index 4c9e78f2..b4240d37 100644 --- a/pr-previews/pr-246/docs/recipes/builtin-library/index.html +++ b/pr-previews/pr-246/docs/recipes/builtin-library/index.html @@ -14,7 +14,7 @@ Loading a builtin library in Langium is very simple."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/code-bundling/index.html b/pr-previews/pr-246/docs/recipes/code-bundling/index.html index 07d0121d..45c8868b 100644 --- a/pr-previews/pr-246/docs/recipes/code-bundling/index.html +++ b/pr-previews/pr-246/docs/recipes/code-bundling/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/recipes/formatting/index.html b/pr-previews/pr-246/docs/recipes/formatting/index.html index 88b3964a..eeb4c589 100644 --- a/pr-previews/pr-246/docs/recipes/formatting/index.html +++ b/pr-previews/pr-246/docs/recipes/formatting/index.html @@ -14,7 +14,7 @@ import { AbstractFormatter, AstNode, Formatting } from 'langium'; export class CustomFormatter extends AbstractFormatter { protected format(node: AstNode): void { // This method is called for every AstNode in a document } } ... // Bind the class in your module export const CustomModule: Module = { lsp: { Formatter: () => new CustomFormatter() } }; The entry point for the formatter is the abstract format(AstNode) method."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/lexing/case-insensitive-languages/index.html b/pr-previews/pr-246/docs/recipes/lexing/case-insensitive-languages/index.html index 68f2be8b..53b83541 100644 --- a/pr-previews/pr-246/docs/recipes/lexing/case-insensitive-languages/index.html +++ b/pr-previews/pr-246/docs/recipes/lexing/case-insensitive-languages/index.html @@ -16,7 +16,7 @@ You can make Langium’s parser completely case insensitive using the language configuration You can include case-insensitivity for specific terminal rules You can make cross references case insensitive All of these options can be enabled independent of one another."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/lexing/indentation-sensitive-languages/index.html b/pr-previews/pr-246/docs/recipes/lexing/indentation-sensitive-languages/index.html index 102db648..4ab6e17d 100644 --- a/pr-previews/pr-246/docs/recipes/lexing/indentation-sensitive-languages/index.html +++ b/pr-previews/pr-246/docs/recipes/lexing/indentation-sensitive-languages/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
@@ -469,14 +469,17 @@

Indentation-sensitive languages

Some programming languages (such as Python, Haskell, and YAML) use indentation to denote nesting, as opposed to special non-whitespace tokens (such as { and } in C++/JavaScript). This can be difficult to express in the EBNF notation used for defining a language grammar in Langium, which is context-free. To achieve that, you can make use of synthetic tokens in the grammar which you would then redefine using Chevrotain in a custom token builder.

-

Starting with Langium v3.2, such token builder (and an accompanying lexer) are provided for easy plugging into your language.

+

Starting with Langium v3.2, such a token builder (and an accompanying lexer) is provided for easy plugging into your language. +They work by modifying the underlying Chevrotain tokens generated for your indentation terminals to use a custom matcher function instead, one that has access to more context than a plain regular expression, allowing it to store state and detect changes in the indentation level. This is why you need to provide the names of the tokens you used to denote indentation: so that the builder can override the correct tokens of your grammar.
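+To make this concrete, here is a rough, purely illustrative sketch of the underlying Chevrotain idea (not the actual Langium implementation): a token whose pattern is a custom matcher function that consults mutable state.
+
+import { createToken } from 'chevrotain';
+
+// Illustrative sketch only: a stack of indentation levels that the
+// custom matcher can consult and update across calls.
+const indentLevels: number[] = [0];
+
+const INDENT = createToken({
+    name: 'INDENT',
+    // Unlike a regular expression, a custom matcher receives the full
+    // input text and the current offset, so it can track state.
+    pattern: (text: string, offset: number): RegExpExecArray | null => {
+        const match = /^[\t ]*/.exec(text.substring(offset));
+        if (match && match[0].length > indentLevels[indentLevels.length - 1]) {
+            indentLevels.push(match[0].length);
+            return match; // emit INDENT only when the indentation level increases
+        }
+        return null; // otherwise, no INDENT token at this offset
+    },
+    line_breaks: false,
+});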

Configuring the token builder and lexer

-

To be able to use the indendation tokens in your grammar, you first have to import and register the IndentationAwareTokenBuilder and IndentationAwareLexer services in your module as such:

+

To be able to use the indentation tokens in your grammar, you first have to import and register the IndentationAwareTokenBuilder +and IndentationAwareLexer +services in your module as follows:

import { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium';
 
 // ...
@@ -489,14 +492,64 @@ 

}; // ...

-

The IndentationAwareTokenBuilder constructor optionally accepts an object defining the names of the tokens you used to denote indentation and whitespace in your .langium grammar file. It defaults to:

+

The IndentationAwareTokenBuilder constructor optionally accepts an object defining the names of the tokens you used to denote indentation and whitespace in your .langium grammar file, as well as a list of delimiter tokens inside which indentation should be ignored. It defaults to:

{
     indentTokenName: 'INDENT',
     dedentTokenName: 'DEDENT',
     whitespaceTokenName: 'WS',
+    ignoreIndentationDelimiters: [],
 }
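+For example, if your grammar declares differently named terminals, you can pass the names explicitly when constructing the builder. The names below are purely hypothetical and must match terminals that actually exist in your .langium grammar file:
+
+import { IndentationAwareTokenBuilder } from 'langium';
+
+// Hypothetical terminal names; adjust them to your grammar.
+const tokenBuilder = new IndentationAwareTokenBuilder({
+    indentTokenName: 'BEGIN',
+    dedentTokenName: 'END',
+    whitespaceTokenName: 'SPACES',
+});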
 
+

+Ignoring indentation between specific tokens + + +

+

Sometimes, it is necessary to ignore any indentation token inside some expressions, such as with tuples and lists in Python. For example, in the following statement:

+
x = [
+    1,
+    2
+]
+
+

any indentation between [ and ] should be ignored.

+

To achieve similar behavior with the IndentationAwareTokenBuilder, the ignoreIndentationDelimiters option can be used. +It accepts a list of pairs of token names (terminal or keyword) and turns off indentation token detection between each pair.

+

For example, if you construct the IndentationAwareTokenBuilder with the following options:

+
new IndentationAwareTokenBuilder({
+    ignoreIndentationDelimiters: [
+        ['[', ']'],
+        ['(', ')'],
+    ],
+})
+
+

then no indentation tokens will be emitted between either of those pairs of tokens.

+
+

+Configuration options type safety + + +

+

The IndentationAwareTokenBuilder supports generic type parameters to improve the type safety and IntelliSense of its options. +This helps detect when a token name has been mistyped or changed in the grammar. +The first generic parameter corresponds to the names of terminal tokens, while the second one corresponds to the names of keyword tokens. +Both parameters are optional; the corresponding name types are exported from ./generated/ast.js and can be used as follows:

+
import { MyLanguageTerminalNames, MyLanguageKeywordNames } from './generated/ast.js';
+import { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium';
+
+// ...
+export const HelloWorldModule: Module<HelloWorldServices, PartialLangiumServices & HelloWorldAddedServices> = {
+    parser: {
+        TokenBuilder: () => new IndentationAwareTokenBuilder<MyLanguageTerminalNames, MyLanguageKeywordNames>({
+            ignoreIndentationDelimiters: [
+                ['L_BRAC', 'R_BARC'], // <-- This typo will now cause a TypeScript error
+            ]
+        }),
+        Lexer: (services) => new IndentationAwareLexer(services),
+    },
+};
+
+

Writing the grammar @@ -526,49 +579,17 @@

The important terminals here are INDENT, DEDENT, and WS. INDENT and DEDENT are used to delimit a nested block, similar to { and } (respectively) in C-like languages. -Note that INDENT indicates an increase in indentation, not just the existence of leading whitespace, which is why in the example above we used it only at the beginning of the block, not before every Statement.

+Note that INDENT indicates an increase in indentation, not just the existence of leading whitespace, which is why in the example above we used it only at the beginning of the block, not before every Statement.
+Additionally, splitting the whitespace handling from a single \s+ terminal into [\t ]+ and [\r\n]+ is necessary because \s+ would also match the newline character together with any indentation that follows it. To ensure correct behavior, the token builder modifies the pattern of the whitespaceTokenName token to be [\t ]+, so a separate hidden terminal for newlines (for example, hidden terminal NL: /[\r\n]+/;) needs to be explicitly defined.

The content you choose for these three terminals doesn’t matter, since it will be overridden by the IndentationAwareTokenBuilder anyway. However, you might still want to choose token content that doesn’t overlap with other terminals for easier use in the playground.

-

Since the Langium playground doesn’t support overriding the default services, you cannot use indentation-aware grammar there. -However, you can get around this by defining the indentation terminals in a way that doesn’t overlap with other terminals, and then actually using them to simulate indentation.

-

For example, for the grammar above, you can write:

-
if false:
-synthetic:indent    return true
-synthetic:dedent
-else:
-synthetic:indent    if false:
-synthetic:indent        return false
-synthetic:dedent synthetic:dedent
-
-

instead of:

-
if false:
-    return true
+

With the default configuration and the grammar above, for the following code sample:

+
if true:
+    return false
 else:
-    if false:
-        return false
-
-

since all whitespace will be ignored anyway.

-

While this approach doesn’t easily scale, it can be useful for testing when defining your grammar.

-
-

-Drawbacks - - -

-

Using this token builder, all leading whitespace becomes significant, no matter the context. -This means that it will no longer be possible for an expression to span multiple lines if one of these lines starts with whitespace and an INDENT token is not explicitly allowed in that position.

-

For example, the following Python code wouldn’t parse:

-
x = [
-    1, # ERROR: Unexpected INDENT token
-]
+    if true:
+        return true
 
-

without explicitly specifying that INDENT is allowed after [.

-

This can be worked around by using multi-mode lexing.

+

the lexer will output the following sequence of tokens: if, BOOLEAN, INDENT, return, BOOLEAN, DEDENT, else, INDENT, if, BOOLEAN, INDENT, return, BOOLEAN, DEDENT, DEDENT.
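+As a quick sanity check, you can run the lexer directly and print the resulting token names. The sketch below assumes a generated createMyLanguageServices function and the service bindings shown earlier; the module name and path are illustrative only:
+
+import { EmptyFileSystem } from 'langium';
+import { createMyLanguageServices } from './my-language-module.js'; // hypothetical path
+
+const services = createMyLanguageServices(EmptyFileSystem).MyLanguage;
+
+const code = [
+    'if true:',
+    '    return false',
+    'else:',
+    '    if true:',
+    '        return true',
+].join('\n');
+
+// The IndentationAwareLexer inserts the synthetic INDENT/DEDENT tokens here.
+const { tokens } = services.parser.Lexer.tokenize(code);
+console.log(tokens.map(token => token.tokenType.name).join(', '));
+// Should print the sequence above, alongside any other tokens
+// defined by your grammar (such as the ':' keyword).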

diff --git a/pr-previews/pr-246/docs/recipes/multiple-languages/index.html b/pr-previews/pr-246/docs/recipes/multiple-languages/index.html index 57688323..ac4d20d2 100644 --- a/pr-previews/pr-246/docs/recipes/multiple-languages/index.html +++ b/pr-previews/pr-246/docs/recipes/multiple-languages/index.html @@ -16,7 +16,7 @@ Notice that for n independent languages, you can simply create n independent Langium projects."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/scoping/class-member/index.html b/pr-previews/pr-246/docs/recipes/scoping/class-member/index.html index cf749e7a..a0e9d1de 100644 --- a/pr-previews/pr-246/docs/recipes/scoping/class-member/index.html +++ b/pr-previews/pr-246/docs/recipes/scoping/class-member/index.html @@ -14,7 +14,7 @@ class A { b: B; } class B { value: string; } function test(): void { const a = new A(); const b = a.b; // Refers to the `b` defined in class `A` const value = b.value; // Refers to the `value` defined in class `B` } Member based scoping like this requires not only a modification of the default scoping provider, but also some other prerequisites."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/scoping/file-based/index.html b/pr-previews/pr-246/docs/recipes/scoping/file-based/index.html index 3d63d994..dbb158ff 100644 --- a/pr-previews/pr-246/docs/recipes/scoping/file-based/index.html +++ b/pr-previews/pr-246/docs/recipes/scoping/file-based/index.html @@ -14,7 +14,7 @@ Using export makes a symbol from the current file available for referencing from another file. Using import allows to reference symbols for a different file. To make things easier I will modify the “Hello World” example from the learning section."> - + +
diff --git a/pr-previews/pr-246/docs/recipes/scoping/qualified-name/index.html b/pr-previews/pr-246/docs/recipes/scoping/qualified-name/index.html index 8322c623..3232b914 100644 --- a/pr-previews/pr-246/docs/recipes/scoping/qualified-name/index.html +++ b/pr-previews/pr-246/docs/recipes/scoping/qualified-name/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/reference/configuration-services/index.html b/pr-previews/pr-246/docs/reference/configuration-services/index.html index d82ded2d..e440901a 100644 --- a/pr-previews/pr-246/docs/reference/configuration-services/index.html +++ b/pr-previews/pr-246/docs/reference/configuration-services/index.html @@ -16,7 +16,7 @@ Shared Services The shared services are services that are shared across all Langium languages. In many applications there is only one Langium language, but the overall structure of the services is the same."> - + +
diff --git a/pr-previews/pr-246/docs/reference/document-lifecycle/index.html b/pr-previews/pr-246/docs/reference/document-lifecycle/index.html index 3d1756bf..813785d1 100644 --- a/pr-previews/pr-246/docs/reference/document-lifecycle/index.html +++ b/pr-previews/pr-246/docs/reference/document-lifecycle/index.html @@ -16,7 +16,7 @@ Parsed when an AST has been generated from the content of the document."> - + +
diff --git a/pr-previews/pr-246/docs/reference/glossary/index.html b/pr-previews/pr-246/docs/reference/glossary/index.html index 0aeaf268..f6075d7f 100644 --- a/pr-previews/pr-246/docs/reference/glossary/index.html +++ b/pr-previews/pr-246/docs/reference/glossary/index.html @@ -16,7 +16,7 @@ document: An abstract term to refer to a text file on your file system or an open editor document in your IDE."> - + +
diff --git a/pr-previews/pr-246/docs/reference/grammar-language/index.html b/pr-previews/pr-246/docs/reference/grammar-language/index.html index 0d87451c..d0f63705 100644 --- a/pr-previews/pr-246/docs/reference/grammar-language/index.html +++ b/pr-previews/pr-246/docs/reference/grammar-language/index.html @@ -12,7 +12,7 @@ - + @@ -30,7 +30,7 @@ - +
diff --git a/pr-previews/pr-246/docs/reference/semantic-model/index.html b/pr-previews/pr-246/docs/reference/semantic-model/index.html index a32fdd3d..d03b0a92 100644 --- a/pr-previews/pr-246/docs/reference/semantic-model/index.html +++ b/pr-previews/pr-246/docs/reference/semantic-model/index.html @@ -14,7 +14,7 @@ Inference is the default behavior in Langium. During the generation of the semantic model types, Langium infers the possible types directly from the grammar rules."> - + +
diff --git a/pr-previews/pr-246/en.search-data.min.json b/pr-previews/pr-246/en.search-data.min.json index fbf5809e..b8cecbf6 100644 --- a/pr-previews/pr-246/en.search-data.min.json +++ b/pr-previews/pr-246/en.search-data.min.json @@ -1 +1 @@ -[{"id":0,"href":"/docs/introduction/","title":"What is Langium?","parent":"Documentation","content":"Langium is an open source language engineering tool with first-class support for the Language Server Protocol, written in TypeScript and running in Node.js.\nWhere to go from here? Features If you need a more detailed list of Langium features, you can find them in the features section.\nTry it out If you want to see Langium in action, you can follow the showcases or even the playground.\nLearn Langium If you are convinced by Langium and want to learn more about it, you can start with the learn section.\nMore details If you are looking for more details about Langium, you can find them in the reference section.\nIf you are searching for a certain guide or recipe, you can find them in the recipes section.\n"},{"id":1,"href":"/docs/learn/minilogo/validation/","title":"Validation","parent":"Minilogo tutorial","content":" Overview The Validation Registry Finding Nodes to Validate Registering Validations In this tutorial, we will be talking about implementing validation for your Langium-based language. We recommend first reading the previous tutorial about writing a grammar, as we will assume you\u0026rsquo;re familiar with the topics covered there. We\u0026rsquo;ll also assume that you have a working language to add validation to, so double check that npm run langium:generate succeeds without errors before you proceed.\nFor this tutorial, we\u0026rsquo;ll be implementing validation for the MiniLogo language, but you can use your own language to follow along as well.\nOverview Adding validation is an important step to building a language, as there are often invalid cases that cannot be filtered out through your grammar alone.\nConsider the case of having unique names for identifiers. In MiniLogo we have definitions with names, and we also have parameters that are identified by name. One problem here is if we have several definitions that share the same name. We could also have a similar problem with parameters, where perhaps the same name is used multiple times in the same definition. In the second case, this is most certainly undesirable, but in the first it depends on how you want your language to handle redefinitions.\nLet\u0026rsquo;s consider the case where you want to allow redeclaring a previous definition. This opens the door to allowing redeclaring or shadowing of definitions. If you ever wanted to extend your language down the road, such as by adding the ability to import other programs (along with their definitions) then you might consider allowing a definition to be redefined. However, it could also lead to unintended redeclarations that may be harder to track down. Ultimately, this choice depends on the desired semantics for your language, and is something you should consider carefully.\nIn this example we\u0026rsquo;re going to disallow names that are non-unique for definitions, and we\u0026rsquo;ll be doing the same for arguments of a definition as well.\nThe Validation Registry In order to express these constraints, we need to modify our language\u0026rsquo;s validator. By default, this can be found in src/language/YOUR-LANGUAGE-validator.ts; with a name that corresponds to your language. 
This file begins with a validation registry that extends the default validation registry. The validation registry allows us to register validation checks for our language.\nThe constructor for the registry is of particular interest, as it allows associating validation functions with specific nodes in your AST. Here you can see an example of the constructor below for the default hello world language from the yeoman generator.\n/** * Registry for validation checks. */ export class HelloWorldValidationRegistry extends ValidationRegistry { constructor(services: HelloWorldServices) { super(services); const validator = services.validation.HelloWorldValidator; const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { // we want to add checks here... Person: validator.checkPersonStartsWithCapital }; this.register(checks, validator); } } From this example, we have a single validation for the Person node.\nPerson: validator.checkPersonStartsWithCapital Before we changed our grammar in the last tutorial, the Person node corresponded with a parser rule named Person. Similarly, most nodes that we can validate will share the name of the parser rule that instantiates them. However, there are a couple cases where this is different:\n when Rule infers AnotherName (or uses return), the node\u0026rsquo;s type will be AnotherName when the body of a parser rule has an action (like {AnotherName}, possibly starting with infer) this new name will exist instead for this part of the rule body Finding Nodes to Validate With this in mind, we can look back at our grammar that we\u0026rsquo;ve written for MiniLogo (from the last tutorial), and find the parser rules that refer to the nodes we want to validate. For this language we have a pair of cases to check, as mentioned above:\n Validate that definitions have unique names in a Model Validate that arguments have unique names in a Definition In order to perform a validation, we need to know the type of that node to validate. Beyond checking our grammar to find this, we can also check the semantic model (akin to the abstract syntax) of our language. This was generated while running npm run langium:generate, and is located in src/language/generated/ast.ts. Peeking into this model, we can see that our rule for Model was written like so:\nentry Model: (stmts+=Stmt | defs+=Def)*; which produces the following node type in our semantic model:\nexport interface Model extends AstNode { defs: Array\u0026lt;Def\u0026gt; stmts: Array\u0026lt;Stmt\u0026gt; } Registering Validations So, we can register a validation on all nodes of type Model (which should be just the root), like so. Note the import coming from the generated file, which contains the definitions that compose our semantic model. The name ast.ts reflects it\u0026rsquo;s usage as identifying node types that constitute an AST in our language (akin to an abstract syntax).\nimport { Model } from './generated/ast'; ... const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { Model: (m: Model, accept: ValidationAcceptor) =\u0026gt; { // and validate the model 'm' here } }; We also have a perfectly good validator class that\u0026rsquo;s just below this part of the file that we can use, but it\u0026rsquo;s still setup to perform validation on the old Person node. We can safely remove the old function, add our custom validation there, and associate it back with our validation registry checks.\nThe updated validator class looks like so:\n/** * Implementation of custom validations. 
*/ export class HelloWorldValidator { // our new validation function for defs checkUniqueDefs(model: Model, accept: ValidationAcceptor): void { // create a set of visited functions // and report an error when we see one we've already seen const reported = new Set(); model.defs.forEach(d =\u0026gt; { if (reported.has(d.name)) { accept('error', `Def has non-unique name '${d.name}'.`, {node: d, property: 'name'}); } reported.add(d.name); }); } } To call this validator in our registry, we can modify the check that is listed in our registry like so (removing the previously written lambda/arrow function).\nconst checks: ValidationChecks\u0026lt;MiniLogoAstType\u0026gt; = { Model: validator.checkUniqueDefs, }; Great! Now we have a simple validation in place to guard against duplicate definitions in MiniLogo.\nNow that we\u0026rsquo;ve shown how this can be done, we can implement this for parameters as well. Looking at our grammar, we can see params are contained as part of a Definition, so we\u0026rsquo;ll register validation for Definition nodes and report if any parameter are duplicated.\nconst checks: ValidationChecks\u0026lt;MiniLogoAstType\u0026gt; = { Model: validator.checkUniqueDefs, Def: validator.checkUniqueParams }; And we can define this new function in our validator class, which is very close in structure to our first function.\ncheckUniqueParams(def: Def, accept: ValidationAcceptor): void { const reported = new Set(); def.params.forEach(p =\u0026gt; { if (reported.has(p.name)) { accept('error', `Param ${p.name} is non-unique for Def '${def.name}'`, {node: p, property: 'name'}); } reported.add(p.name); }); } Although we\u0026rsquo;ve only implemented a pair of validations, hopefully this demonstrates the flexibility of the validator API. The validator can help enforce constraints or features of your language, and ensure that your programs are correct. You could also explore more customized validations for specific cases, perhaps where a parameter and a definition share the same name \u0026ndash; which is not handled here. So long as you can identify the AST node type that you need to validate, you can implement the logic here.\nThat\u0026rsquo;s all for validation. Next we\u0026rsquo;ll be talking about how we can customize our CLI.\n"},{"id":2,"href":"/docs/learn/minilogo/customizing_cli/","title":"Customizing the CLI","parent":"Minilogo tutorial","content":" Overview About the Command Line Interface Adding a Parse and Validate Action Building and Running the CLI In this tutorial, we\u0026rsquo;ll be talking about customizing the command line interface for your language. We recommend reading through previous tutorials about writing a grammar and validation. Once you have a good grasp on those concepts, then you should be all set for setting up a CLI. We will also continue to use the MiniLogo language as a motivating example.\nOverview Once you have a grammar and some validation in place, you may want to start configuring a basic CLI for your language. This is an important step where your language begins to become more accessible to other programs. Having a CLI for your language is a powerful way to access functionality that is expressed through Langium, but without having to interact directly with Langium. 
A well designed CLI can be used by other applications to provide advanced language features, without making those other applications unnecessarily complex.\nAbout the Command Line Interface If you\u0026rsquo;ve been using a language built with the yeoman generator for Langium, you should be able to find your CLI defined in src/cli/index.ts. This file describes the general layout of your languages\u0026rsquo;s command line interface, and lets you register specific commands. By default, you\u0026rsquo;re provided with a single command for your CLI, the generate command.\nMuch like the command implies, it allows you to take a program written in your DSL, parse it, and traverse the AST to produce some sort of generated output. We won\u0026rsquo;t talk about the generator itself in this tutorial (that will come in the next tutorial on generation). Instead we\u0026rsquo;ll focus on a simple example for parsing and validating a program, which allows learning more about the CLI itself.\nAdding a Parse and Validate Action To start, let\u0026rsquo;s write up a custom action to allow us to parse and validate a program in our language. If we\u0026rsquo;ve already written up a grammar, and already added some basic validation, then all we have to do is hookup the CLI action here to get this to work. This action will help us verify that our MiniLogo programs have no syntax errors, and also pass our custom validations.\nFeel free to keep (or remove) the existing generate action, as we won\u0026rsquo;t be setting that up until the next tutorial. We\u0026rsquo;ll be sure to present example code for that as well, so don\u0026rsquo;t worry about deleting functions that you\u0026rsquo;ll need later.\nIn order to add our new command, we need to register it in the default export for the index.ts file. In this function, there\u0026rsquo;s a command object, which is a collection of commands for our CLI. Let\u0026rsquo;s call our command parseAndValidate, and give it some extra details, like:\n arguments: Indicating that it takes a single file a description detailing what this action does an action that performs the actual parsing and validation We could also add additional options, but we won\u0026rsquo;t be doing that for this action.\nWe can register our parse and validate action like so:\nprogram .command('parseAndValidate') .argument('\u0026lt;file\u0026gt;', 'Source file to parse \u0026amp; validate (ending in ${fileExtensions})') .description('Indicates where a program parses \u0026amp; validates successfully, but produces no output code') .action(parseAndValidate) // we'll need to implement this function Finally, we need to implement the parseAndValidate function itself. This will allow us to be able to parse \u0026amp; validate our programs, but without producing any output. We just want to know when our program is \u0026lsquo;correct\u0026rsquo; by the constraints of our language implementation.\nUsing parts of the existing generateAction function we got by default, we can do our parsing \u0026amp; validation without having to write too much new code at all.\nimport { extractDocument } from './cli-util'; ... /** * Parse and validate a program written in our language. * Verifies that no lexer or parser errors occur. 
* Implicitly also checks for validation errors while extracting the document * * @param fileName Program to validate */ export const parseAndValidate = async (fileName: string): Promise\u0026lt;void\u0026gt; =\u0026gt; { // retrieve the services for our language const services = createHelloWorldServices(NodeFileSystem).HelloWorld; // extract a document for our program const document = await extractDocument(fileName, services); // extract the parse result details const parseResult = document.parseResult; // verify no lexer, parser, or general diagnostic errors show up if (parseResult.lexerErrors.length === 0 \u0026amp;\u0026amp; parseResult.parserErrors.length === 0 ) { console.log(chalk.green(`Parsed and validated ${fileName} successfully!`)); } else { console.log(chalk.red(`Failed to parse and validate ${fileName}!`)); } }; Some amount of the contents for our custom action are shared with the generateAction function. This isn\u0026rsquo;t surprising given that we still need to set up our language\u0026rsquo;s services.\nBuilding and Running the CLI Now that we have our new action in place, we\u0026rsquo;ll want to build and verify the CLI works for a program written in our language.\nIf you\u0026rsquo;ve been following along from the hello world example produced by the yeoman generator, then you\u0026rsquo;ll have some errors at this point that need to be corrected as follows.\nIf you have errors with regards to any imports of HelloWorld..., this is likely related to your grammar NAME in your langium file being something different than the original HelloWorld. The name of these imports will change based on your grammar file\u0026rsquo;s name after npm run langium:generate, so in each case you should be able to change each import to MyLanguage... to resolve the issue.\nYou may also have build errors related to the generator logic, especially if it was written for the hello-world semantic model. For now, we can comment out the generator function\u0026rsquo;s contents in src/cli/generator.ts, return an empty string, and comment/remove the imports to make Typescript happy. In the next tutorial, we\u0026rsquo;ll come back to it and implement an initial version of a generator for our language.\nIf you have any other errors while building, double check that the exported \u0026amp; imported names match up. More often than note there\u0026rsquo;s a small discrepancy here, especially when you use a different language name than the default.\nAt this point, you should be able to run the following with no errors from the project root.\nnpm run langium:generate npm run build If everything looks good, you should have access to the CLI in /bin/cli. We also need a program we can test and validate. For the MiniLogo language we have a simple example program that we can validate:\ndef test() { pen(down) move(10,10) pen(up) } test() We\u0026rsquo;ll save this under our project root as test.logo, and we can test that it\u0026rsquo;s correct using our CLI like so:\n./bin/cli parseAndValidate test.logo NOTE: The langium-minilogo repo places test.logo in an examples subdirectory under the project root. So, for that case, the CLI usage would be:\n./bin/cli parseAndValidate examples/test.logo It does not matter where you place your .logo files. 
Organize them as you see fit.\nWe should get an output indicating that there were no errors with our program.\n Parsed and validated test.logo successfully!\n If you get a message that indicates you need to choose a file with a given extension, you\u0026rsquo;ll want to go back and update your list of extensions in your package.json and your langium-config.json in your project root. Then you\u0026rsquo;ll need to run npm run langium:generate followed by npm run build to get that change incorporated into your CLI.\nIf we wanted to verify that we can get errors, we can modify our program a bit to include a duplicate definition (which we should have a validation for, as we implemented in the validation tutorial).\ndef test() { pen(down) move(10,10) pen(up) } // redefinition of test, should 'not' validate def test() { pen(up) } test() Running the CLI again should show that this program has an error, and better yet it will show us exactly the error in question.\n There are validation errors:\nline 7: Def has non-unique name \u0026lsquo;test\u0026rsquo;. [test]\n This is perfect, as we didn\u0026rsquo;t have to implement too much more logic to get validation in our CLI. Since we already hooked up our validation service before, the CLI just handles the interaction with an external program. This separation of concerns makes for a very flexible implementation that is easy to adapt over time.\nThat sums up how to add basic CLI functionality. In the next tutorial, we will be talking about generation in more detail, specifically about techniques that you can use to traverse your AST and produce a generated output.\n"},{"id":3,"href":"/docs/learn/minilogo/generation/","title":"Generation","parent":"Minilogo tutorial","content":" Setting up the Generator API Deciding Output to Generate Generating from Statements Writing an Expression Evaluator Generating from Statements with the Evaluator Connecting the Generator to the CLI In this tutorial we\u0026rsquo;ll be showing how to implement basic generation for your language. When we\u0026rsquo;re talking about generation, we\u0026rsquo;re talking about transforming an AST from your Langium-based language into some output target. This could be another language of similar functionality (transpilation), a lower level language (compilation), or generating some artifacts/data that will be consumed by another application. If you haven\u0026rsquo;t already, make sure to go back over and check out the tutorial on customizing your CLI, as it touches on details about how to implement endpoints for your application (like generation).\nPer usual, we\u0026rsquo;ll be using the MiniLogo language as a motivating example here.\nWe\u0026rsquo;ll be describing how to write a simple MiniLogo generator to output drawing a JSON array of drawing instructions. This tutorial will give you a general idea of how you can traverse an AST to produce generated output.\nSetting up the Generator API To write the generator, we\u0026rsquo;re going to work in the src/cli/generator.ts file. If you\u0026rsquo;re using a language produced by the yeoman generator for Langium, then you should already have a function in here called generateJavascript. For MiniLogo, we\u0026rsquo;ll change this to generateCommands, which will generate drawing commands to be handled later. 
We will also change the function signature to take a Model, and return a string of the generated file path.\n// import the 'Model' type from our semantic model import { Model } from '../language/generated/ast.ts'; export function generateCommands(mode: Model, filePath: string, destination: string | undefined): string { // ... } This function will serve as our generator endpoint. All MiniLogo programs that we want to generate from will be processed from here.\nNow, our objective is to take a program like this:\ndef test() { pen(down) move(10,10) pen(up) } test() And translate it into a generated JSON-like list of drawing commands like so:\n[ { cmd: 'penDown' }, { cmd: 'move', x: 10, y: 10 }, { cmd: 'penUp' } ] Deciding Output to Generate Notice that there\u0026rsquo;s no notion of macros, definitions, for loops, or other constructs that are present in MiniLogo. We only need to produce a generated output that contains information relevant to our semantic domain. If you remember this term from the very beginning of writing our grammar, then you\u0026rsquo;ll likely also remember that our semantic domain is a series of transformations performed on a drawing context. With this in mind, we can safely reduce a MiniLogo program to such a series of transformations on the pen, position, and color. We don\u0026rsquo;t need to include anything else. In this context, you could think of it like a form of evaluation.\nTo be able to produce this output, we need to be able to traverse through all nodes of our AST. We can perform such a traversal by creating functions that map from our AST to our generated output. This is as simple as accessing the properties stored on a node, and writing functions to process the types of those properties such that generation is defined for every type of node in your AST.\nAn example of this would be defining a generateStatements function that takes a list of Statements, and produces some generated result from those statements. Anytime we were working with a node that contained statements, we could invoke this function on it, and return the results.\nWe can add this function to our generateCommands function to begin generation from the top-level statements in our Model.\nexport function generateCommands(mode: Model, filePath: string, destination: string | undefined): string { const result: Object[] = generateStatements(model.stmts); } ... function generateStatements(stmts: Stmt[]): Object[] { ... } As a side note, to support generation with string content (like for generating file/program contents) we\u0026rsquo;ve added a CompositeGeneratorNode that is designed to help collect generated output. This is located in our cli-util.ts, and provides more structure with constructing textual outputs, without resorting to direct manipulation of strings.\nGenerating from Statements Now, let\u0026rsquo;s expand on generateStatements. From our grammar, there are 5 types of statements:\n pen move macro for color We we want to expand our function to handle each of these cases. This is easy to do using some special isTYPE functions made available from our semantic model. These are automatically generated from our grammar, and allow us to verify the type of a node from our AST at runtime.\nimport { isPen, isMove, isMacro, isFor, isColor } from '../language/generated/ast'; ... if(isPen(stmt)) { ... } else if(isMove(stmt)) { ... } else if(isMacro(stmt)) { ... } else if(isFor(stmt)) { ... } else if (isColor(stmt)) { ... 
} For isPen we have the easiest case where we could emit something like so:\n{ cmd: stmt.mode === 'up' ? 'penUp' : 'penDown' }; However, for the rest of the statements, we need to be able to evaluate expressions first.\nWriting an Expression Evaluator We need to evaluate our expressions to final values for statements, as we don\u0026rsquo;t want to emit literal expressions like 1 + x * 5; but rather their evaluated result. We\u0026rsquo;ll handle this in a new evalExprWithEnv function.\n// map of names to values type MiniLogoGenEnv = Map\u0026lt;string,number\u0026gt;; // evalutes exprs in the context of an env function evalExprWithEnv(e: Expr, env: MiniLogoGenEnv): number { ... } As we mentioned before, in order to perform generation in this context, we\u0026rsquo;re also writing an evaluator for our language. Thankfully, MiniLogo is relatively simple, especially since it doesn\u0026rsquo;t have variables outside of definitions and for loops.\nSo let\u0026rsquo;s write our expression evaluator. Assuming we have the function declaration from above, our first case to be added into that function is for Lit. Again, this is imported from our generated semantic model.\nif(isLit(e)) { return e.val; } Pretty easy. A literal returns its value. Now for references.\nif(isRef(e)) { const v = env.get(e.val.ref?.name ?? ''); if (v !== undefined) { return v; } // handle the error case... } Since we have cross references, we can retrieve the node in question (ref), and check if we have a value stored for its name. In the case that we do, we return the value, otherwise we would want to report an error.\nFor binary expressions, we can invoke evalExprWithEnv recursively on the left \u0026amp; right operands. Since we used actions to restructure our semantic model a bit, we have access to this isBinExpr function to find BinExpr nodes. It\u0026rsquo;s quite convenient, since we can now handle all 4 cases at once.\nif(isBinExpr(e)) { let opval = e.op; let v1 = evalExprWithEnv(e.e1, env); let v2 = evalExprWithEnv(e.e2, env); switch(opval) { case '+': return v1 + v2; case '-': return v1 - v2; case '*': return v1 * v2; case '/': return v1 / v2; default: throw new Error(`Unrecognized bin op passed: ${opval}`); } } For negated expressions, it\u0026rsquo;s also fairly straight forward. We invert whatever value we would get normally.\nif (isNegExpr(e)) { return -1 * evalExprWithEnv(e.ne, env); } Lastly, for groups we extract the \u0026lsquo;grouped\u0026rsquo; value and evaluate it.\nif(isGroup(e)) { return evalExprWithEnv(e.ge, env); } Lastly, it\u0026rsquo;s always a good measure to sanity check that you aren\u0026rsquo;t missing a case. Throwing an error is often much more desirable than having something silently fail, and produce strange results on generation. This means adding a default for your switches, and a final else clause to handle unexpected nodes.\nWith all those cases above, we can combine them into a series of else if clauses to have a clean case-by-case check.\nGenerating from Statements with the Evaluator Now that we can evaluate expressions, we can handle the rest of our statement cases. 
In order to incorporate our env, we\u0026rsquo;ll also want to update our generateStatements function, and create a new evalStmt function to help out.\nfunction generateStatements(stmts: Stmt[]): Object[] { // minilogo evaluation env let env : MiniLogoGenEnv = new Map\u0026lt;string,number\u0026gt;(); // generate mini logo cmds off of statements return stmts.flatMap(s =\u0026gt; evalStmt(s,env)).filter(e =\u0026gt; e !== undefined) as Object[]; } /** * Takes an statement, an environment, and produces a list of generated objects */ function evalStmt(stmt: Stmt, env: MiniLogoGenEnv) : (Object | undefined)[] { if (isPen(stmt)) { return [{ cmd: stmt.mode === 'up' ? 'penUp' : 'penDown' }]; } // ... the rest of our cases will follow ... } This gives us an env that can be updated by evaluating each statement, and persist from one to another; which is what we want for MiniLogo. Now, for isMove, we just need to evaluate the x \u0026amp; y arguments to their values using this env\nif (isMove(stmt)) { return [{ cmd: 'move', x: evalExprWithEnv(stmt.ex, env), y: evalExprWithEnv(stmt.ey, env) }]; } For isMacro we need to save and restore our execution environment after the macro has been evaluated. We can do this by generating a new env, setting the parameters from the arguments, and passing that new env to the macro\u0026rsquo;s statements instead.\nKeep in mind arguments need to be evaluated before setting them into the env, and we want to carefully do this using the original env, not the new one being constructed. If there are names that already exist, and would be shadowed by this macro, then it could change the result of the macro (or even the value of subsequent arguments).\n// get the cross ref const macro: Def = stmt.def.ref as Def; // copied env let macroEnv = new Map(env); // produce pairs of string \u0026amp; exprs, using a tmp env // this is important to avoid mixing of params that are only present in the tmp env w/ our actual env let tmpEnv = new Map\u0026lt;string, number\u0026gt;(); // evalute args independently, staying out of the environment macro.params.map((elm, idx) =\u0026gt; tmpEnv.set(elm.name, evalExprWithEnv(stmt.args[idx], macroEnv))); // add new params into our copied env tmpEnv.forEach((v,k) =\u0026gt; macroEnv.set(k,v)); // evaluate all statements under this macro return macro.body.flatMap(s =\u0026gt; evalStmt(s, macroEnv)); For isFor, we also use a copied env, so that we don\u0026rsquo;t alter the original env outside of the loop.\n// compute for loop bounds // start let vi = evalExprWithEnv(stmt.e1, env); // end let ve = evalExprWithEnv(stmt.e2, env); let results : (Object | undefined)[] = []; // perform loop const loopEnv = new Map(env); while(vi \u0026lt; ve) { loopEnv.set(stmt.var.name, vi++); stmt.body.forEach(s =\u0026gt; { results = results.concat(evalStmt(s, new Map(loopEnv))); }); } return results; Lastly, to handle isColor, check whether one set of properties is defined or the other (like color vs. any of the r,g,b properties).\nif (stmt.color) { // literal color text or hex return [{cmd:'color', color: stmt.color}] } else { // color as rgb const r = evalExprWithEnv(stmt.r!, env); const g = evalExprWithEnv(stmt.g!, env); const b = evalExprWithEnv(stmt.b!, env); return [{cmd:'color', r, g, b}] } With that, we\u0026rsquo;re effectively done writing the core of our generator! 
The last changes to make are to write the output to a file, and to connect what we\u0026rsquo;ve written here with a command in our CLI.\nConnecting the Generator to the CLI To do this, we can go back to the top of our generator, and update the generateCommands function to write the generated result to a file. Most of the structure here is carried over from the original code first setup by the yeoman generator, which makes it convenient to add in.\nexport function generateCommands(model: Model, filePath: string, destination: string | undefined): string { const data = extractDestinationAndName(filePath, destination); const generatedFilePath = `${path.join(data.destination, data.name)}.json`; if (!fs.existsSync(data.destination)) { fs.mkdirSync(data.destination, { recursive: true }); } const result = generateStatements(model.stmts); fs.writeFileSync(generatedFilePath, JSON.stringify(result, undefined, 2)); return generatedFilePath; } And to connect it to the CLI, which is setup in src/cli/index.ts, we can register it by slightly modifying the existing generateAction endpoint that was there by default.\nexport const generateAction = async (fileName: string, opts: GenerateOptions): Promise\u0026lt;void\u0026gt; =\u0026gt; { const services = createHelloWorldServices(NodeFileSystem).HelloWorld; const model = await extractAstNode\u0026lt;Model\u0026gt;(fileName, services); // now with 'generateCommands' instead const generatedFilePath = generateCommands(model, fileName, opts.destination); console.log(chalk.green(`MiniLogo commands generated successfully: ${generatedFilePath}`)); }; Towards the bottom of the same file, we\u0026rsquo;ll modify the description for the logic that registers this action:\nprogram .command('generate') .argument('\u0026lt;file\u0026gt;', `source file (possible file extensions: ${fileExtensions})`) .option('-d, --destination \u0026lt;dir\u0026gt;', 'destination directory of generating') // new description .description('generates MiniLogo commands that can be used as simple drawing instructions') .action(generateAction); And that\u0026rsquo;s it. Now we can run the following to generate commands from a MiniLogo file of our choice.\nnpm run build ./bin/cli generate test.logo This should produce generated/test.json, which contains a JSON array of the drawing commands generated by our program. For the following example program:\ndef test() { pen(down) move(10,10) pen(up) } test() our JSON output should be:\n[ { \u0026quot;cmd\u0026quot;: \u0026quot;penDown\u0026quot; }, { \u0026quot;cmd\u0026quot;: \u0026quot;move\u0026quot;, \u0026quot;x\u0026quot;: 10, \u0026quot;y\u0026quot;: 10 }, { \u0026quot;cmd\u0026quot;: \u0026quot;penUp\u0026quot; } ] If you\u0026rsquo;re looking at the implementation of MiniLogo that we\u0026rsquo;ve already written in the Langium organization on Github, you may notice that the program and output there are slightly different. This interpretation of MiniLogo has gone through some iterations, and so there are some slight differences here and there. What\u0026rsquo;s most important is that your version produces the generated output that you expect.\nWe could continue to extend on this with new features, and generate new sorts of output using a given input language. In this tutorial, we\u0026rsquo;re able to take a MiniLogo program and convert it into some simple JSON drawing instructions that can be consumed by another program. 
This opens the door for us to write such a program in another language, such as Python or JavaScript, and draw with these results. In later tutorials, we'll be talking about how to run Langium in the web with generation, so that we can immediately verify our results by drawing on an HTML5 canvas.

We recommend that you next read the guide on bundling your language with Langium to reduce its size, before moving on to the tutorial about bundling an extension. This is an important step before deployment as an extension for VSCode, and also if you're planning to later deploy your language in the web.

"},{"id":4,"href":"/docs/learn/minilogo/building_an_extension/","title":"Building an Extension","parent":"Minilogo tutorial","content":" Setting up the Scripts Generate an Extension Installing Adding an Icon Conclusion In this tutorial we'll be going over how to build a VSIX extension (VSCode extension) for your Langium-based language. This will allow you to provide LSP support for your language in VSCode. We'll assume that you've already looked at the previous tutorial, and have had time to read the guide on bundling, so that you're ready to build an extension. At this point we assume that your language is also working, and that there are no issues running npm run langium:generate or npm run build. If there are, you'll want to correct those first.

Setting up the Scripts

To get started, you'll want to have a language expressed in Langium, such as Lox or MiniLogo. If you have been following along with these tutorials, you should already have something ready. If you don't, you can also use the default language generated by the Yeoman generator for Langium, presented in the workflow section.

Regardless of what you're working with, you'll want to make sure you have the following scripts in your package.json.

{
    ...
    "vscode:prepublish": "npm run esbuild-base -- --minify && npm run lint",
    "esbuild-base": "esbuild ./src/extension/main.ts --bundle --outfile=out/main.js --external:vscode --format=cjs --platform=node",
    ...
}

The esbuild-base script is particularly important, as it will be constructing the extension itself.

You'll also need to install esbuild if you haven't already.

npm i --save-dev esbuild

Generate an Extension

At this point we're ready to generate an extension. We need the VS Code Extension Manager (vsce) to do this, so make sure to download it from npm via npm install -g @vscode/vsce (or install it locally, as per your preference). Once you have it installed, you can invoke it like so from the root of your project.

vsce package

You should now see a VSIX extension file in the root of your project. The name of this file will correspond with the name and version properties listed in your package.json.
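As a point of reference, with a manifest containing fields like the following (the values here are just placeholders), vsce would emit a file named minilogo-0.1.0.vsix:

{
    ...
    "name": "minilogo",
    "version": "0.1.0",
    ...
}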
And indeed, for MiniLogo this produced minilogo-0.1.0.vsix.

Installing

To install the extension, you can right-click the extension file and select "Install VSIX Extension" at the bottom of the list.

You should see a small indication at the bottom right of your screen that your VSIX extension has been successfully installed, like so:

Indication that the VSIX extension has been installed

You can verify this by going to your extensions tab and looking at the enabled extensions, where you should find the name of your language (again corresponding to the name property in your package.json).

Assuming the extension is enabled and working correctly, you can open any file that ends in one of the extensions registered for your language, and you should immediately observe the syntax highlighting kicking in. Interaction with your language should show that syntax errors are recognized, and other LSP functionalities are working as intended (such as renaming of symbols).

Adding an Icon

You may notice that your extension does not have an icon to start with. This is a small thing that we can quickly fix: it is as simple as adding a small PNG icon somewhere in your project repo, such as the root. You'll also want to set the icon property in your package.json with the relative path to this icon.

{
    ...
    "name": "minilogo",
    "displayName": "minilogo",
    "icon": "icon.png",
    "publisher": "TypeFox",
    ...
}

In our example, we're using a simple turtle icon from onlinewebfonts as a placeholder.

When you regenerate your extension & reinstall it, you should get the same icon that you packaged it with.

Conclusion

And that's it: at this point you have an extension for your language that you can use for development. After some testing and improvements, you could even publish it!

As a quick aside, it's important to keep the extensions that your language recognizes synchronized in both your package.json and your langium-config.json. If you do make changes to your extensions, it's a good idea to double-check that these are both synced up, and to do a full rebuild to get those changes into your extension.

And that's it for building an extension. In the next tutorial, we'll be setting up Langium + Monaco in the web.

"},{"id":5,"href":"/docs/learn/minilogo/langium_and_monaco/","title":"Langium + Monaco Editor","parent":"Minilogo tutorial","content":" Technologies You'll Need Getting your Language Setup for the Web Factoring out File System Dependencies Setting up Monaco Setting up a Static Page Serving via NodeJS Updated on Oct. 4th, 2023 for usage with monaco-editor-wrapper 3.1.0 & above, as well as Langium 2.0.2

In this tutorial we'll be talking about running Langium in the web with the Monaco editor. If you're not familiar with Monaco, it's the editor that powers VS Code. We're quite fond of it at TypeFox, so we've taken the time to write up this tutorial to explain how to integrate Langium in the web with Monaco, no backend required.

Although we're using Monaco in this tutorial, that does not mean that you cannot use another code editor of your choice. For example, you can use Code Mirror with Langium as well.
Generally, if an editor has LSP support, it is very likely you can integrate it easily with Langium, since Langium is LSP compatible.

Without further ado, let's jump into getting your web-based Langium experience set up!

Technologies You'll Need

Langium 2.0.2 or greater
Monaco Editor Wrapper 3.1.0 or greater
ESBuild 0.18.20 or greater

Getting your Language Setup for the Web

To begin, you're going to need a Langium-based language to work with. We have already written MiniLogo in Langium as an example for deploying a language in the web. However, if you've been following along with these tutorials so far, you should be ready to move your own language into a web-based context.

Per usual, we'll be using MiniLogo as the motivating example here.

Factoring out File System Dependencies

In order to build for the browser, we need to create a bundle that is free of any browser-incompatible modules. To do this, let's create a new entry point for our language server in src/language-server/main-browser.ts. This will mirror the regular entry point that we use to build already, but will target a browser-based context instead. We'll start with the following content:

import { startLanguageServer, EmptyFileSystem } from 'langium';
import { BrowserMessageReader, BrowserMessageWriter, createConnection } from 'vscode-languageserver/browser.js';
// your services & module name may differ based on your language's name
import { createMiniLogoServices } from './minilogo-module.js';

declare const self: DedicatedWorkerGlobalScope;

/* browser specific setup code */
const messageReader = new BrowserMessageReader(self);
const messageWriter = new BrowserMessageWriter(self);

const connection = createConnection(messageReader, messageWriter);

// Inject the shared services and language-specific services
const { shared, MiniLogo } = createMiniLogoServices({ connection, ...EmptyFileSystem });

// Start the language server with the shared services
startLanguageServer(shared);

Again, this is based on code that was originally produced by the yeoman generator, so it should look familiar. Most of it is in line with what's contained in the main.ts file; the exceptions are the message readers & writers, and the notion of an EmptyFileSystem for the browser. There is a virtual file system API that we could utilize in most modern browsers, but for this tutorial we'll assume we aren't using any file system. Instead we'll have a single source 'file' located in memory.

We'll also need to resolve the missing DedicatedWorkerGlobalScope type, which is not accessible until we update the tsconfig.json in our project root: we need to supplement the lib entry with DOM and WebWorker. From the yeoman generator example, the lib entry usually has just ["ESNext"].

{
    "compilerOptions": {
        ...
        "lib": ["ESNext", "DOM", "WebWorker"]
    }
}

Now that we have a new entry point for the browser, we need to add a script to our package.json to build a web worker for this language. The bundle this script produces will contain the language server for your language. The following script example is specific to MiniLogo, but should capture the general approach quite nicely:

{
    ...
    "build:worker": "esbuild --minify ./out/language-server/main-browser.js --bundle --format=iife --outfile=./public/minilogo-server-worker.js",
}

Assuming esbuild is installed, and we've properly factored out any modules that are not suitable for a browser-based context, we should be good to go!

Running npm run build:worker, we should see that the bundle is generated without issue. If you're still having problems building the worker, double-check that you're not coupled to fs or other file-system-dependent modules in a related file.

Note that although our generator still uses the file system, that is not relevant for the worker bundle to function.

Setting up Monaco

Now we're going to set up Monaco, but not with Langium yet, as we want to be sure it's working first before connecting the two.

For convenience, we're going to use the Monaco Editor Wrapper (MER) to wrap around some of Monaco's core functionality, along with the Monaco Editor Workers package to assist. These packages are both maintained by TypeFox, and are designed to make it easier to use Monaco in a web-based context. We'll be using the following versions of these packages:

Monaco Editor Wrapper version 3.1.0
monaco-editor-workers version 0.39.0

Both of these packages should be installed as dependencies for your language. In particular, this guide will assume that you're using version 3.1.0 or later of the monaco-editor-wrapper package, and version 0.39.0 of the monaco-editor-workers package.

Additionally, we'll want a way to serve the bundled language server. The choice of how you want to go about this is ultimately up to you. Previously we've recommended express as a development dependency (don't forget to also add @types/express), as a powerful & lightweight NodeJS server framework. Here, however, we'll go with the built-in NodeJS support for standing up a web server; again, the choice is yours.

We'll also want to add some more scripts to our package.json to copy over the necessary files from the monaco-editor-wrapper & monaco-editor-workers packages into the public folder. We'll be referencing these library assets to set up the webpage for Langium + Monaco.

{
    ...
    "prepare:public": "node scripts/prepare-public.mjs",
    "build:web": "npm run build && npm run prepare:public && npm run build:worker && node scripts/copy-monaco-assets.mjs",
}

Both scripts reference mjs files that need to be added as well into the scripts folder:

scripts/prepare-public.mjs

import * as esbuild from 'esbuild'
import shell from 'shelljs'

// setup & copy over css & html to public
shell.mkdir('-p', './public');
shell.cp('-fr', './src/static/*.css', './public/');
shell.cp('-fr', './src/static/*.html', './public');

// bundle minilogo.ts, and also copy to public
await esbuild.build({
    entryPoints: ['./src/static/minilogo.ts'],
    minify: true,
    sourcemap: true,
    bundle: true,
    outfile: './public/minilogo.js',
});

scripts/copy-monaco-assets.mjs

import shell from 'shelljs'

// copy workers to public
shell.mkdir('-p', './public/monaco-editor-workers/workers');
shell.cp(
    '-fr',
    './node_modules/monaco-editor-workers/dist/index.js',
    './public/monaco-editor-workers/index.js'
);
shell.cp(
    '-fr',
    './node_modules/monaco-editor-workers/dist/workers/editorWorker-es.js',
    './public/monaco-editor-workers/workers/editorWorker-es.js'
);
shell.cp(
    '-fr',
    './node_modules/monaco-editor-workers/dist/workers/editorWorker-iife.js',
    './public/monaco-editor-workers/workers/editorWorker-iife.js'
);

This keeps these extra details out of our package.json, and lets us focus on the overall goal of each step.

The last script, build:web, is there to provide a convenient way to invoke all the intermediate build steps in sequence. However, you'll want to wait before running build:web, as we still need to add our static assets to make it work; those will come in the next step.

As a quick note, if you went with another editor, you would want to make sure that the assets required for that editor are also copied into the public folder as part of your output.

Setting up a Static Page

And now for the actual HTML page itself, plus its supporting assets. To keep things organized, we're splitting up the JS and CSS. We'll be putting all of these files into a new location in our project root, src/static/.

Here are the raw contents of the HTML stored in src/static/index.html.
This will serve as a frame for Monaco to be setup within.\n\u0026lt;!DOCTYPE html\u0026gt; \u0026lt;html\u0026gt; \u0026lt;head\u0026gt; \u0026lt;meta charset='utf-8'\u0026gt; \u0026lt;!-- Page \u0026amp; Monaco styling --\u0026gt; \u0026lt;link href=\u0026quot;styles.css\u0026quot; rel=\u0026quot;stylesheet\u0026quot;/\u0026gt; \u0026lt;title\u0026gt;MiniLogo in Langium\u0026lt;/title\u0026gt; \u0026lt;/head\u0026gt; \u0026lt;body\u0026gt; \u0026lt;h1\u0026gt;MiniLogo in Langium\u0026lt;/h1\u0026gt; \u0026lt;!-- Use a wrapper to display Monaco + Canvas side-by-side --\u0026gt; \u0026lt;div id=\u0026quot;page-wrapper\u0026quot;\u0026gt; \u0026lt;!-- Monaco half --\u0026gt; \u0026lt;div class=\u0026quot;half\u0026quot;\u0026gt; \u0026lt;div class=\u0026quot;wrapper\u0026quot;\u0026gt; \u0026lt;div id=\u0026quot;monaco-editor-root\u0026quot;\u0026gt;\u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;!-- Canvas half --\u0026gt; \u0026lt;div class=\u0026quot;half\u0026quot;\u0026gt; \u0026lt;canvas id='minilogo-canvas' width=500 height=600\u0026gt;\u0026lt;/canvas\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;!-- Status message location --\u0026gt; \u0026lt;div style=\u0026quot;text-align:center\u0026quot;\u0026gt; \u0026lt;span id=\u0026quot;status-msg\u0026quot;\u0026gt;\u0026lt;/span\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;br/\u0026gt; \u0026lt;footer\u0026gt; \u0026lt;br/\u0026gt; \u0026lt;p style=\u0026quot;font-style:italic\u0026quot;\u0026gt;Powered by\u0026lt;/p\u0026gt; \u0026lt;img width=\u0026quot;125\u0026quot; src=\u0026quot;https://langium.org/assets/langium_logo_w_nib.svg\u0026quot; alt=\u0026quot;Langium\u0026quot;\u0026gt; \u0026lt;/footer\u0026gt; \u0026lt;!-- Monaco Configuration --\u0026gt; \u0026lt;script type=\u0026quot;module\u0026quot; src=\u0026quot;minilogo.js\u0026quot;\u0026gt;\u0026lt;/script\u0026gt; \u0026lt;/body\u0026gt; \u0026lt;/html\u0026gt; And here\u0026rsquo;s the associated CSS stored in src/static/styles.css. This will style Monaco correctly so it renders as expected.\nhtml,body { background: rgb(33,33,33); font-family: 'Lucida Sans', 'Lucida Sans Regular', 'Lucida Grande', 'Lucida Sans Unicode', Geneva, Verdana, sans-serif; color: white; /* for monaco */ margin: 0; padding: 0; width: 100%; height: 100%; } h1 { text-align: center; } #minilogo-canvas { display: block; margin: 8px auto; text-align: center; } #page-wrapper { display: flex; max-width: 2000px; margin: 4px auto; padding: 4px; min-height: 75vh; justify-content: center; } #page-wrapper .half { display: flex; width: 40vw; } .build { display: block; margin: 8px auto; width: 300px; height: 30px; background: none; border: 2px #fff solid; color: #fff; transition: 0.3s; font-size: 1.2rem; border-radius: 4px; } .build:hover { border-color: #6cf; color: #6cf; cursor: pointer; } .build:active { color: #fff; border-color: #fff; } footer { text-align: center; color: #444; font-size: 1.2rem; margin-bottom: 16px; } @media(max-width: 1000px) { #page-wrapper { display: block; } #page-wrapper .half { display: block; width: auto; } #minilogo-canvas { margin-top: 32px; } #page-wrapper { min-height: auto; } } /* for monaco */ .wrapper { display: flex; flex-direction: column; height: 100%; width: 100%; } #monaco-editor-root { flex-grow: 1; } #status-msg { color: red; } Finally, there\u0026rsquo;s the actual Javascript setting up our Monaco instance (stored in src/static/minilogo.ts), and for setting up Langium as well. 
This is the most complex part of setting up Langium + Monaco in the web, so we'll walk through the file in parts.

(Update on Oct. 4th, 2023: Previously we wrote this as src/static/setup.js. This new file can be considered the same, but reworked into TypeScript & updated for the new versions of Langium & the MER.)

First, we need to import and set up the worker, as well as some language client wrapper configuration.

import { MonacoEditorLanguageClientWrapper, UserConfig } from "monaco-editor-wrapper/bundle";
import { buildWorkerDefinition } from "monaco-editor-workers";
import { addMonacoStyles } from 'monaco-editor-wrapper/styles';

/**
 * Set up Monaco's own workers and also incorporate the necessary styles for the monaco-editor
 */
function setup() {
    buildWorkerDefinition(
        './monaco-editor-workers/workers',
        new URL('', window.location.href).href,
        false
    );
    addMonacoStyles('monaco-editor-styles');
}

Then, we'll want to instantiate our language client wrapper. In previous versions of the monaco-editor-wrapper package (before 2.0.0), configuration was performed by manually setting properties on the MonacoEditorLanguageClientWrapper instance. However, as of 3.1.0 (at the time of writing this), the constructor for MonacoEditorLanguageClientWrapper now takes a configuration object as its first argument. This configuration object allows us to set the same properties as before, but with more fine-grained control over all the properties that are set.

We're going to walk through the parts that will be used to build up this configuration first, and then join the actual configuration object together afterwards.

To start, let's keep in mind that our current language id will be minilogo. This should match the id of the language that will be recognized by our language server.

Then, we'll want to add some static syntax highlighting. To do this we have a couple of choices: using a TextMate or a Monarch grammar. Both will provide us with the ability to tokenize our language and apply styling to those tokens. However, we have to choose one; we cannot use both simultaneously. This is related to how Monaco itself is configured with regards to whether we're using the VSCode API config or the classic editor config.
This makes sense to a degree, as we can only prepare the editor one way or the other.\nFor MiniLogo, our monarch grammar will look like so:\n/** * Returns a Monarch grammar definition for MiniLogo */ function getMonarchGrammar() { return { keywords: [ 'color','def','down','for','move','pen','to','up' ], operators: [ '-',',','*','/','+','=' ], symbols: /-|,|\\(|\\)|\\{|\\}|\\*|\\/|\\+|=/, tokenizer: { initial: [ { regex: /#(\\d|[a-fA-F]){3,6}/, action: {\u0026quot;token\u0026quot;:\u0026quot;string\u0026quot;} }, { regex: /[_a-zA-Z][\\w_]*/, action: { cases: { '@keywords': {\u0026quot;token\u0026quot;:\u0026quot;keyword\u0026quot;}, '@default': {\u0026quot;token\u0026quot;:\u0026quot;string\u0026quot;} }} }, { regex: /(?:(?:-?[0-9]+)?\\.[0-9]+)|-?[0-9]+/, action: {\u0026quot;token\u0026quot;:\u0026quot;number\u0026quot;} }, { include: '@whitespace' }, { regex: /@symbols/, action: { cases: { '@operators': {\u0026quot;token\u0026quot;:\u0026quot;operator\u0026quot;}, '@default': {\u0026quot;token\u0026quot;:\u0026quot;\u0026quot;} }} }, ], whitespace: [ { regex: /\\s+/, action: {\u0026quot;token\u0026quot;:\u0026quot;white\u0026quot;} }, { regex: /\\/\\*/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;,\u0026quot;next\u0026quot;:\u0026quot;@comment\u0026quot;} }, { regex: /\\/\\/[^\\n\\r]*/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, ], comment: [ { regex: /[^\\/\\*]+/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, { regex: /\\*\\//, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;,\u0026quot;next\u0026quot;:\u0026quot;@pop\u0026quot;} }, { regex: /[\\/\\*]/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, ], } }; } We can produce this Monarch grammar by updating our langium-config.json to produce a Monarch file as output. Note that although we\u0026rsquo;re talking about MiniLogo here, we based this example off of the hello-world example produced by the yeoman generator. As such, we still have hello world names here and there, and for this tutorial we\u0026rsquo;ll just use the same name again as for the TextMate grammar.\n... \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/minilogo.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/minilogo.monarch.ts\u0026quot; } To generate this file, run npm run langium:generate. You can then copy over the definition of the grammar from syntaxes/hello-world.monarch.ts (or whatever other name you have given this file). Keep in mind that this generated monarch grammar is very simple. If you want more complex highlighting, we recommend writing your own custom monarch grammar, and storing it somewhere else to prevent it from being overridden. If you\u0026rsquo;re interested, you can find more details about the Monarch grammar highlighting language here.\nThen, we want to setup the code that shows up by default. 
The following is a fixed MiniLogo program that should display a white diamond in the top left corner of the screen.\n/** * Retrieves the program code to display, either a default or from local storage */ function getMainCode() { let mainCode = ` def test() { move(100, 0) pen(down) move(100, 100) move(-100, 100) move(-100, -100) move(100, -100) pen(up) } color(white) test() `; // optionally: use local storage to save the code // and seek to restore any previous code from our last session if (window.localStorage) { const storedCode = window.localStorage.getItem('mainCode'); if (storedCode !== null) { mainCode = storedCode; } } return mainCode; } Since we\u0026rsquo;re planning to use a language server with Monaco, we\u0026rsquo;ll need to setup a language client config too. To do this we\u0026rsquo;ll also need to generate a worker using our language server worker file, but that\u0026rsquo;s fairly straightforward to setup here. Keep in mind that you\u0026rsquo;ll need to have access to the bundle produced from your main-browser.ts from before. Here the built result is copied over as public/minilogo-server-worker.js.\n/** * Creates \u0026amp; returns a fresh worker using the MiniLogo language server */ function getWorker() { const workerURL = new URL('minilogo-server-worker.js', window.location.href); return new Worker(workerURL.href, { type: 'module', name: 'MiniLogoLS' }); } By creating the worker in advance, we give ourselves the ability to directly interact with the worker/LS independent of the wrapper itself, and to even pre-configure it before use. This can be hugely beneficial, especially if we expect to customize our LS on the fly.\nLastly, let\u0026rsquo;s setup the user config, which will be used to startup the wrapper.\ntype WorkerUrl = string; /** * Classic configuration for the monaco editor (for use with a Monarch grammar) */ interface ClassicConfig { code: string, htmlElement: HTMLElement, languageId: string, worker: WorkerUrl | Worker, monarchGrammar: any; } /** * Generates a valid UserConfig for a given Langium example * * @param config An extended or classic editor config to generate a UserConfig from * @returns A completed UserConfig */ function createUserConfig(config: ClassicConfig): UserConfig { // setup urls for config \u0026amp; grammar const id = config.languageId; // generate langium config return { htmlElement: config.htmlElement, wrapperConfig: { editorAppConfig: { $type: 'classic', languageId: id, useDiffEditor: false, code: config.code, theme: 'vs-dark', languageDef: config.monarchGrammar }, serviceConfig: { enableModelService: true, configureConfigurationService: { defaultWorkspaceUri: '/tmp/' }, enableKeybindingsService: true, enableLanguagesService: true, debugLogging: false } }, languageClientConfig: { options: { $type: 'WorkerDirect', worker: config.worker as Worker, name: `${id}-language-server-worker` } } }; } This particular UserConfig will be for configuring a classic editor, rather than a VSCode extension-based editor. This is because we\u0026rsquo;re using a Monarch grammar, which is not supported by the extension configuration. However, if we wanted to use a TextMate grammar, we could use the extension based configuration instead.\neditorAppConfig: { $type: 'vscodeApi', languageId: id, useDiffEditor: false, code: config.code, ... } You would just need to fill in the rest of the details for associating a TextMate grammar \u0026amp; such. 
Here\u0026rsquo;s an example from the monaco-components repo.\nRegardless of how the user config is setup, we can now invoke that helper function with a handful of configuration details, and have a working UserConfig to pass to the wrapper.\n// create a wrapper instance const wrapper = new MonacoEditorLanguageClientWrapper(); // start up with a user config await wrapper.start(createUserConfig({ htmlElement: document.getElementById(\u0026quot;monaco-editor-root\u0026quot;)!, languageId: 'minilogo', code: getMainCode(), worker: getWorker(), monarchGrammar: getMonarchGrammar() })); That\u0026rsquo;s it! Now if everything was configured correctly, we should have a valid wrapper that will display the code we want in our browser.\nServing via NodeJS Now that we have our files all setup, and our build process prepared, we can put together a mini server application to make viewing our public assets easy. We\u0026rsquo;ll do this by adding src/web/app.ts to our project, and giving it the following contents:\n/** * Simple server app for serving generated examples locally * Based on: https://developer.mozilla.org/en-US/docs/Learn/Server-side/Node_server_without_framework */ import * as fs from \u0026quot;node:fs\u0026quot;; import * as http from \u0026quot;node:http\u0026quot;; import * as path from \u0026quot;node:path\u0026quot;; const port = 3000; const MIME_TYPES: Record\u0026lt;string,string\u0026gt; = { default: \u0026quot;application/octet-stream\u0026quot;, html: \u0026quot;text/html; charset=UTF-8\u0026quot;, js: \u0026quot;application/javascript\u0026quot;, css: \u0026quot;text/css\u0026quot;, }; const STATIC_PATH = path.join(process.cwd(), \u0026quot;./public\u0026quot;); const toBool = [() =\u0026gt; true, () =\u0026gt; false]; const prepareFile = async (url: string) =\u0026gt; { const paths = [STATIC_PATH, url]; if (url.endsWith(\u0026quot;/\u0026quot;)) { paths.push(\u0026quot;index.html\u0026quot;); } const filePath = path.join(...paths); const pathTraversal = !filePath.startsWith(STATIC_PATH); const exists = await fs.promises.access(filePath).then(...toBool); const found = !pathTraversal \u0026amp;\u0026amp; exists; // there's no 404, just redirect to index.html in all other cases const streamPath = found ? filePath : STATIC_PATH + \u0026quot;/index.html\u0026quot;; const ext = path.extname(streamPath).substring(1).toLowerCase(); const stream = fs.createReadStream(streamPath); return { found, ext, stream }; }; http .createServer(async (req, res) =\u0026gt; { const file = await prepareFile(req.url!); const statusCode = file.found ? 200 : 404; const mimeType: string = MIME_TYPES[file.ext] || MIME_TYPES.default; res.writeHead(statusCode, { \u0026quot;Content-Type\u0026quot;: mimeType }); file.stream.pipe(res); console.log(`${req.method} ${req.url} ${statusCode}`); }) .listen(port); console.log(`Server for MiniLogo assets listening on http://localhost:${port}`); If you would like to compact this, and don\u0026rsquo;t mind adding additional deps to your project, you can include express and @types/express to your project, and use the following code instead:\n/** * Simple express app for serving generated examples */ import express from 'express'; const app = express(); const port = 3000; app.use(express.static('./public')); app.listen(port, () =\u0026gt; { console.log(`Server for MiniLogo assets listening on http://localhost:${port}`); }); And to invoke the server, we need to add one more script to our package.json.\n{ ... 
    "serve": "node ./out/web/app.js"
}

That's it! Now we can build all the assets and run the server to view our demo of Langium in the web at localhost:3000.

npm run build:web
npm run serve

You should be greeted with a page that contains a working Monaco instance and a small MiniLogo program in the editor. This editor has the highlighting we would expect, and is also fully connected to the language server for our language. This means we have full LSP support for operations that we would expect to have in a native IDE, such as VSCode.

And that's it, we have successfully implemented Langium + Monaco in the web for our language. It's not doing much at this time besides presenting us with an editor, but in the next tutorial we'll talk about using the same setup to add generation in the web. Since our generation has already been configured natively in prior tutorials, we can use what we've written to quickly implement a web application that translates MiniLogo programs into drawing instructions for an HTML5 canvas.

"},{"id":6,"href":"/docs/learn/minilogo/generation_in_the_web/","title":"Generation in the Web","parent":"Minilogo tutorial","content":" Handling Document Validations Listening for Notifications in the Client Interpreting Draw Commands (Drawing) Updated on Oct. 4th, 2023 for usage with monaco-editor-wrapper 3.1.0 & above.

In this tutorial we'll be talking about how to perform generation in the web by listening for document builder notifications. There are multiple ways to hook into Langium to utilize the generator, such as by directly exporting the generator API. However, by listening to notifications from the document builder, we can do this with less code. This lets us quickly integrate new functionality into our existing Langium + Monaco integration, and focus more on what we want to do with the generated output.

(This tutorial previously utilized custom LSP commands to achieve the same goal of generation. This is still a valid approach, but we've found that listening for notifications this way is much more straightforward. We've implemented this in our own example languages as well, and would recommend it going forward.)

We'll assume that you've already looked over most of the other tutorials at this point. It is particularly important that you have a language with working generation, and a working instance of Langium + Monaco for your language (or another editor of your choice). If you don't have a language to work with, you can follow along with MiniLogo, which is the example language used throughout many of these tutorials.

Since we're working with MiniLogo here, we already know that our generated output is in the form of drawing instructions that transform some drawing context. The generated output that we've implemented so far consists of a JSON array of commands, making it very easy to interpret.
Now that we're working in a web-based context, this approach lends itself naturally towards manipulating an HTML5 canvas.

The parts that we still need to set up are:

handle document validations, and generate notifications with our generator output
listen for these notifications in the client, and extract the generated output
interpret the generated output as drawing commands, and update the canvas

Handling Document Validations

This is the first step we'll need, since without being able to generate notifications in the first place we would have nothing to listen to.

Thankfully a lot of the groundwork has already been done in previous tutorials, as well as within Langium itself. We just need to set up an onBuildPhase listener for the document builder in our LS. Using the LS entry point main-browser.ts that we set up in the last tutorial on Langium + Monaco, we can add the following code to the end of our startLanguageServer function.

// modified import from the previous tutorial: Langium + Monaco
import { BrowserMessageReader, BrowserMessageWriter, Diagnostic, NotificationType, createConnection } from 'vscode-languageserver/browser.js';
// also extend the existing 'langium' import with DocumentState
import { startLanguageServer, EmptyFileSystem, DocumentState } from 'langium';
// additional imports
import { Model } from './generated/ast.js';
import { Command, getCommands } from './minilogo-actions.js';
import { generateStatements } from '../generator/generator.js';

// startLanguageServer...

// Send a notification with the serialized AST after every document change
type DocumentChange = { uri: string, content: string, diagnostics: Diagnostic[] };
const documentChangeNotification = new NotificationType<DocumentChange>('browser/DocumentChange');
// use the built-in AST serializer
const jsonSerializer = MiniLogo.serializer.JsonSerializer;
// listen on fully validated documents
shared.workspace.DocumentBuilder.onBuildPhase(DocumentState.Validated, documents => {
    // perform this for every validated document in this build phase batch
    for (const document of documents) {
        const model = document.parseResult.value as Model;
        let json: Command[] = [];

        // only generate commands if there are no errors
        if (document.diagnostics === undefined || document.diagnostics.filter((i) => i.severity === 1).length === 0) {
            json = generateStatements(model.stmts);
        }

        // inject the commands into the model
        // this is safe so long as you're careful not to clobber existing properties,
        // and is incredibly helpful to enrich the feedback you get from the LS per document
        (model as unknown as { $commands: Command[] }).$commands = json;

        // send the notification for this validated document,
        // with the serialized AST + generated commands as the content
        connection.sendNotification(documentChangeNotification, {
            uri: document.uri.toString(),
            content: jsonSerializer.serialize(model, { sourceText: true, textRegions: true }),
            diagnostics: document.diagnostics ?? []
        });
    }
});

And that's it for setting up the onBuildPhase listener itself. We still need to address the usage of generateStatements, which is tied to the LS implementation.

Based on the work done in previous tutorials, we have already set up a working generator with MiniLogo. If you haven't already set this up, you can go back to the tutorial on generation and give it a look over. Ideally, we'll already have set up our generateStatements function for MiniLogo, so as long as the imported module doesn't pull in anything browser-incompatible, we should be able to use it as is.
Based on the previous setup, however, we should have a generator.js file that is free of such conflicts, as most of that logic is separated into the CLI directly.

This saves us quite a bit of time, since we don't need to handle setting up & dispatching a document for validation; we simply tap into the existing workflow and collect the result when it's ready. This is a great example of how Langium's architecture allows us to easily extend existing functionality, and add new features without having to rewrite existing code.

As a concluding note for this section, don't forget to rebuild your language server bundle! It might not be a bad idea to clean as well, just to be sure everything is working as expected at this step.

Listening for Notifications in the Client

The next step is to actually listen for these notifications from the client's end. This takes us back to the Langium + Monaco setup in the previous tutorial.

After starting the wrapper successfully, we want to retrieve the MonacoLanguageClient instance (a wrapper around the language client itself) and listen for browser/DocumentChange notifications.

// wrapper has started...

// get the language client
const client = wrapper.getLanguageClient();
if (!client) {
    throw new Error('Unable to obtain language client!');
}

// listen for document change notifications
client.onNotification('browser/DocumentChange', onDocumentChange);

function onDocumentChange(resp: any) {
    let commands = JSON.parse(resp.content).$commands;
    // ... do something with these commands
}

Now this works, but when do we receive notifications, and how often? Good thing you asked, because if you started this up and began editing your program, you would receive a notification for every single change, including whitespace changes! That's probably not what we're looking for; the content is correct, we just want to slow it down a bit. We can do this by setting a timeout and a semaphore to prevent multiple notifications from being processed at once.

let running = false;
let timeout: number | null = null;

function onDocumentChange(resp: any) {
    // block until we're finished with a given run
    if (running) {
        return;
    }

    // clear previous timeouts
    if (timeout) {
        clearTimeout(timeout);
    }

    timeout = window.setTimeout(async () => {
        running = true;
        let commands = JSON.parse(resp.content).$commands;
        await updateMiniLogoCanvas(commands);
        running = false;
    }, 200); // delay of 200ms is arbitrary, choose what makes the most sense in your use case
}

And now we have a nice delay where repeated updates are discarded, until we have about 200ms without a subsequent update. That allows us to take the commands we're working with and start doing something with them. The semaphore will prevent subsequent updates from overriding the current run, allowing it to finish before a new execution starts.

You may have also noticed we added updateMiniLogoCanvas as the action to perform with our commands. This will be implemented in the next step, where we interpret our drawing commands.

That's it for listening for notifications! Now that we have our commands extracted, we can actually perform a series of drawing actions on an HTML5 canvas.

Interpreting Draw Commands (Drawing)

If you've gotten to this point, then you're on the final stretch!
The last part we need to implement is the actual logic that takes our drawing commands and updates the canvas. This logic will be the content of the updateMiniLogoCanvas function, and we\u0026rsquo;ll walk through each step here.\nFirst, let\u0026rsquo;s get a handle on our canvas, as well as the associated 2D context.\nconst canvas : HTMLCanvasElement | null = document.getElementById('minilogo-canvas') as HTMLCanvasElement | null; if (!canvas) { throw new Error('Unable to find canvas element!'); } const context = canvas.getContext('2d'); if (!context) { throw new Error('Unable to get canvas context!'); } We\u0026rsquo;ll also want to clean up the context, in case we already drew something there before. This will be relevant when we\u0026rsquo;re updating the canvas multiple times with a new program.\ncontext.clearRect(0, 0, canvas.width, canvas.height); Next, we want to setup a background grid to display. It\u0026rsquo;s not essential for drawing, but it looks nicer than an empty canvas.\ncontext.beginPath(); context.strokeStyle = '#333'; for (let x = 0; x \u0026lt;= canvas.width; x+=(canvas.width / 10)) { context.moveTo(x, 0); context.lineTo(x, canvas.height); } for (let y = 0; y \u0026lt;= canvas.height; y+=(canvas.height / 10)) { context.moveTo(0, y); context.lineTo(canvas.width, y); } context.stroke(); After drawing a grid, let\u0026rsquo;s reset the stroke to a white color.\ncontext.strokeStyle = 'white'; Let\u0026rsquo;s also setup some initial drawing state. This will be used to keep track of the pen state, and where we are on the canvas.\n// maintain some state about our drawing context let drawing = false; let posX = 0; let posY = 0; And let\u0026rsquo;s begin evaluating each of our commands. To do this, we\u0026rsquo;ll setup an interval that repeatedly shifts the top element from our list of commands, evaluates it, and repeats. Once we\u0026rsquo;re out of commands to evaluate, we\u0026rsquo;ll clear the interval. The whole invocation will be wrapped in a promise, to make it easy to await later on. Feel free to adjust the delay (or remove it entirely) in your version.\nconst doneDrawingPromise = new Promise((resolve) =\u0026gt; { // use the command list to execute each command with a small delay const id = setInterval(() =\u0026gt; { if (cmds.length \u0026gt; 0) { dispatchCommand(cmds.shift() as MiniLogoCommand, context); } else { // finish existing draw if (drawing) { context.stroke(); } clearInterval(id); resolve(''); } }, 1); }); dispatchCommand itself only needs to handle 4 cases:\n penUp penDown move color Knowing this, and the details about what properties each command type can have, we can evaluate each command and update our context. 
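As a last bit of setup before the dispatcher, it helps to pin down the shape of these commands. The MiniLogoCommand type referenced below isn't shown in this tutorial; here is a minimal sketch of what it could look like, derived from how the dispatcher consumes commands (treat the exact shape as an assumption and align it with your generator's output):

type MiniLogoCommand =
    | { name: 'penUp' | 'penDown', args?: undefined }
    | { name: 'move', args: { x: number, y: number } }
    | { name: 'color', args: { color: string } | { r: number, g: number, b: number } };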
Evaluating the commands can then be done with a switch and a case for each command type. Be sure to add this function inside the updateMiniLogoCanvas function, otherwise it will not have access to the necessary state!

// dispatches a single command in the current context
function dispatchCommand(cmd: MiniLogoCommand, context: CanvasRenderingContext2D) {
    if (cmd.name) {
        switch (cmd.name) {
            // pen is lifted off the canvas
            case 'penUp':
                drawing = false;
                context.stroke();
                break;

            // pen is put down onto the canvas
            case 'penDown':
                drawing = true;
                context.beginPath();
                context.moveTo(posX, posY);
                break;

            // move across the canvas
            // will draw only if the pen is 'down'
            case 'move': {
                const x = cmd.args.x;
                const y = cmd.args.y;
                posX += x;
                posY += y;
                if (!drawing) {
                    // move, no draw
                    context.moveTo(posX, posY);
                } else {
                    // move & draw
                    context.lineTo(posX, posY);
                }
                break;
            }

            // set the color of the stroke
            case 'color':
                if ((cmd.args as { color: string }).color) {
                    // literal color or hex
                    context.strokeStyle = (cmd.args as { color: string }).color;
                } else {
                    // literal r,g,b components
                    const args = cmd.args as { r: number, g: number, b: number };
                    context.strokeStyle = `rgb(${args.r},${args.g},${args.b})`;
                }
                break;

            // fallback in case we missed an instruction
            default:
                throw new Error('Unrecognized command received: ' + JSON.stringify(cmd));
        }
    }
}

Now that we can interpret commands into drawing instructions, we're effectively done with setting up the last part of MiniLogo. Since we're listening to document updates, we don't need to do anything other than start it up and try an example program.

That's it, we're all done writing up our TS file. We should now be able to run the following (assuming the generator script is also executed by build:web), and get our results at localhost:3000.

npm run build:web
npm run serve

If all went well, you should see a white diamond sketched out on the canvas when the page loads. If not, double-check that you receive & use the code value correctly in your createUserConfig function. You can also add the program yourself from here:

def test() {
    move(100, 0)
    pen(down)
    move(100, 100)
    move(-100, 100)
    move(-100, -100)
    move(100, -100)
    pen(up)
}
color(white)
test()

Once you have something drawing on the screen, you're all set, congratulations! You've just successfully written your own Langium-based language, deployed it in the web, and hooked up generation to boot. In fact, you've done quite a lot if you've gone through all of these tutorials so far:

writing your own grammar
implementing custom validation
customizing your CLI
adding generation
configuring code bundling
building an extension
setting up Langium + Monaco in the web
adding a document build phase listener
listening for notifications in the client, and using the results

And the concepts that we've gone over from the beginning to now are not just for MiniLogo of course; they can easily be generalized to work for your own language as well. As you've been going through these tutorials, we hope that you've been thinking about how you could have done things differently too. Whether a simple improvement or another approach, we believe it's this creative kind of thinking that takes the idea of a language and really allows it to grow into something great.

One final note: the example code shown in these tutorials was designed to be easy to demonstrate.
It could certainly be improved with better error checking, better logic, generator optimizations, and so on; something to keep in mind.

It's also easy to imagine how you could extend your generator to produce functionality besides drawing. For example, you might have multiple generator targets, as there is no requirement to have a single generator output form like we've done in these tutorials. You could add as many different output forms as you need for each specific target, and even share some functionality between generators.

We hope that these tutorials have given you a practical demonstration of how to construct a language in Langium, and facilitated further exploration into more advanced topics & customizations. If you're interested in learning more about Langium, you can continue through our other tutorials, reach out to us via discussions on Github, or continue working on your Langium-based language.

"},{"id":7,"href":"/docs/reference/glossary/","title":"Glossary","parent":"Reference","content":"Anyone who is new to DSL development should carefully read the following primer on the terms we are using in our documentation:

abstract syntax tree: A tree of elements that represents a text document. Each element is a simple JS object that combines multiple input tokens into a single object. Commonly abbreviated as AST.

document: An abstract term to refer to a text file on your file system or an open editor document in your IDE.

grammar: Defines the form of your language. In Langium, a grammar is also responsible for describing how the AST is built.

parser: A program that takes a document as its input and computes an abstract syntax tree as its output.

parser rule: A parser rule describes how a certain AST element is supposed to be parsed. This is done by invoking other parser rules or terminals.

terminal: A terminal is the smallest parsable part of a document. It usually represents small pieces of text like names, numbers, keywords or comments.

token: A token is a substring of the document that matches a certain terminal. It contains information about which kind of terminal it represents as well as its location in the document.

"},{"id":8,"href":"/docs/recipes/lexing/","title":"Lexing","parent":"Recipes","content":""},{"id":9,"href":"/docs/recipes/lexing/case-insensitive-languages/","title":"Case-insensitive languages","parent":"Lexing","content":"Some programming languages, such as SQL or Structured Text, use case insensitivity to provide more flexibility when writing code. For example, most SQL databases accept select statements starting with select, SELECT, or even SeLeCt.

If you want to provide your users the same flexibility with your language, there are different levels of case-insensitivity in Langium:

You can make Langium's parser completely case insensitive using the language configuration
You can include case-insensitivity for specific terminal rules
You can make cross references case insensitive

All of these options can be enabled independently of one another.

Case-insensitivity by configuration

To make Langium case-insensitive, you have to set the caseInsensitive option to true in the LangiumConfig object, which is located in the langium-config.json file at the root of your Langium project. You can set this up for every single language.

{
    ...
    "languages": [
        {
            "id": "hello-world",
            "caseInsensitive": true, // <-- makes the specified language case insensitive
            ...
        },
        ...
    ],
    ...
}

Case-insensitivity on demand

If you want to include case-insensitivity only where you need it, you can use the i flag inside of your grammar's regular expressions:

// append `i` to any regex to make it case insensitive
terminal ID: /[A-Z]/i;

Note that regular expressions can only be used inside of terminal rules.

Case-insensitivity for identifiers and cross-references

Be aware that both of the approaches above only take care of the keywords in your grammar. If you want identifiers and cross-references to be case-insensitive as well, you have to adjust your scoping for each cross-reference case. This can be accomplished by setting the caseInsensitive option to true within the options when you are creating a new scope object.

There are several implementations of scopes. MapScope is very commonly used:

new MapScope(descriptions, parentScope, { caseInsensitive: true });

"},{"id":10,"href":"/docs/reference/grammar-language/","title":"Grammar Language","parent":"Reference","content":" Language Declaration Import of other grammar languages Terminal Rules Return Types Hidden Terminal Rules Parser Rules Declaration The Entry Rule Extended Backus-Naur Form Expressions Cardinalities Groups Alternatives Keywords Assignments Cross-References Unassigned Rule Calls Unordered Groups Simple Actions Tree-Rewriting Actions Data Type Rules Rule Fragments Guard Conditions More Examples More on Terminal Rules Extended Backus-Naur Form Terminals Terminal Groups Terminal Alternatives Character Range Wildcard Token Until Token Negated Token Terminal Rule Calls Terminal Fragments The grammar language describes the syntax and structure of your language. The Langium grammar language is implemented using Langium itself and therefore follows the same syntactic rules as any language created with Langium. The grammar language defines the structure of the abstract syntax tree (AST), which in Langium is a collection of TypeScript types describing the content of a parsed document and organized hierarchically. The individual nodes of the tree are then represented with JavaScript objects at runtime.

In the following, we describe the Langium syntax and document structure.

Language Declaration

An entry Langium grammar file (i.e. a grammar which contains an entry rule) always starts with a header which declares the name of the language. For example, a language named MyLanguage would be declared with:

grammar MyLanguage

Every grammar file has a .langium extension, and the entry grammar file needs to be referenced in langium-config.json. If you used the Yeoman generator to start your project, the configuration is already prepared.

Import of other grammar languages

It is possible to reuse grammar rules from other .langium files by importing them into your own grammar file.

import './path/to/an/other/langium/grammar';

This will import all grammar rules from the imported grammar file. It is therefore crucial to ensure that there are no duplicate rules between the different grammar files.

Contrary to entry grammars, imported grammars do not need to start with the keyword grammar.

Terminal Rules

The first step in parsing your language is lexing, which transforms a stream of characters into a stream of tokens.
A token is a sequence of one or many characters which is matched by a terminal rule, creating an atomic symbol. The names of terminal rules are conventionally written in upper case.

The Langium parser is created using Chevrotain, which has a built-in lexer based on JavaScript Regular Expressions.

Langium also allows the use of Extended Backus-Naur Form (EBNF) expressions for terminals, but we highly recommend that you write your terminals using Regular Expressions instead. EBNF expressions are internally translated by Langium into Regular Expressions; they are intended to allow porting Xtext grammars into Langium grammars, given their similarity.

With that said, both types of expressions can be used jointly in the same grammar.

The declaration of a terminal rule starts with the keyword terminal:

terminal ID: /[_a-zA-Z][\w_]*/;

Here, the token ID will match a stream of characters starting with the character _, a lowercase letter, or an uppercase letter, followed by a sequence of zero or many (cardinality *) alphanumeric characters (\w) or _.

The order in which terminal rules are defined is critical, as the lexer will always return the first match.

Return Types

A terminal rule returns an instance of a TypeScript primitive type. If no return type is specified, the terminal rule will return a string by default.

terminal ID: /[_a-zA-Z][\w_]*/;
terminal INT returns number: /[0-9]+/;

Here, the terminal rule ID will return an instance of string while the terminal rule INT will return an instance of number.

The available return types in Langium are:

string
number
boolean
bigint
Date

Hidden Terminal Rules

The lexer tries to match every character in the document to a terminal rule or a keyword. It is therefore necessary to specify which characters or sequences of characters need to be ignored during lexing and parsing. Generally, you would want to ignore whitespace and comments. This is achieved by adding the keyword hidden when defining a terminal rule. These hidden terminal rules are global and will be valid for all parser rules in the document.

hidden terminal WS: /\s+/;
hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;
hidden terminal SL_COMMENT: /\/\/[^\n\r]*/;

Parser Rules

While terminal rules indicate to the lexer what sequences of characters are valid tokens, parser rules indicate to the parser what sequences of tokens are valid. Parser rules lay out the structure of objects to be created by the parser and result in the creation of the abstract syntax tree (AST), which represents the syntactic structure of your language. In Langium, parser rules are also responsible for defining the type of objects to be parsed.

Declaration

A parser rule always starts with the name of the rule followed by a colon.

Person: 'person' name=ID;

In this example, the parser will create an object of type Person. This object will have a property name whose value and type must match the terminal rule ID (i.e. the property name is of type string and cannot start with a digit or special character).

By default, the parser will create an object with an inferred type corresponding to the parser rule name. It is possible to override this behavior by explicitly defining the type of the object to be created.
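For the Person rule above, the inferred type in the generated AST would look roughly like the following simplified sketch (the real generated interface also carries reflection metadata such as a $type property):

interface Person extends AstNode {
    name: string
}

To override that default, Langium gives you two keywords.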
This is done by adding the keyword returns followed by a separately declared type, or the keyword infers followed by the name of the type to be inferred for this rule (more about this in the next chapter):\nPerson infers OtherType: 'person' name=ID; The parser rule Person will now lead to the creation of objects of type OtherType instead of Person.\nThe Entry Rule The entry rule is a parser rule that defines the starting point of the parsing step. The entry rule starts with the keyword entry and matches other parser rules.\nentry Model: (persons+=Person | greetings+=Greeting)*; In this example, the entry rule Model defines a group of alternatives. The parser will go through the input document and try to parse Person or Greeting objects and add them to the persons or greetings arrays, respectively. The parser reads the token stream until all inputs have been consumed.\nExtended Backus-Naur Form Expressions Parser rules are defined using Extended Backus-Naur Form-like (EBNF) expressions similar to the Xtext notation.\nCardinalities A cardinality defines the number of elements in a given set. Four different cardinalities can be defined for any expression:\n exactly one (no operator) zero or one (operator ?) zero or many (operator *) one or many (operator +) Groups Expressions can be put in sequence specifying the order in which they have to appear:\nPerson: 'person' name=ID address=Address; In this example, the rule Person must start with the person keyword followed by an ID token and an instance of the Address rule.\nAlternatives It is possible to match one of multiple valid options by using the pipe operator |. The already mentioned Model example specifies parsing either Person or Greeting, zero or many times (cardinality *):\nentry Model: (persons+=Person | greetings+=Greeting)*; Keywords Keywords are inline terminals which need to match a character sequence surrounded by single or double quotes, for example 'person' or \u0026quot;person\u0026quot;. Keywords must not be empty and must not contain white space.\nAssignments Assignments define properties on the type returned by the surrounding rule. There are three different ways to assign an expression (right side) to a property (left side).\n = is used for assigning a single value to a property.\nPerson: 'person' name=ID Here, the property name will accept only one expression matching the terminal rule ID.\n += is used to assign multiple values to an array property.\nContact: addresses+=STRING addresses+=STRING; Here, the array property addresses will accept two expressions matching the terminal rule STRING.\n ?= is used to assign a value to a property of type boolean. The property value is set to true if the right part of the assignment is consumed by the parser.\nEmployee: 'employee' name=ID (remote?='remote')? Here, the value of the property remote will be set to true if the keyword remote was successfully parsed as a part of the rule call. If the keyword remote is not consumed (cardinality is ?), the property remote is set to false.\n Cross-References With Langium, you can declare cross-references directly in the grammar. A cross-reference allows you to reference an object of a given type. The syntax is:\nproperty=[Type:TOKEN] The property will be a reference to an object of type Type identified by the token TOKEN. If the TOKEN is omitted, the parser will use the terminal or data type rule associated with the name assignment of the Type rule. 
If no such rule exists, then the token is mandatory.\nPerson: 'person' name=ID; Greeting: 'Hello' person=[Person:ID] '!'; The Person in square brackets does not refer to the parser rule Person but instead refers to an object of type Person. It will successfully parse a document like:\nperson Bob Hello Bob ! but the following:\nperson Bob Hello Sara ! will result in an error message since the cross-reference resolution will fail because a Person object with the name \u0026lsquo;Sara\u0026rsquo; has not been defined, even though \u0026lsquo;Sara\u0026rsquo; is a valid ID.\nUnassigned Rule Calls Parser rules do not necessarily need to create an object; they can also refer to other parser rules which in turn will be responsible for returning the object. For example, in the Arithmetics example:\nAbstractDefinition: Definition | DeclaredParameter; The parser rule AbstractDefinition will not create an object of type AbstractDefinition. Instead, it calls either the Definition or DeclaredParameter parser rule, which will be responsible for creating an object of a given type (or call other parser rules if they are unassigned rule calls themselves).\nIn contrast, an assigned rule call such as parameter=DeclaredParameter means that an object is created in the current parser rule and assigns the result of the DeclaredParameter parser rule to the specified property parameter of that object.\nUnordered Groups In regular groups, expressions must occur in the exact order they are declared.\nPerson: 'person' name=ID age=INT Here, a Person object needs to declare the property name first, then age.\nperson Bob 25 will successfully be parsed to an object of type Person while\nperson 25 Bob will throw an error.\nHowever, it is possible to declare a group of properties in an unordered fashion using the \u0026amp; operator:\nPerson: 'person' name=ID \u0026amp; age=INT The rule will now allow name and age to be declared in any order.\nperson 25 Bob will then successfully create an object of type Person.\nCardinalities (the ?, *, and + operators) also apply to unordered groups. Please note that assignments with a cardinality of + or * have to appear continuously and cannot be interrupted by another assignment and resumed later.\nSimple Actions It is possible for a rule to return different types depending on its declaration:\ninterface TypeOne { name: string } RuleOne returns TypeOne: 'keywordOne' name=ID | RuleTwo; interface TypeTwo extends TypeOne {} RuleTwo returns TypeTwo: 'keywordTwo' name=ID; A rule call is one of the ways to specify the return type. With more complex rules, however, readability can suffer considerably. Actions allow you to improve the readability of the grammar by explicitly defining the return type. Actions are declared inside of curly braces {}:\nRuleOne returns TypeOne: 'keywordOne' name=ID | {TypeTwo} 'keywordTwo' name=ID; The example above requires that the return types TypeOne and TypeTwo are declared separately (see the next chapter). If the type returned by the action is created on-the-fly, the keyword infer needs to be added:\nRuleOne infers TypeOne: 'keywordOne' name=ID | {infer TypeTwo} 'keywordTwo' name=ID; Now both TypeOne and TypeTwo are inferred from the rule definition. Note that we use the keyword infers (declarative) for the grammar rule, but infer (imperative) for the action.\nTree-Rewriting Actions The parser is built using Chevrotain, which implements an LL(k) parsing algorithm (left-to-right). 
Conceptually, an LL(k) grammar cannot have rules containing left recursion.\nConsider the following:\nAddition: Addition '+' Addition | '(' Addition ')' | value=INT; The parser rule Addition is left-recursive and will not be parseable. We can work around this issue by left-factoring the rule, i.e. by factoring out the common left factor. We introduce a new rule SimpleExpression:\nAddition: SimpleExpression ('+' right=SimpleExpression)*; SimpleExpression: '(' Addition ')' | value=INT; Unfortunately, left-factoring does not come without consequences and can lead to the generation of unwanted nodes. It is possible to \u0026ldquo;clean\u0026rdquo; the tree by using tree-rewriting actions.\nAddition returns Expression: SimpleExpression ({Addition.left=current} '+' right=SimpleExpression)*; SimpleExpression: '(' Addition ')' | value=INT; Essentially, this means that when a + keyword is found, a new object of type Addition is created and the current object is assigned to the left property of the new object. The Addition then becomes the new current object. In imperative pseudocode it may look like this:\nfunction Addition() { let current = SimpleExpression() while (nextToken == '+') { let newObject = new Addition newObject.left = current current = newObject current.right = SimpleExpression() } } Please refer to this blog post for further details.\nData Type Rules Data type rules are similar to terminal rules as they match a sequence of characters. However, they are parser rules and are therefore context-dependent. This allows for more flexible parsing, as they can be interspersed with hidden terminals, such as whitespace or comments. Contrary to terminal rules, they cannot use regular expressions to match a stream of characters, so they have to be composed of keywords, terminal rules or other data type rules.\nThe following example from the domain model example uses the QualifiedName data type rule to enable references to other elements using their fully qualified name.\nQualifiedName returns string: ID ('.' ID)*; Data type rules need to specify a primitive return type.\nRule Fragments If you are facing repetitive patterns in your grammar definition, you can take advantage of Rule Fragments to improve the grammar\u0026rsquo;s maintainability.\nStudent: 'student' firstName=ID lastName=ID address=STRING phoneNumber=STRING grades=Grades; Teacher: 'teacher' firstName=ID lastName=ID address=STRING phoneNumber=STRING classes=Classes; TechnicalStaff: 'tech' firstName=ID lastName=ID address=STRING phoneNumber=STRING; The parser rules Student, Teacher, and TechnicalStaff partly share the same syntax. If, for example, the assignment for phoneNumber had to be updated, we would need to make changes everywhere the phoneNumber assignment was used. We can introduce Rule Fragments to extract similar patterns and improve maintainability:\nfragment Details: firstName=ID lastName=ID address=STRING phoneNumber=STRING; Student: 'student' Details grades=Grades; Teacher: 'teacher' Details classes=Classes; TechnicalStaff: 'tech' Details; Fragment rules are not part of the AST and will therefore never create an object; instead, they can be understood as being textually inserted where they are referenced.\nGuard Conditions It may be useful to group parser rules with small variations inside of a single parser rule. Given the following example:\nentry Model: element+=RootElement; RootElement infers Element: isPublic?='public'? 
'element' name=ID '{' elements+=Element* '}'; Element: 'element' name=ID '{' elements+=Element* '}'; The only difference between RootElement and Element is that the former has the boolean property isPublic. We can refactor the grammar so that only Element is present in the grammar with a guard condition that will determine which concrete syntax should be used by the parser:\nentry Model: element+=Element\u0026lt;true\u0026gt;; Element\u0026lt;isRoot\u0026gt;: (\u0026lt;isRoot\u0026gt; isPublic?='public')? 'element' name=ID '{' elements+=Element\u0026lt;false\u0026gt;* '}'; Element has the guard isRoot, which will determine whether the optional group containing the isPublic property is allowed to be parsed.\nThe entry rule Model sets the value of isRoot to true with element+=Element\u0026lt;true\u0026gt;, while isRoot is set to false inside of the Element\u0026lt;isRoot\u0026gt; parser rule with elements+=Element\u0026lt;false\u0026gt;.\nIn general, a guard condition on a group decides whether the parser is allowed to parse the group or not depending on the result of the evaluated condition. Logical operations can be applied, such as \u0026amp; (and), | (or) and ! (not) to fine-tune the exact conditions in which the group is supposed to be parsed.\nAdditionally, guard conditions can also be used inside of alternatives. See the following example:\nentry Model: element+=Element\u0026lt;true\u0026gt;; Element\u0026lt;isRoot\u0026gt;: (\u0026lt;isRoot\u0026gt; 'root' | \u0026lt;!isRoot\u0026gt; 'element') name=ID '{' elements+=Element\u0026lt;false\u0026gt;* '}'; The parser will always exclude alternatives whose guard conditions evaluate to false. All other alternatives remain possible options for the parser to choose from.\nMore Examples Not all parser rules need to be mentioned in the entry rule, as shown in this example:\nentry Model: (persons+=Person | greetings+=Greeting)*; Person: 'person' name=ID address=Address; Greeting: 'Hello' person=[Person] '!'; Address: street=STRING city=ID postcode=INT; Here, the Person parser rule includes a property address which matches the parser rule Address. We decided that an Address will never be present in the input document on its own and will always be parsed in relation to a Person. It is therefore not necessary to include an array of Address inside of the entry rule.\n Keywords are meant to provide a visible structure to the language and guide the parser in deciding what type of object needs to be parsed. Consider the following:\nStudent: name=ID; Teacher: name=ID; Person: Student | Teacher; In this example, a Person can either be a Student or a Teacher. This grammar is ambiguous because the parser rules Student and Teacher are identical. The parser will not be able to differentiate between the parser rules for Student and Teacher when trying to parse a Person. Keywords can help remove such ambiguity and guide the parser in deciding whether a Student or a Teacher needs to be parsed. 
We can add a keyword to the parser rule Student, Teacher, or to both of them:\nStudent: 'student' name=ID; Teacher: 'teacher' name=ID; Person: Student | Teacher; Now the ambiguity is resolved and the parser is able to differentiate between the two parser rules.\nParser rules can have many keywords:\nPerson: 'person' name=ID 'age' age=INT; If an assignment has a cardinality of + or *, then the expressions belong to a single group and must not be interrupted by other expressions.\nParagraph: 'paragraph' (sentences+=STRING)+ id=INT; Here, the property sentences will accept one or many expressions matching the terminal rule STRING followed by an INT. The parsing of a document containing:\nparagraph \u0026quot;The expression group \u0026quot; 3 \u0026quot;was interrupted\u0026quot; will throw an error since the STRING expressions are not continuous. It is, however, possible to interrupt and resume a sequence of expressions by using hidden terminal symbols:\nparagraph \u0026quot;expression one\u0026quot; /* comment */ \u0026quot;expression two\u0026quot; 3 The above example will be successfully parsed.\nMore on Terminal Rules Extended Backus-Naur Form Terminals For full disclosure, we recommend using regular expressions when writing your terminals, as EBNF expressions are translated to regular expressions internally anyway. EBNF support is primarily intended for supporting grammars that were originally written in Xtext, but are being ported to Langium.\nAs mentioned earlier, terminal rules can be described using regular expressions or EBNF expressions.\nEBNF expressions are very similar to parser rules, which are described above. In this section, we describe which EBNF expressions are supported for terminals and their equivalent in JavaScript regular expressions where possible.\nTerminal Groups Tokens can be put in sequence specifying the order they have to appear:\nterminal FLIGHT_NUMBER: ('A'..'Z')('A'..'Z')('0'..'9')('0'..'9')('0'..'9')('0'..'9')?; In this example, the token FLIGHT_NUMBER must start with two capital letters followed by three or four digits.\nTerminal Alternatives It is possible to match one of multiple valid options by using the pipe operator |. The terminal rule STRING can use alternatives to match a sequence of characters between double quotes \u0026quot;\u0026quot; or single quotes '':\nterminal STRING: '\u0026quot;' !('\u0026quot;')* '\u0026quot;' | ''' !(''')* '''; In regular expressions, alternatives are also possible with the pipe operator |:\nterminal STRING: /\u0026quot;[^\u0026quot;]*\u0026quot;|'[^']*'/; Character Range The operator .. is used to declare a character range. It is equivalent to the operator - within a character class in a regular expression. It matches any character in between the left character and the right character (inclusive on both ends).\nterminal INT returns number: ('0'..'9')+; is equivalent to the regular expression:\nterminal INT returns number: /[0-9]+/; Here, INT is matched to one or more characters (by using the operator +, which defines a cardinality of \u0026lsquo;one or many\u0026rsquo;) between 0 and 9 (inclusive on both ends).\nWildcard Token The operator . 
is used to match any single character, just like in regular expressions.\nterminal HASHTAG: '#'.+; In this example, the terminal rule HASHTAG matches a sequence of characters starting with # followed by one or many (cardinality +) characters.\nThe equivalent in regular expressions:\nterminal HASHTAG: /#.+/; Until Token The operator -\u0026gt; indicates that all characters should be consumed from the left token until the right token occurs. For example, the terminal rule for multi-line comments can be implemented as:\nterminal ML_COMMENT: '/*' -\u0026gt; '*/'; Langium will transform the until token into the regular expression [\s\S]*? which matches any character non-greedily:\nterminal ML_COMMENT: /\/\*[\s\S]*?\*\//; Negated Token It is possible to negate tokens using the operator !. In Langium, this produces a negative lookahead. I.e., it does not consume tokens, but acts as a \u0026lsquo;guard\u0026rsquo; for what the following expression can recognize.\nFor example, if you want to recognize a word that doesn\u0026rsquo;t start with no, then you could write such an expression in EBNF like so:\nterminal NONO: (!'no')('a'..'z'|'A'..'Z')+; For reference, this would correspond to the following regular expression:\nterminal NONO: /(?!no)[a-zA-Z]+/; Note that if you\u0026rsquo;re coming from Xtext, negated tokens work differently here. In Xtext, negated tokens allow recognizing the complement of a set of characters (or anything \u0026lsquo;but\u0026rsquo; what is listed in the negation), very much akin to a negated character class in regular expressions. This is very important to keep in mind if you\u0026rsquo;re porting a grammar from Xtext, as Langium\u0026rsquo;s interpretation of negated tokens deviates from that of Xtext.\nTerminal Rule Calls A terminal rule can include other terminal rules in its definition.\nterminal DOUBLE returns number: INT '.' INT; Note that it is easy to create conflicts between terminal rules when using terminal rule calls. See Data Type Rules for further details.\nTerminal Fragments Fragments allow sub-definitions of terminal rules to be extracted. They do not produce tokens on their own and have to be used by other terminal rules.\nterminal fragment CAPITAL_LETTER: ('A'..'Z'); terminal fragment SMALL_LETTER: ('a'..'z'); terminal NAME: CAPITAL_LETTER SMALL_LETTER+; In this example, the lexer will not transform a single capital or small letter into a valid token but will match a sequence of one capital letter followed by one or many small letters.\n"},{"id":11,"href":"/docs/recipes/scoping/qualified-name/","title":"Qualified Name Scoping","parent":"Scoping","content":"Qualified name scoping refers to a style of referencing elements using a fully qualified name. Such a fully qualified name is usually composed of the original name of the target element and the names of its container elements. You will usually see this method of scoping in C-like languages using namespaces or in Java using packages. The following code snippet shows an example of how qualified name scoping works from an end-user perspective, by using a function in a C++ namespace:\nnamespace Langium { void getDocumentation(); } void main() { // Should call the `getDocumentation` function defined in the `Langium` namespace Langium::getDocumentation(); } As can be seen, using qualified name scoping is quite helpful in this case. 
It allows us to reference the getDocumentation function through the scope computed and made available by the Langium namespace, even though it\u0026rsquo;s not directly accessible within the scope of main by itself.\nNote that such behavior can also be accomplished using class member scoping. However, there is one core advantage to using globally available elements: Compared to member scoping, this type of scoping requires far fewer resources. The lookup required for qualified name scoping can be done in near constant time with just a bit of additional computation on a per-document basis, whereas member scoping needs to do a lot of computation on a per-reference basis. With large workspaces, complex scoping might become a performance bottleneck.\nThis behavior can be achieved in Langium by exporting the getDocumentation function under the name Langium::getDocumentation. To do this, we will first set up a new ScopeComputation class that extends the DefaultScopeComputation. This class will be responsible for our custom scope computation. Then, we\u0026rsquo;ll want to bind our custom scope computation class in our module:\n// Scope computation for our C++-like language export class CppScopeComputation extends DefaultScopeComputation { constructor(services: LangiumServices) { super(services); } } // Services module for overriding the scope computation // Your language module is usually placed in your `\u0026lt;dsl-name\u0026gt;-module.ts` file export const CppModule: Module\u0026lt;CppServices, PartialLangiumServices \u0026amp; CppAddedServices\u0026gt; = { references: { ScopeComputation: (services) =\u0026gt; new CppScopeComputation(services) } } Next, we can start implementing our custom scoping by overriding the computeExports function. This function is particularly important, as it allows us to export nodes of our model using their qualified names. We\u0026rsquo;ll also want to annotate this function with override, since there\u0026rsquo;s already a default definition provided.\nexport class CppScopeComputation extends DefaultScopeComputation { // Omitting previous implementation for brevity /** * Export all functions using their fully qualified name */ override async computeExports(document: LangiumDocument): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const exportedDescriptions: AstNodeDescription[] = []; for (const childNode of streamAllContents(document.parseResult.value)) { if (isFunctionDeclaration(childNode)) { const fullyQualifiedName = this.getQualifiedName(childNode, childNode.name); // `descriptions` is our `AstNodeDescriptionProvider` defined in `DefaultScopeComputation` // It allows us to easily create descriptions that point to elements using a name. exportedDescriptions.push(this.descriptions.createDescription(childNode, fullyQualifiedName, document)); } } return exportedDescriptions; } /** * Build a qualified name for a model node */ private getQualifiedName(node: AstNode, name: string): string { let parent: AstNode | undefined = node.$container; while (isNamespace(parent)) { // Iteratively prepend the name of the parent namespace // This allows us to work with nested namespaces name = `${parent.name}::${name}`; parent = parent.$container; } return name; } } Once we start exporting functions using their fully qualified name, references such as QualifiedName::target will start working correctly. We can even nest multiple namespaces to create Fully::Qualified::Name::target. However, this leads us to another problem. 
We can now only reference functions using their fully qualified names, even if they\u0026rsquo;re locally available:\nnamespace QualifiedName { void target(); void test() { // Will not link correctly target(); // Requires the new fully qualified name QualifiedName::target(); } } To rectify this problem, we have to override the computeLocalScopes method, which provides access to elements that aren\u0026rsquo;t exported globally. We can also use this method to provide secondary access to globally available objects using a local name.\nexport class CppScopeComputation extends DefaultScopeComputation { // Omitting previous implementation for brevity override async computeLocalScopes(document: LangiumDocument): Promise\u0026lt;PrecomputedScopes\u0026gt; { const model = document.parseResult.value as CppProgram; // This map stores a list of descriptions for each node in our document const scopes = new MultiMap\u0026lt;AstNode, AstNodeDescription\u0026gt;(); this.processContainer(model, scopes, document); return scopes; } private processContainer( container: CppProgram | Namespace, scopes: PrecomputedScopes, document: LangiumDocument ): AstNodeDescription[] { const localDescriptions: AstNodeDescription[] = []; for (const element of container.elements) { if (isFunctionDeclaration(element)) { // Create a simple local name for the function const description = this.descriptions.createDescription(element, element.name, document); localDescriptions.push(description); } else if (isNamespace(element)) { const nestedDescriptions = this.processContainer(element, scopes, document); for (const description of nestedDescriptions) { // Add qualified names to the container // This could also be a partially qualified name const qualified = this.createQualifiedDescription(element, description, document); localDescriptions.push(qualified); } } } scopes.addAll(container, localDescriptions); return localDescriptions; } private createQualifiedDescription( container: Namespace, description: AstNodeDescription, document: LangiumDocument ): AstNodeDescription { // Prepend the name of the immediate namespace; enclosing namespaces are handled by the recursive `processContainer` calls const name = `${container.name}::${description.name}`; return this.descriptions.createDescription(description.node!, name, document); } } This change now allows us to use local names of functions in the local scope, while they are still exported using their fully qualified name to the global scope. Another example of this style of scoping can be seen in the domain-model example language. Also, click the following note to see the full implementation of the scope computation service.\n Full Implementation export class CppScopeComputation extends DefaultScopeComputation { /** * Export all functions using their fully qualified name */ override async computeExports(document: LangiumDocument): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const exportedDescriptions: AstNodeDescription[] = []; for (const childNode of streamAllContents(document.parseResult.value)) { if (isFunctionDeclaration(childNode)) { const fullyQualifiedName = this.getQualifiedName(childNode, childNode.name); // `descriptions` is our `AstNodeDescriptionProvider` defined in `DefaultScopeComputation` // It allows us to easily create descriptions that point to elements using a name. 
exportedDescriptions.push(this.descriptions.createDescription(childNode, fullyQualifiedName, document)); } } return exportedDescriptions; } override async computeLocalScopes(document: LangiumDocument): Promise\u0026lt;PrecomputedScopes\u0026gt; { const model = document.parseResult.value as CppProgram; // This multi-map stores a list of descriptions for each node in our document const scopes = new MultiMap\u0026lt;AstNode, AstNodeDescription\u0026gt;(); this.processContainer(model, scopes, document); return scopes; } private processContainer( container: CppProgram | Namespace, scopes: PrecomputedScopes, document: LangiumDocument ): AstNodeDescription[] { const localDescriptions: AstNodeDescription[] = []; for (const element of container.elements) { if (isFunctionDeclaration(element)) { // Create a simple local name for the function const description = this.descriptions.createDescription(element, element.name, document); localDescriptions.push(description); } else if (isNamespace(element)) { const nestedDescriptions = this.processContainer(element, scopes, document); for (const description of nestedDescriptions) { // Add qualified names to the container // This could also be a partially qualified name const qualified = this.createQualifiedDescription(element, description, document); localDescriptions.push(qualified); } } } scopes.addAll(container, localDescriptions); return localDescriptions; } private createQualifiedDescription( container: Namespace, description: AstNodeDescription, document: LangiumDocument ): AstNodeDescription { // Prepend the name of the immediate namespace; enclosing namespaces are handled by the recursive `processContainer` calls const name = `${container.name}::${description.name}`; return this.descriptions.createDescription(description.node!, name, document); } /** * Build a qualified name for a model node */ private getQualifiedName(node: AstNode, name: string): string { let parent: AstNode | undefined = node.$container; while (isNamespace(parent)) { // Iteratively prepend the name of the parent namespace // This allows us to work with nested namespaces name = `${parent.name}::${name}`; parent = parent.$container; } return name; } } "},{"id":12,"href":"/docs/recipes/scoping/","title":"Scoping","parent":"Recipes","content":"You likely know scopes from programming, where some variables are only available from certain areas (such as blocks) in your program. For example, take the short TypeScript snippet below. Based on the block (scope) where a variable is declared, it may or may not be available at another location in the same program.\nlet x = 42; x = 3; // References the `x` defined in the previous line if (condition) { let y = 42; } y = 3; // Cannot link, `y` isn't in any of the available scopes This kind of behavior is called lexical scoping. Although this default scoping implementation is suitable for prototyping \u0026ndash; and for some simple languages once finished \u0026ndash; this behavior can be easily modified to fit the needs of your language\u0026rsquo;s domain.\nIn general, the way we resolve references is split into three phases of the document lifecycle:\n Symbol indexing is responsible for making objects globally available for referencing. Scope computation determines which elements are reachable from a given position in your document. Finally, the linking phase eagerly links each reference within a document to its target using your language\u0026rsquo;s scoping rules. A minimal sketch of how a custom scope provider plugs into these phases is shown below. 
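As a quick, hypothetical illustration of where customization typically hooks in, the following sketch overrides the default scope provider. The class name and the underscore-filtering rule are invented for this example and are not part of any recipe; DefaultScopeProvider, StreamScope and the getScope signature are the actual Langium API used throughout these docs:
import { DefaultScopeProvider, ReferenceInfo, Scope, StreamScope } from 'langium';
// Hypothetical provider: reuse the lexically computed default scope,
// but hide every element whose name starts with an underscore.
export class ExampleScopeProvider extends DefaultScopeProvider {
    override getScope(context: ReferenceInfo): Scope {
        const defaultScope = super.getScope(context);
        // Filter the stream of AST node descriptions that the default scope provides
        return new StreamScope(defaultScope.getAllElements().filter(d =\u0026gt; !d.name.startsWith('_')));
    }
}
Binding such a provider works exactly like the service overrides shown later in these docs, via the references.ScopeProvider entry of your language module.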
In this recipe, we\u0026rsquo;ll look at different scoping kinds and styles and see how we can achieve them using Langium:\n Qualified Name Scoping Class Member Scoping File-based scoping Note that these are just example implementations for commonly used scoping methods. The scoping API of Langium is designed to be flexible and extensible for any kind of use case.\n"},{"id":13,"href":"/showcase/statemachine/","title":"State Machine","parent":"Langium Showcase","content":""},{"id":14,"href":"/docs/learn/workflow/install/","title":"1. Install Yeoman","parent":"Langium's workflow","content":"Before diving into Langium itself, let\u0026rsquo;s get your environment ready for development:\n Make sure you have a working Node environment with version 16 or higher. Install Yeoman and the Langium extension generator. npm i -g yo generator-langium For our getting started example, we also recommend installing the latest version of vscode.\n"},{"id":15,"href":"/docs/recipes/builtin-library/","title":"Builtin Libraries","parent":"Recipes","content":"Languages usually offer their users some high-level programming features that they do not have to define themselves. For example, TypeScript provides users with typings for globally accessible variables such as the window, process or console objects. They are part of the JavaScript runtime, and not defined by any user or a package they might import. Instead, these features are contributed through what we call builtin libraries.\nLoading a builtin library in Langium is very simple. We first start off by defining the source code of the library using the hello world language from the getting started guide:\nexport const builtinHelloWorld = ` person Jane person John `.trimLeft(); Next, we load our builtin library code through the loadAdditionalDocuments method provided by the DefaultWorkspaceManager:\nimport { AstNode, DefaultWorkspaceManager, LangiumDocument, LangiumDocumentFactory, LangiumSharedServices } from \u0026quot;langium\u0026quot;; import { WorkspaceFolder } from 'vscode-languageserver'; import { URI } from \u0026quot;vscode-uri\u0026quot;; import { builtinHelloWorld } from './builtins'; export class HelloWorldWorkspaceManager extends DefaultWorkspaceManager { private documentFactory: LangiumDocumentFactory; constructor(services: LangiumSharedServices) { super(services); this.documentFactory = services.workspace.LangiumDocumentFactory; } protected override async loadAdditionalDocuments( folders: WorkspaceFolder[], collector: (document: LangiumDocument\u0026lt;AstNode\u0026gt;) =\u0026gt; void ): Promise\u0026lt;void\u0026gt; { await super.loadAdditionalDocuments(folders, collector); // Load our library using the `builtin` URI scheme collector(this.documentFactory.fromString(builtinHelloWorld, URI.parse('builtin:///library.hello'))); } } As a last step, we have to bind our newly created workspace manager:\n// Add this to the `hello-world-module.ts` included in the yeoman generated project export type HelloWorldSharedServices = LangiumSharedServices; export const HelloWorldSharedModule: Module\u0026lt;HelloWorldSharedServices, DeepPartial\u0026lt;HelloWorldSharedServices\u0026gt;\u0026gt; = { workspace: { WorkspaceManager: (services) =\u0026gt; new HelloWorldWorkspaceManager(services) } } Be aware that this shared module is not injected by default. 
You have to add it manually to the inject call for the shared injection container.\nexport function createHelloWorldServices(context: DefaultSharedModuleContext): { shared: LangiumSharedServices, services: HelloWorldServices } { const shared = inject( createDefaultSharedModule(context), HelloWorldGeneratedSharedModule, HelloWorldSharedModule ); const services = inject( createDefaultModule({ shared }), HelloWorldGeneratedModule, HelloWorldModule ); shared.ServiceRegistry.register(services); return { shared, services }; } Once everything is wired together, we are done from the perspective of our DSL. At startup, our language server will run the loadAdditionalDocuments method, which makes our library available for any workspace documents of the user.\nHowever, when trying to navigate to the builtin library elements, vscode will show users an error message, complaining that it cannot find the builtin library file. This is expected, as the builtin library only lives in memory. To fix this issue, we need to implement a custom FileSystemProvider on the client (src/extension.ts in the hello world example) that allows navigation to the builtin library files:\nimport * as vscode from 'vscode'; import { builtinHelloWorld } from './language/builtins'; export class DslLibraryFileSystemProvider implements vscode.FileSystemProvider { static register(context: vscode.ExtensionContext) { context.subscriptions.push( vscode.workspace.registerFileSystemProvider('builtin', new DslLibraryFileSystemProvider(context), { isReadonly: true, isCaseSensitive: false })); } stat(uri: vscode.Uri): vscode.FileStat { const date = Date.now(); return { ctime: date, mtime: date, size: Buffer.from(builtinHelloWorld).length, type: vscode.FileType.File }; } readFile(uri: vscode.Uri): Uint8Array { // We could return different libraries based on the URI // We have only one, so we always return the same return new Uint8Array(Buffer.from(builtinHelloWorld)); } // The following class members only serve to satisfy the interface private readonly didChangeFile = new vscode.EventEmitter\u0026lt;vscode.FileChangeEvent[]\u0026gt;(); onDidChangeFile = this.didChangeFile.event; watch() { return { dispose: () =\u0026gt; {} }; } readDirectory(): [] { throw vscode.FileSystemError.NoPermissions(); } createDirectory() { throw vscode.FileSystemError.NoPermissions(); } writeFile() { throw vscode.FileSystemError.NoPermissions(); } delete() { throw vscode.FileSystemError.NoPermissions(); } rename() { throw vscode.FileSystemError.NoPermissions(); } } ... // register the file system provider on extension activation export function activate(context: vscode.ExtensionContext) { DslLibraryFileSystemProvider.register(context); } This registers an in-memory file system for vscode to use for the builtin file scheme. Every time vscode is supposed to open a file with this scheme, it will invoke the stat and readFile methods of the registered file system provider.\n"},{"id":16,"href":"/docs/recipes/scoping/class-member/","title":"Class Member Scoping","parent":"Scoping","content":"In this guide we will take a look at member-based scoping. 
It\u0026rsquo;s a mechanism you are likely familiar with from object-oriented languages such as Java, C# and JavaScript:\nclass A { b: B; } class B { value: string; } function test(): void { const a = new A(); const b = a.b; // Refers to the `b` defined in class `A` const value = b.value; // Refers to the `value` defined in class `B` } Member-based scoping like this requires not only a modification of the default scoping provider, but also some other prerequisites. This includes adding a member call mechanism in your grammar and a rudimentary type system. For this guide, we will use excerpts from the langium-lox project to demonstrate how you can set this up yourself. This project implements a strongly-typed version of the Lox language from the popular book Crafting Interpreters.\nWe\u0026rsquo;ll first start with the MemberCall grammar rule, which references one of our NamedElements. These elements could be variable declarations, functions, classes or methods and fields of those classes. Additionally, we want to allow function calls on elements. Note that the grammar has no notion of whether these elements can actually be executed as functions. Instead, we always allow function calls on every named element, and simply provide validation errors in case an element is called erroneously. After parsing the first member call, we continue parsing further members as long as the input text provides us with further references to elements, separated by dots.\ntype NamedElement = FunctionDeclaration | VariableDeclaration | MethodMember | FieldMember | Class; MemberCall: // Reference a named element of our grammar // Variables, functions, etc. element=[NamedElement:ID] // Parse an operation call on this element (explicitOperationCall?='(' ( // Parse any arguments for the operation call arguments+=Expression (',' arguments+=Expression)* )? ')')? // Create a new `MemberCall` and assign the old one to the `previous` property // The previous member call can either be the member call that was parsed in the previous section // Or one that is parsed in the next section due to the repetition at the end of this group ({infer MemberCall.previous=current} // We repeat the named element reference (\u0026quot;.\u0026quot; element=[NamedElement:ID] ( // Parse an operation call again explicitOperationCall?='(' ( arguments+=Expression (',' arguments+=Expression)* )? ')')? // Our language allows returning functions from functions // So we need to be able to call multiple functions without any element references | ( explicitOperationCall?='(' ( arguments+=Expression (',' arguments+=Expression)* )? ')')) )*; A very important aspect of these chained member calls is the action ({infer MemberCall.previous=current}), which rewrites the resulting AST. In this case, it reverses the direction of member call AST nodes. Instead of starting with the first encountered member call and then traversing down to the last, we start with the last and traverse the list of member calls up using the previous property. 
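To make the effect of this rewrite concrete, here is a rough, hand-written sketch (not generated code; reference values are elided) of the AST shape for the expression a.b.c — the outermost node represents the last member call, and each previous property points one step back toward the first:
// Simplified sketch of the rewritten AST for `a.b.c`
const memberCallAst = {
    $type: 'MemberCall',
    element: undefined, // would be the reference to `c`
    previous: {
        $type: 'MemberCall',
        element: undefined, // would be the reference to `b`
        previous: {
            $type: 'MemberCall',
            element: undefined // the reference to `a`; no `previous` left, so it is resolved lexically
        }
    }
};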
The reason for doing this becomes clear when looking at the scope provider for the Lox language:\nexport class LoxScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { // target element of member calls if (context.property === 'element' \u0026amp;\u0026amp; isMemberCall(context.container)) { const memberCall = context.container; const previous = memberCall.previous; if (!previous) { return super.getScope(context); } const previousType = inferType(previous); if (isClassType(previousType)) { return this.scopeClassMembers(previousType.literal); } // When the target of our member call isn't a class // This means it is either a primitive type or a type resolution error // Simply return an empty scope return EMPTY_SCOPE; } return super.getScope(context); } private scopeClassMembers(classItem: Class): Scope { // Since Lox allows class-inheritance, // we also need to look at all members of possible super classes for scoping const allMembers = getClassChain(classItem).flatMap(e =\u0026gt; e.members); return this.createScopeForNodes(allMembers); } } When trying to compute the type of an expression, we are only interested in the final piece of the member call. However, to derive the type and scope of the final member call, we have to recursively identify the type of the previous member call. This is done by looking at the member call stored in the previous property and inferring its type. See here for the full implementation of the type inference system in Lox. This kind of type inference requires scoping.\nTo illustrate this behavior a bit better, take a look at the following code snippet:\nclass Container { sub: SubContainer } class SubContainer { name: string } // Constructor call var element = Container(); // Member access println(element.sub.name); We recursively alternate between the scope provider and the type inference system until we arrive at a member call without any previous member call. At this point, we resolve the reference using the default lexical scoping which is built into Langium. With our scope provider in place, we can visualize how it interacts with Langium\u0026rsquo;s implementation of Lox in the following sequence diagram:\n sequenceDiagram participant R as Language Runtime participant T as Type System participant S as Scope Provider participant L as Lexical Scope R-\u0026gt;\u0026gt;T: (1) Query type of expression\n`element.sub.name` T-\u0026gt;\u0026gt;S: (2) Query `name` node S-\u0026gt;\u0026gt;T: (3) Query `sub` type T-\u0026gt;\u0026gt;S: (4) Query `sub` node S-\u0026gt;\u0026gt;T: (5) Query `element` type T-\u0026gt;\u0026gt;S: (6) Query `element` node S-\u0026gt;\u0026gt;L: (7) Query `element` node L--\u0026gt;\u0026gt;S: (7) Return `element` node S--\u0026gt;\u0026gt;T: (6) Return `element` node T-\u0026gt;\u0026gt;S: (8) Query `Container` node S-\u0026gt;\u0026gt;L: (9) Query `Container` node L--\u0026gt;\u0026gt;S: (9) Return `Container` node S--\u0026gt;\u0026gt;T: (8) Return `Container` node T--\u0026gt;\u0026gt;S: (5) Return `Container` type result S--\u0026gt;\u0026gt;T: (4) Return `sub` node T--\u0026gt;\u0026gt;S: (3) Return `SubContainer` type result S--\u0026gt;\u0026gt;T: (2) Return `name` node T--\u0026gt;\u0026gt;R: (1) Return `string` type result When trying to infer the type of the expression element.sub.name, we can see that this results in quite a lot of computations throughout the scoping and type systems. It is therefore recommended to cache type inference information, as this naive approach to inference can quickly lead to performance issues.\n"},{"id":17,"href":"/docs/reference/configuration-services/","title":"Configuration via Services","parent":"Reference","content":"Langium supports the configuration of most aspects of your language and language server via a set of services. 
Those services are configured by modules, which are essentially mappings from a service name to its implementation.\nWe can separate services and modules into two main categories:\nShared Services The shared services are services that are shared across all Langium languages. In many applications there is only one Langium language, but the overall structure of the services is the same.\n The ServiceRegistry is responsible for registering and accessing the different languages and their services. The Connection service is used in a language server context; it sends messages to the client and registers message handlers for incoming messages. The AstReflection service provides access to the structure of the AST types. Shared services involved in the document lifecycle (future documentation) Language Specific Services The language specific services are services specific to one Langium language and isolated from other languages.\n Services for LSP features Services involved in the document lifecycle (future documentation) Utility services (e.g. References, JsonSerializer) Customization If you have used the Yeoman generator, the entry point to service customization is found in the src/language/...-module.ts file, where \u0026lsquo;\u0026hellip;\u0026rsquo; is the name of your language. There you can register new services or override the default implementations of services. Langium implements the Inversion of Control principle via the Dependency Injection pattern, which promotes loosely-coupled architectures, maintainability, and extensibility.\nFor the following sections, we will use the arithmetics example to describe the procedure for replacing or adding services. Note that all names prefixed with Arithmetics should be understood as being specific to the language named Arithmetics, and in your project those services' names will be prefixed with your own language name.\nPlease note that it is not mandatory to implement all custom code via dependency injection. The main reason for using dependency injection is when your custom code depends on other services. In many cases you can use plain functions instead of service classes to implement your application logic.\nOverriding and Extending Services Thanks to the dependency injection pattern used in Langium, you can change the behavior of a service or add to its functionality in one place without having to modify every piece of code that depends on the service to be overridden or extended.\nThe arithmetics example provides a custom implementation of the ScopeProvider service, which overrides functionality from the default implementation DefaultScopeProvider.\nFirst, we need to register the new implementation of ScopeProvider inside of the ArithmeticsModule:\nexport const ArithmeticsModule: Module\u0026lt;ArithmeticsServices, PartialLangiumServices \u0026amp; ArithmeticsAddedServices\u0026gt; = { references: { ScopeProvider: (services) =\u0026gt; new ArithmeticsScopeProvider(services) } }; In the ArithmeticsModule singleton instance, we map a property with the name of our service (here ScopeProvider) to a concrete implementation of the service. This means that the first time we access the service named ScopeProvider, a new instance of the class ArithmeticsScopeProvider will be created instead of the default implementation DefaultScopeProvider. 
The provided factory function is invoked only once, which means that all services are handled as singletons.\nIn order to successfully override an existing service, the property name (here ScopeProvider) must match exactly that of the default implementation.\nThe ArithmeticsScopeProvider overrides two methods from DefaultScopeProvider:\nexport class ArithmeticsScopeProvider extends DefaultScopeProvider { protected createScope(elements: Stream\u0026lt;AstNodeDescription\u0026gt;, outerScope: Scope): Scope { return new StreamScope(elements, outerScope, { caseInsensitive: true }); } protected getGlobalScope(referenceType: string): Scope { return new StreamScope(this.indexManager.allElements(referenceType), undefined, { caseInsensitive: true }); } } The functions createScope and getGlobalScope are already defined in DefaultScopeProvider but need to be overridden to add the option {caseInsensitive: true}. This is achieved through inheritance: By using the keyword extends, ArithmeticsScopeProvider inherits from DefaultScopeProvider, which means that it can access properties and methods as well as override methods declared in the superclass.\nIn the DefaultScopeProvider, those two methods are declared as:\nprotected createScope(elements: Stream\u0026lt;AstNodeDescription\u0026gt;, outerScope: Scope): Scope { return new StreamScope(elements, outerScope); } protected getGlobalScope(referenceType: string): Scope { return new StreamScope(this.indexManager.allElements(referenceType)); } Now, when we call either createScope or getGlobalScope from the ScopeProvider service, the call will be made from the ArithmeticsScopeProvider instead of the DefaultScopeProvider. Functions that were not overridden will still be called from DefaultScopeProvider via inheritance.\nOf course, it is also possible to replace the default implementation with a completely separate one that does not inherit from the default service class.\nAdding New Services To add services that are not available by default in Langium, e.g. application-specific ones, we first need to edit the type ArithmeticsAddedServices. By default, the Yeoman-based generator adds a validator service where you can implement validation rules specific to your language. New services are added as properties to the type declaration:\nexport type ArithmeticsAddedServices = { ArithmeticsValidator: ArithmeticsValidator } The ArithmeticsAddedServices type now has a property ArithmeticsValidator of type ArithmeticsValidator.\nFor the sake of organization and clarity, the services can be nested inside of other properties acting as \u0026ldquo;groups\u0026rdquo;:\nexport type ArithmeticsAddedServices = { validation: { ArithmeticsValidator: ArithmeticsValidator }, secondGroup: { AnotherServiceName: AnotherServiceType }, nthGroup: { withASubGroup: { YetAnotherServiceName: YetAnotherServiceType } } } Now that we have declared our new services inside of the ArithmeticsAddedServices type definition, we need to specify to the module how we want them to be implemented. 
To do so, we need to update the ArithmeticsModule:\nexport const ArithmeticsModule: Module\u0026lt;ArithmeticsServices, PartialLangiumServices \u0026amp; ArithmeticsAddedServices\u0026gt; = { validation: { ArithmeticsValidator: () =\u0026gt; new ArithmeticsValidator() } }; Similarly to overridden services, the first access to the ArithmeticsValidator property will create a new instance of the class ArithmeticsValidator.\nThe ArithmeticsValidator service does not depend on other services, and no argument is passed during the instantiation of the class. If you implement a service that depends on other services, the constructor of your service should expect \u0026lt;yourDslName\u0026gt;Services as argument. The initializer function can expect that object as argument and pass it to your service's constructor, like this:\nexport const ArithmeticsModule: Module\u0026lt;ArithmeticsServices, PartialLangiumServices \u0026amp; ArithmeticsAddedServices\u0026gt; = { ServiceWithDependencies: (services) =\u0026gt; new ServiceClass(services) } The services which ServiceClass depends on need to be registered in the constructor:\nexport class ServiceClass { private readonly serviceOne: ServiceOne; private readonly serviceTwo: ServiceTwo; private readonly serviceN: ServiceN; constructor(services: ArithmeticsServices) { this.serviceOne = services.ServiceOne; this.serviceTwo = services.Group.ServiceTwo; this.serviceN = services.Group.SubGroup.ServiceN; } /* service logic */ } Resolving cyclic dependencies In case one of the services the ServiceClass above depends on also has a dependency back to the ServiceClass, your module will throw an error similar to this: Cycle detected. Please make \u0026quot;ServiceClass\u0026quot; lazy. Ideally, such cyclic dependencies between services should be avoided. Sometimes, cycles are unavoidable though. In order to make them lazy, assign a lambda function that returns the service in the constructor. You can then invoke this function in your service logic to get access to the service you depend on:\nexport class ServiceClass { private readonly serviceOne: () =\u0026gt; ServiceOne; constructor(services: ArithmeticsServices) { this.serviceOne = () =\u0026gt; services.ServiceOne; // \u0026lt;-- lazy evaluated service } /* service logic */ method() { this.serviceOne().methodOne(); } } Using ArithmeticsValidator in other services The ArithmeticsValidator needs to be registered inside of the ValidationRegistry. This is done by overriding ValidationRegistry with ArithmeticsValidationRegistry.\nBriefly, ArithmeticsValidator implements two checks, checkDivByZero and checkNormalisable:\nexport class ArithmeticsValidator { checkDivByZero(binExpr: BinaryExpression, accept: ValidationAcceptor): void { ... } checkNormalisable(def: Definition, accept: ValidationAcceptor): void { ... } } These two new checks need to be registered inside of the ValidationRegistry. 
We extend ValidationRegistry with ArithmeticsValidationRegistry to implement our new functionalities:\nexport class ArithmeticsValidationRegistry extends ValidationRegistry { constructor(services: ArithmeticsServices) { super(services); const validator = services.validation.ArithmeticsValidator; const checks: ArithmeticsChecks = { BinaryExpression: validator.checkDivByZero, Definition: validator.checkNormalisable }; this.register(checks, validator); } } Inside of the ArithmeticsValidationRegistry, we obtain our ArithmeticsValidator with const validator = services.validation.ArithmeticsValidator, which will create a new instance of ArithmeticsValidator. Then we declare the checks to be registered and register them inside of the registry via the function register which is declared in the superclass. The ArithmeticsValidationRegistry only adds validation checks to the ValidationRegistry, but does not override any functionality from it.\nThe implementation of ArithmeticsValidationRegistry needs to be registered in ArithmeticsModule. The complete ArithmeticsModule is:\nexport const ArithmeticsModule: Module\u0026lt;ArithmeticsServices, PartialLangiumServices \u0026amp; ArithmeticsAddedServices\u0026gt; = { references: { ScopeProvider: (services) =\u0026gt; new ArithmeticsScopeProvider(services) }, validation: { ValidationRegistry: (services) =\u0026gt; new ArithmeticsValidationRegistry(services), ArithmeticsValidator: () =\u0026gt; new ArithmeticsValidator() } }; Language Server Protocol If you want to modify aspects of the Language Server, this section will help you find the relevant service for handling a given LSP request.\nCompletionProvider The CompletionProvider service is responsible for handling a Completion Request at a given cursor position. When a Completion Request is submitted by the client to the server, the CompletionProvider will create a CompletionList of all possible CompletionItem to be presented in the editor. The CompletionProvider service computes a new CompletionList after each keystroke.\nDocumentSymbolProvider The DocumentSymbolProvider service is responsible for handling a Document Symbols Request. The DocumentSymbolProvider is used to return a hierarchy of all symbols found in a document as an array of DocumentSymbol.\nHoverProvider The HoverProvider service is responsible for handling a Hover Request at a given text document position. By default, Langium implements tooltips with the content of the preceding multiline comment when hovering a symbol.\nFoldingRangeProvider The FoldingRangeProvider service is responsible for handling a Folding Range Request. This service identifies all the blocks that can be folded in a document.\nReferenceFinder The ReferenceFinder service is responsible for handling a Find References Request. This service is used to find all references to a given symbol inside of a document.\nDocumentHighlighter The DocumentHighlighter service is responsible for handling a Document Highlights Request. This service will find all references to a symbol at a given position (via the References service) and highlight all these references in a given document.\nRenameHandler The RenameHandler service is responsible for handling a Rename Request or a Prepare Rename Request. First, the service will check the validity of the Prepare Rename Request. 
If the request is valid, the service will find all references to the selected symbol inside of a document and replace all occurrences with the new value.\n"},{"id":18,"href":"/docs/features/","title":"Features","parent":"What is Langium?","content":"Designing programming languages from the ground up is hard, independent of whether your language is a \u0026ldquo;simple\u0026rdquo; domain specific language or a full-fledged general-purpose programming language. Not only do you have to keep up with the requirements of your domain experts, but you have to deal with all the technical complexity that comes with building a language, including questions such as:\n How do I get from a string to a semantic model which I can work with? How do I resolve references to other parts of my model, even if they are located in a separate file? How do I provide a great editing experience to users of my language? This is the point where Langium comes into play. Langium aims to lower the barrier to entry for creating a language by removing the technical complexity, allowing you to focus on your domain\u0026rsquo;s requirements.\nIn this chapter, you\u0026rsquo;ll get a closer look at the requirements developers usually have to implement by themselves when building a programming language:\n Language Parsing Semantic Models Cross References and Linking Workspace Management Editing Support Try it out Langium provides out-of-the-box solutions for these problems, with the ability to fine-tune every part of it to fit your domain requirements.\n Language Parsing Programming languages and domain specific languages (DSLs) cannot be parsed using simple regular expressions (RegExp). Instead, they require a more sophisticated parsing strategy. To define a custom language in Langium, you interact with a high-level representation of your context-free grammar using the Langium grammar language, in a similar fashion to EBNF.\nBased on the grammar, Langium is then able to construct a parser which transforms an input string into a semantic model representation. Just as the name suggests, this model captures the essential structure to describe your language.\nSemantic Models Langium grammars are not only used to parse your language, but also to generate a semantic model for your language as TypeScript interfaces. When a program in your language is then parsed, the AST will be automatically produced using these interfaces. The following language snippet parses a simple object { name: 'John' }.\nPerson: 'person' // keyword name=ID // semantic assignment ; To interact with the semantic model in a type-safe manner, the langium-cli tool generates TypeScript type definitions from your parser rules. The Person parser rule generates the following interface:\ninterface Person { name: string } These interfaces allow you to safely traverse your abstract syntax tree. In case your grammar changes, they will also notify you of any breaking changes which might require you to change your domain logic.\nCross References and Linking To express any kind of relationship between elements in your language, you will need to reference them. The process of resolving these references, i.e. identifying what element of your language hides behind a certain name, is called linking. Performing the linking process in a deterministic manner with a lot of objects in your project requires sound linking design.\nLangium accomplishes this feature by using the concept of \u0026lsquo;scoping\u0026rsquo;. 
You likely know scopes from programming, where some variables are only available from certain scopes:\nlet x = 42; x = 3; // References the `x` defined in the previous line if (something) { let y = 42; } y = 3; // Cannot link, `y` isn't in any of the available scopes The same occurs in Langium. To enable more complex scoping behavior, you can add custom domain scoping. For example, common object-oriented languages need a more involved scoping mechanism to resolve references to fields and methods of a class:\nclass X { y(): void { ... } } const instance = new X(); // Symbol `X` is in the local scope instance.y(); // Symbol `y` exists in the scope of the `X` class Once your domain specific scoping rules have been defined, Langium will take care of linking by itself, reporting any errors.\nWorkspace Management Like with common modularized programming languages, domain logic written in your DSL will usually be split across multiple files to facilitate ease of use and maintenance. This is also possible using Langium, which automatically tries to pick up any files belonging to your current project.\nWhen running a Langium based language in a language server, all files in your workspace (the folder containing your current project) belonging to your DSL will automatically be picked up and processed. In addition, any changes in your workspace will be handled as well. Dealing with added, changed or deleted files in a workspace with multiple hundreds of files can become complicated and decrease performance drastically if not done correctly. Langium employs heuristics to only invalidate and recompute what is actually necessary.\nThe workspace management also keeps track of the global scope. This allows users of your DSL to reference elements across files within the same workspace.\nEditing Support The Langium framework is deeply integrated with the language server protocol (LSP). The LSP aims to reduce integration efforts when designing a language by providing an interface that all IDEs can use to provide editing support. This allows Langium based languages to easily interact with common IDEs and editors with LSP support, including Visual Studio Code, Eclipse, IntelliJ and many more.\nThe LSP includes commonly used language features, such as code completion, custom validations/diagnostics, finding references, formatting and many more. This allows for deep IDE integration without binding your language to a single IDE. Langium offers out-of-the-box support for most of these language features, with additional extension points for your domain specific requirements.\nTry it out You can try out most of these features using our showcase and playground. The languages shown there are written using Langium and integrated in the monaco-editor.\nIf you\u0026rsquo;re interested in Langium, you can check out our learning page next. There you\u0026rsquo;ll learn how to get started writing your first language, and to learn more about how Langium can help you achieve your language designing goals.\n"},{"id":19,"href":"/docs/learn/minilogo/","title":"Minilogo tutorial","parent":"Learn Langium","content":"In this section you\u0026rsquo;ll find helpful tutorials aimed at teaching you how to implement Langium for concrete applications.\nThese tutorials revolve around the MiniLogo language, implemented in Langium. 
They describe how to implement MiniLogo from the ground up, and how to customize the various features of MiniLogo.\nBy working through each of these tutorials, you\u0026rsquo;ll be learning about:\n writing a grammar in Langium implementing validation for your language customizing a CLI for your tooling writing up a simple generator building a VSCode extension running Langium in the web with the Monaco editor implementing generation in the web By the end of this series, you should be equipped to start working on your own language, and also have a pretty good idea for how you can integrate Langium into other projects as well.\nWith that being said, hop on in to the first guide on Writing a Grammar in Langium!\n"},{"id":20,"href":"/docs/learn/workflow/scaffold/","title":"2. Scaffold a Langium project","parent":"Langium's workflow","content":"To create your first working DSL, execute the Yeoman generator:\n\u0026gt; yo langium ┌─────┐ ─┐ ┌───┐ │ ╶─╮ ┌─╮ ╭─╮ ╷ ╷ ╷ ┌─┬─╮ │ ,´ │ ╭─┤ │ │ │ │ │ │ │ │ │ │ │╱ ╰─ ╰─┘ ╵ ╵ ╰─┤ ╵ ╰─╯ ╵ ╵ ╵ ` ╶─╯ Welcome to Langium! This tool generates a VS Code extension with a \u0026quot;Hello World\u0026quot; language to get started quickly. The extension name is an identifier used in the extension marketplace or package registry. ❓ Your extension name: hello-world The language name is used to identify your language in VS Code. Please provide a name to be shown in the UI. CamelCase and kebab-case variants will be created and used in different parts of the extension and language server. ❓ Your language name: Hello World Source files of your language are identified by their file name extension. You can specify multiple file extensions separated by commas. ❓ File extensions: .hello Your language can be run inside of a VSCode extension. ❓ Include VSCode extension? Yes You can add CLI to your language. ❓ Include CLI? Yes You can run the language server in your web browser. ❓ Include Web worker? Yes You can add the setup for language tests using Vitest. ❓ Include language tests? Yes Yeoman will prompt you with a few basic questions about your DSL:\n Extension name: Will be used as the folder name of your extension and its package.json. Language name: Will be used as the name of the grammar and as a prefix for some generated files and service classes. File extensions: A comma separated list of file extensions for your DSL. The following questions are about the project parts you want to include in your project:\n VS Code extension: will be used to run your language inside of a VS Code extension. CLI: will add a CLI to your language. Web worker: will add the setup for running the language server in your web browser. Language tests: will add the setup for language tests. Afterwards, it will generate a new project and start installing all dependencies, including the langium framework as well as the langium-cli command line tool required for generating code based on your grammar definition.\nAfter everything has successfully finished running, open your newly created Langium project with vscode via the UI (File \u0026gt; Open Folder\u0026hellip;) or execute the following command, replacing hello-world with your chosen project name:\ncode hello-world Sneak peek using the VS Code extension Press F5 or open the debug view and start the available debug configuration to launch the extension in a new Extension Development Host window. Open a folder and create a file with your chosen file extension (.hello is the default). 
The hello-world language accepts two kinds of entities: the person declaration and the Hello greeting. Here\u0026rsquo;s a quick example of how to use them both:\nperson Alice Hello Alice! person Bob Hello Bob! The file src/language/hello-world.langium in your newly created project contains your grammar.\n"},{"id":21,"href":"/showcase/arithmetics/","title":"Arithmetics","parent":"Langium Showcase","content":""},{"id":22,"href":"/docs/reference/document-lifecycle/","title":"Document Lifecycle","parent":"Reference","content":"LangiumDocument is the central data structure in Langium that represents a text file of your DSL. Its main purpose is to hold the parsed Abstract Syntax Tree (AST) plus additional information derived from it. After its creation, a LangiumDocument must be \u0026ldquo;built\u0026rdquo; before it can be used in any way. The service responsible for building documents is called DocumentBuilder.\nA LangiumDocument goes through seven different states during its lifecycle:\n Parsed when an AST has been generated from the content of the document. IndexedContent when the AST nodes have been processed by the IndexManager. ComputedScopes when local scopes have been prepared by the ScopeComputation. Linked when the Linker has resolved cross-references. IndexedReferences when the references have been indexed by the IndexManager. Validated when the document has been validated by the DocumentValidator. Changed when the document has been modified. State 1 is the initial state after creation of a document, and states 2 to 6 are part of its build process. State 7 is a final state used to mark the document as invalid due to a change in the source text.\nThe following diagram depicts how the DocumentBuilder processes LangiumDocuments depending on their state. More details about each step of the lifecycle can be found below.\n graph TD; N(LangiumDocumentFactory) -.-|Creation of LangiumDocuments| C{{Parsed}} A(DocumentBuilder) --|Indexing of symbols| D(IndexManager) -.- E{{IndexedContent}} A --|Computing scopes| F(ScopeComputation) -.- G{{ComputedScopes}} A --|Linking| H(Linker) -.- I{{Linked}} A --|Indexing of cross-references| J(IndexManager) -.- K{{IndexedReferences}} A --|Validation| L(DocumentValidator) -.- M{{Validated}} click N \"./#creation-of-langiumdocuments\" click D \"./#indexing-of-symbols\" click F \"./#computing-scopes\" click H \"./#linking\" click J \"./#indexing-of-cross-references\" click L \"./#validation\" Creation of LangiumDocuments When the workspace is initialized, all files having an extension matching those defined in langium-config.json are collected by the WorkspaceManager service. The LangiumDocumentFactory service creates a new instance of LangiumDocument for each source file. Those documents are then stored in memory by the LangiumDocuments service so they can be accessed later.\nFiles in the workspace are mapped to instances of TextDocument as implemented by the vscode-languageserver package. Such a TextDocument holds the content of the respective file as a string. In contrast, a LangiumDocument represents the file content as an AST. This means that the creation of a LangiumDocument by the LangiumDocumentFactory service is accompanied by the parsing of the content of a TextDocument into an AST. During the creation of a LangiumDocument (i.e. 
after the document has been parsed), its state is set to Parsed.\ngraph LR; A(LangiumDocuments\nmanages LangiumDocument instances) -- B(LangiumDocumentFactory\ncreates a LangiumDocument) B -- C(LangiumParser\nparses a string into an AST) Once all LangiumDocuments have been created, the DocumentBuilder service will sequentially process each LangiumDocument as described below.\nIndexing of Symbols Symbols are AST nodes that can be identified with a name and hence can be referenced from a cross-reference. Symbols that are exported can be referenced from other documents, while non-exported symbols are local to the document containing them. The IndexManager service keeps an index of all symbols that are exported from documents in the workspace. The set of all these exported symbols is called the global scope.\nIndexing of the exported symbols of an AST is executed on documents with the state Parsed. The default ScopeComputation service creates an AstNodeDescription for the root node (i.e. the node created by parsing the entry rule) and each named AstNode directly descending from the root node. This AstNodeDescription contains the type of the node, its identifier (i.e. the name property), the URI of the document where the node is located, and the location of the node inside of the document. The generated set of AstNodeDescriptions makes symbols from a LangiumDocument accessible to other documents in the same workspace.\nThe default ScopeComputation can be overridden to change the selection of exported symbols, or to export them with different names than the plain value of their name property. However, keep in mind that you cannot access any cross-references in this phase because that requires the document state to be at least ComputedScopes, which happens later in the build process.\nOnce the initial indexing is done, the document\u0026rsquo;s state is set to IndexedContent.\ngraph LR; A(IndexManager\nmanages exported content\nof LangiumDocuments) -- B(ScopeComputation\ncreates descriptions\nof all exported symbols) B -- C(NameProvider\nresolves the name of an AstNode) B -- D(AstNodeLocator\ngets the path of an AstNode) Computing Scopes This phase is executed on documents with the state IndexedContent and is required to complete prior to resolving cross-references.\nLocal scope computation consists of gathering all symbols contained in the AST, done by the ScopeComputation service (in addition to the indexing explained in the previous section). Metadata of the gathered symbols are represented with AstNodeDescription like in the initial indexing phase. These metadata are attached to the LangiumDocument in a multi-map structure that associates a (possibly empty) set of symbol descriptions to each container node of the AST, called the precomputed scopes. These are used in the linking phase to construct the actual scope of a cross-reference, i.e. all possible symbols that are reachable. A symbol in the precomputed scopes is reachable from a specific cross-reference if it is associated with a direct or indirect container of that reference. Symbols associated to the root node are reachable from the whole AST, while symbols associated with an inner node are reachable from the respective sub-tree.\nThe default implementation of the ScopeComputation service attaches the description of every symbol to its direct container. This means that the container holds information about which named nodes are nested inside of it. 
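To make this concrete, here is a hedged sketch (not the framework's verbatim source) of a ScopeComputation subclass that reproduces roughly this default behavior: describe every named node and attach the description to its direct container.

```ts
import { AstNode, DefaultScopeComputation, LangiumDocument, PrecomputedScopes } from 'langium';

export class MyScopeComputation extends DefaultScopeComputation {
    protected override processNode(node: AstNode, document: LangiumDocument, scopes: PrecomputedScopes): void {
        const container = node.$container;
        const name = this.nameProvider.getName(node);
        if (container && name) {
            // Attach the symbol description to its direct container,
            // making the symbol reachable from the container's whole subtree.
            scopes.add(container, this.descriptions.createDescription(node, name, document));
        }
    }
}
```

Changing the `container` or `name` passed to `scopes.add` is the lever for the customizations described next.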
You can override this default behavior to change the position where a symbol is reachable, or to change the name by which it can be referenced. It is even possible to associate the same symbol to multiple container nodes, possibly with different names, to control precisely where and how references to it can be resolved. However, keep in mind that you cannot access any cross-references in this phase. More complex, context-dependent scope mechanisms can be implemented in the ScopeProvider (see next section).\nThe \u0026ldquo;Domainmodel\u0026rdquo; example includes a customization of scope precomputation where every entity contained in a package declaration is exposed using its qualified name, that is the concatenation of the package name and entity name separated with . (similar to Java).\nIn languages with a type system, you would typically implement computation of types in an additional pre-processing step in order to make type information available in the document. This additional step can be registered to run after scope computation with the onBuildPhase method of DocumentBuilder. How types are computed heavily depends on the kind of type system, so there is no default implementation for it.\nOnce local scopes are computed and attached to the document, the document\u0026rsquo;s state is set to ComputedScopes.\ngraph LR; A(ScopeComputation\ngathers all symbols from the AST and\nstores their metadata in a MultiMap) -- B(NameProvider\nresolves the name of an AST node) A -- C(AstNodeDescriptionProvider\ncreates descriptions of the\ngathered symbols) C -- D(AstNodeLocator\ngets the path of an AstNode) Linking Once local scopes have been prepared, cross-references are resolved via the Linker service. The Linker retrieves all cross-references in a LangiumDocument and tries to resolve them. Reference resolution consists of three main steps:\n Query the ScopeProvider to obtain a scope. A scope describes all symbols that are reachable from the AST node holding the cross-reference. In the obtained scope, find the description of a symbol whose name matches the identifier given at the cross-reference. Load the AST node for that description. The AST node is given either directly (for a local symbol) or indirectly through a path string (for a symbol exported from another document). The default implementation of the ScopeProvider service creates a hierarchical scope by traveling from the given cross-reference via its container nodes up to the root of the AST, and collecting symbol descriptions from the precomputed scopes (created in the preceding phase). The symbols are filtered to match the type of the cross-reference target. Symbols that are closer to the cross-reference shadow those that are further above in the AST, which means they have higher priority to be chosen as cross-reference targets. As a last resort, the global scope computed in the initial indexing phase is included in the hierarchical scope. Symbols that cannot be found locally are looked up in the global scope.\nThe ScopeProvider can be overridden to implement complex scenarios for scoping and cross-reference resolution. 
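For orientation, a minimal hedged sketch of such an override (the property name 'member' is a made-up example, not part of any real grammar): dispatch on the reference context and fall back to the default hierarchical scope for everything else.

```ts
import { DefaultScopeProvider, EMPTY_SCOPE, ReferenceInfo, Scope } from 'langium';

export class CustomScopeProvider extends DefaultScopeProvider {
    override getScope(context: ReferenceInfo): Scope {
        // `context` identifies the AST node and property holding the reference.
        if (context.property === 'member') {
            // Build a context-dependent scope here, e.g. derived from the
            // (lazily resolved) target of another cross-reference.
            return EMPTY_SCOPE;
        }
        // Everything else gets the default hierarchical scope.
        return super.getScope(context);
    }
}
```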
Since cross-references can be linked lazily in this phase, it is possible to create a scope for a cross-reference depending on the resolved target of another cross-reference.\nOnce the linking is complete, the document\u0026rsquo;s state is set to Linked.\ngraph LR; A(Linker\nlinks references to their target AstNodes) -- B(ScopeProvider\ncreates a Scope for the context of a Reference) A -- C(AstNodeLocator\nresolves an AstNode from its path) Indexing of Cross-References Once the cross-references have been resolved by the linker, the IndexManager kicks in a second time to create descriptions of cross-references between different documents. Such a ReferenceDescription implies a dependency from its source document to its target document. This information ensures an efficient lookup to identify which other documents may be impacted by a change in a LangiumDocument.\nAfter the cross-references have been indexed, the document\u0026rsquo;s state is set to IndexedReferences.\ngraph LR; A(IndexManager\nmanages metadata of cross-references\nbetween documents) -- B(ReferenceDescriptionProvider\ncreates descriptions of all cross-references) B -- C(AstNodeLocator\ngets the path of an AstNode) Validation The DocumentValidator creates an array of Diagnostics from a LangiumDocument. This array contains all errors that have occurred during lexing, parsing, and linking, and the results of a set of custom validations with varying severity (error, warning, info). The custom validations are registered with the ValidationRegistry service.\nAfter the diagnostics have been created, the document\u0026rsquo;s state is set to Validated.\ngraph LR; A(DocumentValidator\ntranslates parser and linker errors to Diagnostics,\nand executes custom validation checks) -- B(ValidationRegistry\nmanages custom validation checks for each AST node type) At this point, all documents have been processed by the DocumentBuilder and the workspace is ready to process requests from the editor (e.g. completion).\nModifications of a document When a TextDocument is modified, the language client (IDE) notifies the language server, which triggers corresponding events. In Langium, a change in a TextDocument\u0026rsquo;s content leads to the invalidation of the associated LangiumDocument. The document\u0026rsquo;s state is set to Changed and the document\u0026rsquo;s entry is removed from the LangiumDocuments service. If the TextDocument was deleted, the corresponding LangiumDocument is removed from the index in the IndexManager service. If the document\u0026rsquo;s content was modified, a new instance of LangiumDocument is created as described above. All other documents that may have been affected as a result of the modification get their references unlinked and their state is modified such that they run through the linking phase again. The DocumentBuilder then processes the newly created document along with all other documents that have not reached the Validated state yet.\nTo determine which documents are affected by a change, the IndexManager uses the reference descriptions gathered in the reference indexing phase.\n"},{"id":23,"href":"/docs/recipes/scoping/file-based/","title":"File-based scoping","parent":"Scoping","content":"Goal By default, Langium will always expose all top-level AST elements to the global scope. That means they are visible to all other documents in your workspace. 
However, a lot of languages are better served with a JavaScript-like import/export mechanism:\n Using export makes a symbol from the current file available for referencing from another file. Using import allows you to reference symbols from a different file. To make things easier I will modify the \u0026ldquo;Hello World\u0026rdquo; example from the learning section.\nStep 1: Change the grammar First off, we are changing the grammar to support the export and the import statements. Let\u0026rsquo;s take a look at the modified grammar:\ngrammar HelloWorld entry Model: ( fileImports+=FileImport //NEW: imports per file | persons+=Person | greetings+=Greeting )*; FileImport: //NEW: imports of the same file are gathered in a list 'import' '{' personImports+=PersonImport (',' personImports+=PersonImport)* '}' 'from' file=STRING; PersonImport: person=[Person:ID] ('as' name=ID)?; Person: published?='export'? 'person' name=ID; //NEW: export keyword type Greetable = PersonImport | Person Greeting: 'Hello' person=[Greetable:ID] '!'; hidden terminal WS: /\\s+/; terminal ID: /[_a-zA-Z][\\w_]*/; terminal STRING: /\u0026quot;(\\\\.|[^\u0026quot;\\\\])*\u0026quot;|'(\\\\.|[^'\\\\])*'/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; After changing the grammar you need to regenerate the abstract syntax tree (AST) and the language infrastructure. You can do that by running the following command:\nnpm run langium:generate Step 2: Exporting persons to the global scope The index manager shall get all persons that are marked with the export keyword. In Langium this is done by overriding the ScopeComputation.computeExports(…) function. Here is the implementation:\nexport class HelloWorldScopeComputation extends DefaultScopeComputation { override async computeExports(document: LangiumDocument\u0026lt;AstNode\u0026gt;): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const model = document.parseResult.value as Model; return model.persons .filter(p =\u0026gt; p.published) .map(p =\u0026gt; this.descriptions.createDescription(p, p.name)); } } After that, you need to register the HelloWorldScopeComputation in the HelloWorldModule:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //... references: { ScopeComputation: (services) =\u0026gt; new HelloWorldScopeComputation(services) } }; Having done this will make all persons that are marked with the export keyword available to other files through the index manager.\nStep 3: Importing from specific files The final step is to adjust the cross-reference resolution by overriding the DefaultScopeProvider.getScope(…) function:\nexport class HelloWorldScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { switch(context.container.$type as keyof HelloWorldAstType) { case 'PersonImport': if(context.property === 'person') { return this.getExportedPersonsFromGlobalScope(context); } break; case 'Greeting': if(context.property === 'person') { return this.getImportedPersonsFromCurrentFile(context); } break; } return EMPTY_SCOPE; } //... } Do not forget to add the new service to the HelloWorldModule:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //... 
references: { ScopeComputation: (services) =\u0026gt; new HelloWorldScopeComputation(services), ScopeProvider: (services) =\u0026gt; new HelloWorldScopeProvider(services) //NEW! } }; You noticed the two missing functions? Here is what they have to do.\nThe first function (getExportedPersonsFromGlobalScope(context)) will take a look at the global scope and return all exported persons, restricted to the files referenced by the file imports. Note that we are outputting all persons that are marked with the export keyword. The actual name resolution is done internally later by the linker.\nprivate getExportedPersonsFromGlobalScope(context: ReferenceInfo): Scope { //get document for current reference const document = AstUtils.getDocument(context.container); //get model of document const model = document.parseResult.value as Model; //get URI of current document const currentUri = document.uri; //get folder of current document const currentDir = dirname(currentUri.path); const uris = new Set\u0026lt;string\u0026gt;(); //for all file imports of the current file for (const fileImport of model.fileImports) { //resolve the file name relative to the current file const filePath = join(currentDir, fileImport.file); //turn the path back into a URI const uri = currentUri.with({ path: filePath }); //add the URI to the URI list uris.add(uri.toString()); } //get all possible persons from these files const astNodeDescriptions = this.indexManager.allElements(Person, uris).toArray(); //convert them to descriptions inside of a scope return this.createScope(astNodeDescriptions); } The second function (getImportedPersonsFromCurrentFile(context)) will take a look at the current file and return all persons that are imported from other files.\nprivate getImportedPersonsFromCurrentFile(context: ReferenceInfo) { //get current document of reference const document = AstUtils.getDocument(context.container); //get current model const model = document.parseResult.value as Model; //go through all imports const descriptions = model.fileImports.flatMap(fi =\u0026gt; fi.personImports.map(pi =\u0026gt; { //if the import has an alias name, return the import if (pi.name) { return this.descriptions.createDescription(pi, pi.name); } //if the import references a person, return that person if (pi.person.ref) { return this.descriptions.createDescription(pi.person.ref, pi.person.ref.name); } //otherwise return nothing return undefined; }).filter(d =\u0026gt; d != undefined)).map(d =\u0026gt; d!); return this.createScope(descriptions); } Result Now, let\u0026rsquo;s test the editor by running npm run build and starting the extension. Try using these two files. The first file contains the Simpsons family.\nexport person Homer export person Marge person Bart person Lisa export person Maggy The second file tries to import and greet them.\nimport { Marge, Homer, Lisa, //reference error, because not exported Maggy as Baby } from \u0026quot;persons.hello\u0026quot; Hello Lisa! //reference error, because no valid import Hello Maggy! //reference error, because name was overwritten with 'Baby' Hello Homer! Hello Marge! Hello Baby! 
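If you prefer to check the export behavior programmatically rather than in the editor, a hedged sketch using Langium's test helper could look like the following. The service factory name and import paths are assumed from the hello-world project, and it is assumed that parseHelper builds the document far enough for the index to be populated.

```ts
import { EmptyFileSystem } from 'langium';
import { parseHelper } from 'langium/test';
import type { Model } from './generated/ast.js';
import { createHelloWorldServices } from './hello-world-module.js';

const services = createHelloWorldServices(EmptyFileSystem);
const parse = parseHelper<Model>(services.HelloWorld);

// Parse a document containing one exported and one private person.
await parse('export person Homer\nperson Bart');

// Only the exported person should appear in the global index.
const exported = services.shared.workspace.IndexManager
    .allElements()
    .map(description => description.name)
    .toArray();
console.log(exported); // expected to contain 'Homer' but not 'Bart'
```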
Full Implementation import { AstNode, AstNodeDescription, AstUtils, DefaultScopeComputation, DefaultScopeProvider, EMPTY_SCOPE, LangiumDocument, ReferenceInfo, Scope } from \u0026quot;langium\u0026quot;; import { CancellationToken } from \u0026quot;vscode-languageclient\u0026quot;; import { HelloWorldAstType, Model, Person } from \u0026quot;./generated/ast.js\u0026quot;; import { dirname, join } from \u0026quot;node:path\u0026quot;; export class HelloWorldScopeComputation extends DefaultScopeComputation { override async computeExports(document: LangiumDocument\u0026lt;AstNode\u0026gt;): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const model = document.parseResult.value as Model; return model.persons .filter(p =\u0026gt; p.published) .map(p =\u0026gt; this.descriptions.createDescription(p, p.name)) ; } } export class HelloWorldScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { switch(context.container.$type as keyof HelloWorldAstType) { case 'PersonImport': if(context.property === 'person') { return this.getExportedPersonsFromGlobalScope(context); } break; case 'Greeting': if(context.property === 'person') { return this.getImportedPersonsFromCurrentFile(context); } break; } return EMPTY_SCOPE; } protected getExportedPersonsFromGlobalScope(context: ReferenceInfo): Scope { //get document for current reference const document = AstUtils.getDocument(context.container); //get model of document const model = document.parseResult.value as Model; //get URI of current document const currentUri = document.uri; //get folder of current document const currentDir = dirname(currentUri.path); const uris = new Set\u0026lt;string\u0026gt;(); //for all file imports of the current file for (const fileImport of model.fileImports) { //resolve the file name relative to the current file const filePath = join(currentDir, fileImport.file); //turn the path back into a URI const uri = currentUri.with({ path: filePath }); //add the URI to the URI list uris.add(uri.toString()); } //get all possible persons from these files const astNodeDescriptions = this.indexManager.allElements(Person, uris).toArray(); //convert them to descriptions inside of a scope return this.createScope(astNodeDescriptions); } private getImportedPersonsFromCurrentFile(context: ReferenceInfo) { //get current document of reference const document = AstUtils.getDocument(context.container); //get current model const model = document.parseResult.value as Model; //go through all imports const descriptions = model.fileImports.flatMap(fi =\u0026gt; fi.personImports.map(pi =\u0026gt; { //if the import has an alias name, return the import if (pi.name) { return this.descriptions.createDescription(pi, pi.name); } //if the import references a person, return that person if (pi.person.ref) { return this.descriptions.createDescription(pi.person.ref, pi.person.ref.name); } //otherwise return nothing return undefined; }).filter(d =\u0026gt; d != undefined)).map(d =\u0026gt; d!); return this.createScope(descriptions); } } "},{"id":24,"href":"/docs/recipes/formatting/","title":"Formatting","parent":"Recipes","content":"Langium\u0026rsquo;s formatting API allows you to easily create formatters for your language. 
We start building a custom formatter for our language by creating a new class that inherits from AbstractFormatter.\nimport { AbstractFormatter, AstNode, Formatting } from 'langium'; export class CustomFormatter extends AbstractFormatter { protected format(node: AstNode): void { // This method is called for every AstNode in a document } } ... // Bind the class in your module export const CustomModule: Module\u0026lt;CustomServices, PartialLangiumServices\u0026gt; = { lsp: { Formatter: () =\u0026gt; new CustomFormatter() } }; The entry point for the formatter is the abstract format(AstNode) method. The AbstractFormatter calls this method for every node of our model. To perform custom formatting for every type of node, we will use pattern matching. In the following example, we will take a closer look at a formatter for the domain-model language. In particular, we will see how we can format the root of our model (DomainModel) and each nested element (Entity and PackageDeclaration).\nTo format each node, we use the getNodeFormatter method of the AbstractFormatter. The resulting generic NodeFormatter\u0026lt;T extends AstNode\u0026gt; provides us with methods to select specific parts of a parsed AstNode such as properties or keywords.\nOnce we have selected the nodes of our document that we are interested in formatting, we can start applying a specific formatting. Each formatting option allows you to prepend/append whitespace to each node. The Formatting namespace provides a few predefined formatting options which we can use for this:\n newLine Adds one newline character (while preserving indentation). newLines Adds a specified amount of newline characters. indent Adds one level of indentation. Automatically also adds a newline character. noIndent Removes all indentation. oneSpace Adds one whitespace character. spaces Adds a specified amount of whitespace characters. noSpace Removes all spaces. fit Tries to fit the existing text into one of the specified formattings. We first start off by formatting the Domainmodel element of our DSL. It is the root node of every document and just contains a list of other elements. These elements need to be realigned to the root of the document in case they are indented. We will use the Formatting.noIndent option for that:\nif (ast.isDomainmodel(node)) { // Create a new node formatter const formatter = this.getNodeFormatter(node); // Select a formatting region which contains all children const nodes = formatter.nodes(...node.elements); // Prepend all these nodes with no indent nodes.prepend(Formatting.noIndent()); } Our other elements, namely Entity and PackageDeclaration, can be arbitrarily deeply nested, so using noIndent is out of the question for them. Instead we will use indent on everything between the { and } tokens. The formatter internally keeps track of the current indentation level:\nif (ast.isEntity(node) || ast.isPackageDeclaration(node)) { const formatter = this.getNodeFormatter(node); const bracesOpen = formatter.keyword('{'); const bracesClose = formatter.keyword('}'); // Add a level of indentation to each element // between the opening and closing braces. 
// This even includes comment nodes formatter.interior(bracesOpen, bracesClose).prepend(Formatting.indent()); // Also move the newline to a closing brace bracesClose.prepend(Formatting.newLine()); // Surround the name property of the element // with one space to each side formatter.property(\u0026quot;name\u0026quot;).surround(Formatting.oneSpace()); } Note that most predefined Formatting methods accept additional arguments which make the resulting formatting more lenient. For example, the prepend(newLine({ allowMore: true })) formatting will not apply formatting in case the node is already preceded by one or more newlines. It will still correctly indent the node in case the indentation is not as expected.\n Full Code Sample import { AbstractFormatter, AstNode, Formatting } from 'langium'; import * as ast from './generated/ast'; export class DomainModelFormatter extends AbstractFormatter { protected format(node: AstNode): void { if (ast.isEntity(node) || ast.isPackageDeclaration(node)) { const formatter = this.getNodeFormatter(node); const bracesOpen = formatter.keyword('{'); const bracesClose = formatter.keyword('}'); formatter.interior(bracesOpen, bracesClose).prepend(Formatting.indent()); bracesClose.prepend(Formatting.newLine()); formatter.property('name').surround(Formatting.oneSpace()); } else if (ast.isDomainmodel(node)) { const formatter = this.getNodeFormatter(node); const nodes = formatter.nodes(...node.elements); nodes.prepend(Formatting.noIndent()); } } } "},{"id":25,"href":"/docs/recipes/lexing/indentation-sensitive-languages/","title":"Indentation-sensitive languages","parent":"Lexing","content":"Some programming languages (such as Python, Haskell, and YAML) use indentation to denote nesting, as opposed to special non-whitespace tokens (such as { and } in C++/JavaScript). This can be difficult to express in the EBNF notation used for defining a language grammar in Langium, which is context-free. To achieve that, you can make use of synthetic tokens in the grammar which you would then redefine using Chevrotain in a custom token builder.\nStarting with Langium v3.2, such a token builder and an accompanying lexer are provided for easy plugging into your language.\nConfiguring the token builder and lexer To be able to use the indentation tokens in your grammar, you first have to import and register the IndentationAwareTokenBuilder and IndentationAwareLexer services in your module as follows:\nimport { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium'; // ... export const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { // ... parser: { TokenBuilder: () =\u0026gt; new IndentationAwareTokenBuilder(), Lexer: (services) =\u0026gt; new IndentationAwareLexer(services), }, }; // ... The IndentationAwareTokenBuilder constructor optionally accepts an object defining the names of the tokens you used to denote indentation and whitespace in your .langium grammar file. It defaults to:\n{ indentTokenName: 'INDENT', dedentTokenName: 'DEDENT', whitespaceTokenName: 'WS', } Writing the grammar In your langium file, you have to define terminals with the same names you passed to IndentationAwareTokenBuilder (or the defaults shown above if you did not override them). 
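If you do override the defaults, the registration might look like this hedged sketch. The token names BEGIN_BLOCK, END_BLOCK, and SPACES are made-up examples, and the module typing is assumed to come from your generated project files:

```ts
import { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium';
// HelloWorldServices and HelloWorldAddedServices are assumed to be declared
// in your project's module file, as in the snippet above.

export const HelloWorldModule: Module<HelloWorldServices, PartialLangiumServices & HelloWorldAddedServices> = {
    parser: {
        // Custom token names must match the terminal names in the grammar.
        TokenBuilder: () => new IndentationAwareTokenBuilder({
            indentTokenName: 'BEGIN_BLOCK',
            dedentTokenName: 'END_BLOCK',
            whitespaceTokenName: 'SPACES',
        }),
        Lexer: (services) => new IndentationAwareLexer(services),
    },
};
```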
For example, let\u0026rsquo;s define the grammar for a simple version of Python with support for only if and return statements, and only booleans as expressions:\ngrammar PythonIf entry Statement: If | Return; If: 'if' condition=BOOLEAN ':' INDENT thenBlock+=Statement+ DEDENT ('else' ':' INDENT elseBlock+=Statement+ DEDENT)?; Return: 'return' value=BOOLEAN; terminal BOOLEAN returns boolean: /true|false/; terminal INDENT: 'synthetic:indent'; terminal DEDENT: 'synthetic:dedent'; hidden terminal WS: /[\\t ]+/; hidden terminal NL: /[\\r\\n]+/; The important terminals here are INDENT, DEDENT, and WS. INDENT and DEDENT are used to delimit a nested block, similar to { and } (respectively) in C-like languages. Note that INDENT indicates an increase in indentation, not just the existence of leading whitespace, which is why in the example above we used it only at the beginning of the block, not before every Statement.\nThe content you choose for these 3 terminals doesn\u0026rsquo;t matter since it will be overridden by the IndentationAwareTokenBuilder anyway. However, you might still want to choose tokens that don\u0026rsquo;t overlap with other terminals for easier use in the playground.\nPlayground compatibility Since the Langium playground doesn\u0026rsquo;t support overriding the default services, you cannot use an indentation-aware grammar there. However, you can get around this by defining the indentation terminals in a way that doesn\u0026rsquo;t overlap with other terminals, and then actually using them to simulate indentation.\nFor example, for the grammar above, you can write:\nif false: synthetic:indent return true synthetic:dedent else: synthetic:indent if false: synthetic:indent return false synthetic:dedent synthetic:dedent instead of:\nif false: return true else: if false: return false since all whitespace will be ignored anyway.\nWhile this approach doesn\u0026rsquo;t easily scale, it can be useful for testing when defining your grammar.\nDrawbacks Using this token builder, all leading whitespace becomes significant, no matter the context. This means that it will no longer be possible for an expression to span multiple lines if one of these lines starts with whitespace and an INDENT token is not explicitly allowed in that position.\nFor example, the following Python code wouldn\u0026rsquo;t parse:\nx = [ 1, # ERROR: Unexpected INDENT token ] without explicitly specifying that INDENT is allowed after [.\nThis can be worked around by using multi-mode lexing.\n"},{"id":26,"href":"/docs/recipes/keywords-as-identifiers/","title":"Keywords as Identifiers","parent":"Recipes","content":"As you write your grammar, you will add keywords such as var, get or function to improve the readability of your language and to add structure. These keywords get special keyword highlighting whenever they are used (by default even at unintended locations according to your grammar) and are handled separately from other terminals such as names, identifiers, numbers and so on. You will quickly notice that a function such as function get() will lead to parser errors by default, as get is identified as a keyword and not as an identifier. 
This guide is all about how to explicitly enable these keywords (highlighted in blue) to be supported as identifiers (highlighted in white) as well.\nLet\u0026rsquo;s look at the \u0026ldquo;hello-world\u0026rdquo; example in the playground or as a new local project created with yo langium (for details on how to set up your first Langium project, read getting started):\nHere, it is not possible to introduce a person whose name is \u0026ldquo;Hello\u0026rdquo;, since Hello is a dedicated keyword of the language. Additionally, we cannot greet a person called \u0026ldquo;Hello\u0026rdquo; either. The same applies to the keyword \u0026ldquo;person\u0026rdquo;, but let\u0026rsquo;s focus on enabling \u0026ldquo;Hello\u0026rdquo; as name for persons.\nTo enable keywords as identifiers, you need to apply the following three steps:\n Step 1: Modify the grammar to explicitly parse keywords as property values Step 2: Change the semantic type of the resulting token Step 3: Ensure that your editor styles the chosen semantic token type Step 1: Modify the grammar to explicitly parse keywords as property values The first step is to modify the grammar to explicitly parse keywords as property values. At the moment, the parser rule for introducing persons looks like this:\nPerson: 'person' name=ID; terminal ID: /[_a-zA-Z][\\w_]*/; Note that the terminal rule for ID already covers the string \u0026ldquo;Hello\u0026rdquo;. However, since the parser rule for greeting persons uses \u0026ldquo;Hello\u0026rdquo; as keyword, the keyword takes precedence:\nGreeting: 'Hello' person=[Person:ID] '!'; Roughly summarized, the background for this behaviour is that Langium\u0026rsquo;s internally used LL(k) parser implementation named Chevrotain first does lexing/tokenizing, i.e. splitting text into single tokens, such as keywords, identifiers and delimiters. The actual parsing, i.e. the application of the parser rules, is performed afterwards on these tokens. Chevrotain uses regular expressions (regex) for splitting text into tokens. Since keywords are implemented as regex as well and take precedence, all occurrences of \u0026ldquo;Hello\u0026rdquo; are treated as keywords for the parser rule named Greeting, even a \u0026ldquo;Hello\u0026rdquo; intended to be a name, which finally causes the two syntax errors.\nIn order to explicitly enable parsing \u0026ldquo;Hello\u0026rdquo; as name as well, modify the parser rule for persons in this way:\nPerson: 'person' name=(ID | 'Hello'); terminal ID: /[_a-zA-Z][\\w_]*/; // the terminal rule for ID is unchanged! Now Langium knows that the keyword \u0026ldquo;Hello\u0026rdquo; may also occur as a value for the name property of the parser rule for persons. That\u0026rsquo;s it! (Don\u0026rsquo;t forget to run npm run langium:generate after updating the grammar.)\nSince the name property is used for cross-references by the parser rule for greetings, \u0026ldquo;Hello\u0026rdquo; needs to be supported here as well. For that we recommend introducing a data type rule like \u0026ldquo;PersonID\u0026rdquo; in the example, since it makes it easier to support more keywords in the future:\nPerson: 'person' name=PersonID; Greeting: 'Hello' person=[Person:PersonID] '!'; PersonID returns string: ID | 'Hello'; Now, your editor accepts \u0026ldquo;Hello\u0026rdquo; as value for persons' names. Nevertheless, the name \u0026ldquo;Hello\u0026rdquo; is still highlighted in blue and looks like a keyword \u0026ldquo;Hello\u0026rdquo;. 
This leads us to the second step.\nStep 2: Change the semantic type of the resulting token The second step is to change the semantic type of the resulting token in order to adjust the highlighting in the editor: While parsing text with Langium is done in a language server, the highlighting is done in editors (the language clients). Editors like VS Code usually use syntax highlighting based on the tokenized text. This highlighting can be complemented by semantic highlighting with additional semantic types for the tokens from the language server.\nIn the case of Langium and VS Code, VS Code by default uses TextMate grammars, which can be seen as collections of regex (and which are generated by npm run langium:generate), to split the text into tokens and to assign a (syntactic) type to these tokens. The color for highlighting the token is chosen depending on the assigned type. In the example, a regex for the \u0026ldquo;Hello\u0026rdquo; keyword matches all strings \u0026ldquo;Hello\u0026rdquo; in text, resulting in the blue color even for \u0026ldquo;Hello\u0026rdquo; used as name.\nSince Langium applies the parser rules to the token stream, Langium is able to distinguish \u0026ldquo;Hello\u0026rdquo; tokens used as keyword for greetings and \u0026ldquo;Hello\u0026rdquo; tokens used as name for persons and therefore is able to assign different semantic types to \u0026ldquo;Hello\u0026rdquo; tokens for persons and for greetings. According to the Language Server Protocol (LSP), these semantic token types are sent to editors like VS Code, which complement the syntactic types of tokens with these semantic types. The default highlighting of tokens according to the syntactic type is then altered according to the semantic token type and the color theme selected by the user (editor preferences).\nIn Langium, the SemanticTokenProvider service is responsible for assigning language-dependent semantic types to tokens. Therefore, we customize the default semantic token provider like this:\nimport { AbstractSemanticTokenProvider, AstNode, SemanticTokenAcceptor } from 'langium'; import { isPerson } from './generated/ast.js'; import { SemanticTokenTypes } from 'vscode-languageserver'; export class HelloWorldSemanticTokenProvider extends AbstractSemanticTokenProvider { protected override highlightElement(node: AstNode, acceptor: SemanticTokenAcceptor): void { if (isPerson(node)) { acceptor({ node, property: 'name', type: SemanticTokenTypes.class }); } } } For all persons (isPerson(...) in line 7), we explicitly specify the semantic type for the token of their 'name' property. Here, we use SemanticTokenTypes.class as semantic type. For your case, select a predefined type which fits your domain best. Since the name is used as cross-reference by greetings, a similar check and assignment of a semantic token type needs to be done for the person property of Greeting as well.\nAfter creating the semantic token provider, you need to register the HelloWorldSemanticTokenProvider in hello-world-module.ts in the following way:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { // ... lsp: { SemanticTokenProvider: (services) =\u0026gt; new HelloWorldSemanticTokenProvider(services) } }; Now rebuild and restart your application and test the improvements of the second step:\nThe HelloWorldSemanticTokenProvider works, and you might see a different highlighting XOR you might not see any difference, e.g. 
\u0026ldquo;Hello\u0026rdquo; is still blue here. This leads us to the third step.\nStep 3: Ensure that your editor styles the chosen semantic token type The third step is to ensure that your editor supports the assigned semantic tokens: Depending on your editor and the currently selected color theme, the semantic token type selected in HelloWorldSemanticTokenProvider might not be supported or might not get a different color in the color theme. The easiest way to detect such problems is to change the current color theme and to try some others. Note that VS Code allows you to switch off semantic highlighting for all themes with the setting editor.semanticHighlighting.enabled.\nAfter switching from \u0026ldquo;Dark (Visual Studio)\u0026rdquo; to \u0026ldquo;Dark Modern\u0026rdquo; in VS Code, the example looks as expected. You can switch the current color theme in VS Code with cmd + K cmd + T (or via the menu: Code -\u0026gt; Settings\u0026hellip; -\u0026gt; Theme -\u0026gt; Color Theme).\nNow \u0026ldquo;Hello\u0026rdquo; is highlighted in purple if used as a keyword, and written in green if used as the value for the name of a person. Another solution is to select a different semantic type for your token in step two.\nWhile step one is mandatory to enable keywords as values in general, step two improves the user experience of your language. While step one and step two can be handled in the LSP server once for your language, step three highly depends on your editor and its color themes (in the LSP clients), which makes step three quite complicated to handle.\nNow you have learned how to enable keywords as regular values for properties. Feel free to enable the keyword \u0026ldquo;person\u0026rdquo; as name for persons in the example on your own.\nWord to the wise: Enabling certain strings to be used interchangeably as keywords and identifiers/values is possible, but has some costs. It always needs to be evaluated case by case whether the benefits are worth these costs. Additionally, using keywords as identifiers impacts the user experience; therefore, involve the users of your language!\nSome hints beyond this guide:\n In multi-grammar projects, only keywords of the included grammars are affected by this general problem, but not keywords of other languages or Langium grammar files. In order to get an overview of the keywords of your language, have a look into the generated TextMate grammar *.tmLanguage.json and search for the pattern named keyword.control.*, which contains a regex with the keywords. Read about the concept of semantic tokens in the Language Server Protocol (LSP) including predefined semantic types for tokens. Read about how VS Code realizes semantic highlighting using semantic tokens. Dive into tokenizing of Chevrotain with regex. "},{"id":27,"href":"/showcase/","title":"Langium Showcase","parent":"Langium","content":"Showcase Welcome to Langium's showcase! Here you can find examples of languages created using Langium, all running in the browser (no backend involved). "},{"id":28,"href":"/showcase/minilogo/","title":"MiniLogo","parent":"Langium Showcase","content":""},{"id":29,"href":"/docs/reference/","title":"Reference","parent":"Documentation","content":"This section contains the reference documentation for Langium.\nWhere to go from here? 
Glossary If you are looking for a specific term or concept, you can find it in the glossary.\nGrammar language If you are looking for a specific grammar language feature, you can find it in the grammar language.\nArchitecture If you are looking for a specific architecture feature, here are some nice readings:\n Configuration via services Document lifecycle Semantic model "},{"id":30,"href":"/docs/introduction/showcases/","title":"Showcases","parent":"What is Langium?","content":""},{"id":31,"href":"/docs/learn/workflow/write_grammar/","title":"3. Write the grammar","parent":"Langium's workflow","content":"Your Langium project is now set up and ready to be used. The next step is to define the grammar of your language. The grammar is the most important part of your language definition. It defines the syntax of your language and how the language elements are structured.\nThe grammar is defined in a .langium file. Make sure that you have installed the VS Code extension for Langium. This extension provides syntax highlighting and code completion for .langium files. Here\u0026rsquo;s the grammar from the Hello-World example that was generated by the Yeoman generator:\ngrammar HelloWorld hidden terminal WS: /\\s+/; terminal ID: /[_a-zA-Z][\\w]*/; entry Model: (persons+=Person | greetings+=Greeting)*; Person: 'person' name=ID; Greeting: 'Hello' person=[Person] '!'; Let\u0026rsquo;s go through this one by one:\ngrammar HelloWorld Before we tell Langium anything about our grammar contents, we first need to give it a name - in this case it\u0026rsquo;s HelloWorld. The langium-cli will pick this up to prefix any generated services with this name.\nhidden terminal WS: /\\s+/; terminal ID: /[_a-zA-Z][\\w]*/; Here we define our two needed terminals for this grammar: the whitespace WS and identifier ID terminals. Terminals parse a part of our document by matching it against their regular expression. The WS terminal parses any whitespace characters with the regex /\\s+/. This allows us to consume whitespaces in our document. As the terminal is declared as hidden, the parser will parse any whitespace and discard the results. That way, we don\u0026rsquo;t have to care about how many whitespaces a user uses in their document. Secondly, we define our ID terminal. It parses any string that starts with an underscore or letter and continues with any number of characters that match the \\w regex token. It will match Alice, _alice, or _al1c3 but not 4lice or #alice. Langium uses the JS regex dialect for terminal definitions.\nentry Model: (persons+=Person | greetings+=Greeting)*; The Model parser rule is the entry point to our grammar. Parsing always starts with the entry rule. Here we define a repeating group of alternatives: persons+=Person | greetings+=Greeting. This will always try to parse either a Person or a Greeting and add it to the respective list of persons or greetings in the Model object. Since the alternative is wrapped in a repeating group *, the parser will continue until all input has been consumed.\nPerson: 'person' name=ID; The Person rule starts off with the 'person' keyword. Keywords are like terminals, in the sense that they parse a part of the document. The set of keywords and terminals create the tokens that your language is able to parse. You can imagine that the 'person' keyword here is like an indicator to tell the parser that an object of type Person should be parsed. 
After the keyword, we assign the Person a name by parsing an ID.\nGreeting: 'Hello' person=[Person] '!'; Like the previous rule, the Greeting starts with a keyword. With the person assignment we introduce the cross reference, indicated by the brackets []. A cross reference will allow your grammar to reference other elements that are contained in your file or workspace. By default, Langium will try to resolve this cross reference by parsing the terminal that is associated with its name property. In this case, we are looking for a Person whose name property matches the parsed ID.\nThat finishes the short introduction to Langium! Feel free to play around with the grammar and use npm run langium:generate to regenerate the generated TypeScript files. To go further, we suggest that you continue with our tutorials.\n"},{"id":32,"href":"/docs/recipes/multiple-languages/","title":"Multiple dependent languages","parent":"Recipes","content":"This guide is about integrating multiple dependent languages in one Langium project.\nOne common situation where it makes sense to create dependent languages is when you only want to read concepts in one language and predefine them in another file (probably also a built-in one). Think of splitting SQL into a defining part (CREATE TABLE table (...)) and a reading part (SELECT * FROM table).\n Notice that for n independent languages, you can simply create n independent Langium projects.\n If you want to see a living example, I recommend visiting the requirements example of the main Langium repository.\nOur plan The entire change touches several files. Let\u0026rsquo;s summarize what needs to be done:\n the grammar (the *.langium file) needs to be split into the three parts that were discussed above the Langium configuration (the langium-config.json file in the Langium project root) needs to split the language configuration into three parts the module file of your language (XXX-module.ts) needs to create the new language services as well. Last, but not least, you have to clean up all dependent files. Here we can give general hints. if you have a VSCode extension the package.json needs to be adapted the extension entry point file (src/extension/main.ts) needs to be changed slightly Our scenario To keep this guide easy, I will use the hello-world project of the learning section.\nLet’s imagine that we have three languages:\n the first language defines persons the second language greets persons of the first language the third language configures which person you are Just as a finger practice, let\u0026rsquo;s require that you cannot greet yourself.\n flowchart Implementation --|requires| Definition Configuration --|requires| Definition Implementation --|requires| Configuration Let\u0026rsquo;s start Grammar The most relevant change might be in the grammar. 
Here is the original grammar from the hello-world example, which is generated by Langium\u0026rsquo;s Yeoman generator:\ngrammar MultipleLanguages entry Model: (persons+=Person | greetings+=Greeting)*; Person: 'person' name=ID; Greeting: 'Hello' person=[Person:ID] '!'; hidden terminal WS: /\\s+/; terminal ID: /[_a-zA-Z][\\w_]*/; terminal INT returns number: /[0-9]+/; terminal STRING: /\u0026quot;(\\\\.|[^\u0026quot;\\\\])*\u0026quot;|'(\\\\.|[^'\\\\])*'/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; Now, split it into three new files (let\u0026rsquo;s call the entry rules units; the files can be named multiple-languages-(configuration|definition|implementation).langium):\nOur definition grammar:\ngrammar MultiDefinition entry DefinitionUnit: (persons+=Person)*; Person: 'person' name=ID; hidden terminal WS: /\\s+/; terminal ID: /[_a-zA-Z][\\w_]*/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; Our configuration grammar (note the import):\ngrammar MultiConfiguration import \u0026quot;multiple-languages-definition\u0026quot;; entry ConfigurationUnit: 'I' 'am' who=[Person:ID] '.'; Our implementation grammar (note the import again):\ngrammar MultiImplementation import \u0026quot;multiple-languages-definition\u0026quot;; entry ImplementationUnit: (greetings+=Greeting)*; Greeting: 'Hello' person=[Person:ID] '!'; Langium configuration Splitting the grammar alone is not sufficient to generate anything using the CLI. You need to change the langium-config.json in the root folder as well. Let\u0026rsquo;s make it happen!\nThe initial version of this file was:\n{ \u0026quot;projectName\u0026quot;: \u0026quot;MultipleLanguages\u0026quot;, \u0026quot;languages\u0026quot;: [{ \u0026quot;id\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages.monarch.ts\u0026quot; } }], \u0026quot;out\u0026quot;: \u0026quot;src/language/generated\u0026quot; } The actual change is simple: Triple the object in the languages list and fill in reasonable values. 
Like here:\n{ \u0026quot;projectName\u0026quot;: \u0026quot;MultipleLanguages\u0026quot;, \u0026quot;languages\u0026quot;: [{ \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-configuration.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.me\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-configuration.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-configuration.monarch.ts\u0026quot; } }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-definition.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.who\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-definition.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-definition.monarch.ts\u0026quot; } }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-implementation.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-implementation.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-implementation.monarch.ts\u0026quot; } }], \u0026quot;out\u0026quot;: \u0026quot;src/language/generated\u0026quot; } From now on you are able to run the Langium CLI using the NPM scripts (npm run langium:generate). It will generate one file for the abstract syntax tree (AST) containing all language concepts (it is also a good idea to keep the names of these concepts disjoint).\nFor the next step you need to run the Langium generator once:\nnpm run langium:generate Language module file The module file describes how your language services are built. After adding two more languages, some important classes get generated, and these need to be registered properly.\n Open the module file (/src/language/multiple-languages-module.ts).\n You will notice a broken import (which is expected: we renamed the grammars in the previous steps, and new classes were derived by code generation).\n Import the new generated modules instead. Replace this line:\nimport { MultipleLanguagesGeneratedModule, MultipleLanguagesGeneratedSharedModule } from './generated/module.js'; with the following:\nimport { MultiConfigurationGeneratedModule, MultiDefinitionGeneratedModule, MultiImplementationGeneratedModule, MultipleLanguagesGeneratedSharedModule } from './generated/module.js'; In the function createMultipleLanguagesServices you will notice an error line now, because we deleted the old class name in the previous step. The code there basically needs to be tripled. But before we do this, we need to define the new output type of createMultipleLanguagesServices. 
In the end this should lead to this definition:\nexport function createMultipleLanguagesServices(context: DefaultSharedModuleContext): { shared: LangiumSharedServices, Configuration: MultipleLanguagesServices, Definition: MultipleLanguagesServices, Implementation: MultipleLanguagesServices } { const shared = inject( createDefaultSharedModule(context), MultipleLanguagesGeneratedSharedModule ); const Configuration = inject( createDefaultModule({ shared }), MultiConfigurationGeneratedModule, MultipleLanguagesModule ); const Definition = inject( createDefaultModule({ shared }), MultiDefinitionGeneratedModule, MultipleLanguagesModule ); const Implementation = inject( createDefaultModule({ shared }), MultiImplementationGeneratedModule, MultipleLanguagesModule ); shared.ServiceRegistry.register(Configuration); shared.ServiceRegistry.register(Definition); shared.ServiceRegistry.register(Implementation); registerValidationChecks(Configuration); registerValidationChecks(Definition); registerValidationChecks(Implementation); return { shared, Configuration, Definition, Implementation }; } After this step, Langium is set up correctly. But if you try to build now, the compiler will throw some errors, because the old AST concepts no longer exist.\n Be aware that we are using the same MultipleLanguagesModule in all three services - three independent services! If you want to avoid this (because of duplicated state etc.), you should put some work into creating separate instances for each service.\n Cleanup Let\u0026rsquo;s clean up the error lines. Here are some general hints:\n keep in mind that you are dealing with three file types now, namely *.me, *.who and *.hello; you can distinguish them very easily by selecting the right sub-service from the result object of createMultipleLanguagesServices, which is either Configuration, Definition, or Implementation, but not shared; all these services have a sub-service with the file extensions: [Configuration,Definition,...].LanguageMetaData.fileExtensions: string[], so when you obtain any documents from the DocumentBuilder, you can be sure that they are parsed by the matching language service; to distinguish them on your own, use the AST functions for determining the root type, for example for the Configuration language use isConfigurationUnit(document.parseResult.value). VSCode extension If you have a VSCode extension, you need to touch two files: package.json and src/extension/main.ts.\nFile package.json In this file we define what services this extension will contribute to VSCode.\nBefore the change, only one language and one grammar were defined:\n//... \u0026quot;contributes\u0026quot;: { \u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages\u0026quot;, \u0026quot;multiple-languages\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; } ], \u0026quot;grammars\u0026quot;: [ { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages.tmLanguage.json\u0026quot; } ] }, //... After the change, we tripled the information. Be aware that the language ids must match the ids from the Langium configuration. 
Also make sure that the paths to the syntax files and the language configuration are correct.\n For the language configuration for VSCode, we reused the old file three times. If you want to make a more precise configuration per language, you should also split this file. But let\u0026rsquo;s use the same for a moment and for simplicity.\n //... \u0026quot;contributes\u0026quot;: { \u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Configuration\u0026quot;, \u0026quot;multiple-languages-configuration\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.me\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Definition\u0026quot;, \u0026quot;multiple-languages-definition\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.who\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Implementation\u0026quot;, \u0026quot;multiple-languages-implementation\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; } ], \u0026quot;grammars\u0026quot;: [ { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-configuration\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-configuration.tmLanguage.json\u0026quot; }, { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-definition\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-definition.tmLanguage.json\u0026quot; }, { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-implementation\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-implementation.tmLanguage.json\u0026quot; } ] }, File src/extension/main.ts And here is the extension file before the change:\n// Options to control the language client const clientOptions: LanguageClientOptions = { documentSelector: [{ scheme: 'file', language: 'multiple-languages' }] }; After the change, it should look like this (the language IDs should be the same as they are in the Langium configuration):\n// Options to control the language client const clientOptions: LanguageClientOptions = { documentSelector: [ { scheme: 'file', language: 'multiple-languages-configuration' }, { scheme: 'file', language: 'multiple-languages-definition' }, { scheme: 'file', language: 'multiple-languages-implementation' } ] }; Test the extension Now everything should be executable. Do not forget to build!\nLet\u0026rsquo;s run the extension and create some files in our workspace:\nDefinition people.who person Markus person Michael person Frank Configuration thats.me I am Markus. Implementation greetings.hello Hello Markus! Hello Michael! 
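Before walking through the checklist, you can also sanity-check the setup programmatically. The following is a minimal sketch, not a definitive implementation - it assumes Vitest, Langium\u0026rsquo;s parseHelper test utility, and the service names from the module file above (the path your-project is a placeholder):\nimport { EmptyFileSystem } from 'langium';\nimport { parseHelper } from 'langium/test';\nimport { createMultipleLanguagesServices } from './your-project/multiple-languages-module.js';\n//each language is parsed by its own service from the result object\nconst { Definition, Implementation } = createMultipleLanguagesServices(EmptyFileSystem);\nconst parseWho = parseHelper(Definition);\nconst parseHello = parseHelper(Implementation);\nconst who = await parseWho(`person Markus person Michael person Frank`);\nconst hello = await parseHello(`Hello Markus!`);\n//this only checks parsing; cross-file linking additionally requires building the documents together\nexpect(who.parseResult.parserErrors).toHaveLength(0);\nexpect(hello.parseResult.parserErrors).toHaveLength(0);\nIf this smoke test passes, the extension itself should behave as described in the following checklist.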
Checklist You should now be able\u0026hellip;:\n to see proper syntax highlighting; to trigger auto-completion for keywords; to jump to the definition by Cmd/Ctrl-clicking on a person\u0026rsquo;s name. Add a validator (task) As promised, let\u0026rsquo;s add a simple validation rule: you cannot greet yourself. For this, we enter our name in the thats.me file like we did in the previous step.\nTry to include the following code in our validator. This is meant as a task - try to find the missing pieces on your own :-).\ncheckNotGreetingYourself(greeting: Greeting, accept: ValidationAcceptor): void { const document = getDocument(greeting); const configFilePath = join(document.uri.fsPath, '..', 'thats.me'); const configDocument = this.documents.getOrCreateDocument(URI.file(configFilePath)); if (greeting.person.ref) { if (configDocument \u0026amp;\u0026amp; isConfigurationUnit(configDocument.parseResult.value)) { if(configDocument.parseResult.value.who.ref === greeting.person.ref) { accept('warning', 'You cannot greet yourself 🙄!', { node: greeting, property: 'person' }); } } } } After doing so, your name should display a warning, stating that you cannot greet yourself.\nTroubleshooting In this section we will list common mistakes.\n One prominent mistake is forgetting to build the Langium and TypeScript files before running the extension.\n Since we are basically just copy-pasting the given configuration, be aware of what you are pasting. Make sure that the code still makes sense after copying - a common mistake is forgetting to adapt the pasted code.\n If you encounter any problems, we are happy to help in our discussions page or our issue tracker.\n"},{"id":33,"href":"/playground/","title":"Playground","parent":"Langium","content":"import { addMonacoStyles, setupPlayground, share, overlay, getPlaygroundState, MonacoEditorLanguageClientWrapper } from \"./libs/worker/common.js\"; import { buildWorkerDefinition } from \"../libs/monaco-editor-workers/index.js\"; addMonacoStyles('monaco-styles-helper'); buildWorkerDefinition( \"../libs/monaco-editor-workers/workers\", new URL(\"\", window.location.href).href, false ); // on doc load addEventListener('load', function() { // get a handle to our various interactive buttons const copiedHint = document.getElementById('copiedHint'); const shareButton = document.getElementById('shareButton'); const grammarRoot = document.getElementById('grammar-root'); const contentRoot = document.getElementById('content-root'); // register a listener for the share button shareButton.onclick = () => { // retrieve the current playground state (grammar + content/program) const playgroundState = getPlaygroundState(); share(playgroundState.grammar, playgroundState.content); // update the display to indicate that the text has been shared shareButton.src = '/assets/checkmark.svg'; copiedHint.style.display = 'block'; // reset again after a second... setTimeout(() => { shareButton.src = '/assets/share.svg'; copiedHint.style.display = 'none'; }, 1000); }; const treeButton = document.getElementById('treeButton'); const grid = document.getElementById('grid'); const key = 'display-ast'; if(localStorage.getItem(key) === 'yes') { grid.classList.toggle('without-tree'); } treeButton.onclick = () => { const shown = !grid.classList.toggle('without-tree'); localStorage.setItem(key, shown ? 
'yes' : 'no'); const resizeEvent = new Event('resize'); window.dispatchEvent(resizeEvent); }; const url = new URL(window.location.toString()); const grammar = url.searchParams.get('grammar'); const content = url.searchParams.get('content'); setupPlayground( grammarRoot, contentRoot, grammar, content, overlay ); }); "},{"id":34,"href":"/docs/recipes/","title":"Recipes","parent":"Documentation","content":"Where to go from here? Take your time to study the recipes within the navigation on the left. They are designed to help you with common tasks and challenges you might face when working with Langium. If you have any questions or suggestions, feel free to create an issue or start a discussion on the Github repository.\n"},{"id":35,"href":"/docs/reference/semantic-model/","title":"Semantic Model Inference","parent":"Reference","content":"When AST nodes are created during the parsing of a document, they are given a type. The language grammar dictates the shape of those types and how they might be related to each other. All types form the semantic model of your language. There are two ways by which Langium derives semantic model types from the grammar, by inference and by declaration.\nInference is the default behavior in Langium. During the generation of the semantic model types, Langium infers the possible types directly from the grammar rules. While this is a powerful approach for simple languages and prototypes, it is not recommended for more mature languages since minimal changes in the grammar can easily lead to breaking changes.\nTo minimize the chance of breaking changes, Langium introduces declared types where the semantic model types are explicitly defined by the user in the grammar via a TypeScript-like syntax.\nIn the following, we detail how grammar rules shape the semantic model via inference and declaration.\nInferred Types Inferred types result from letting Langium infer the types of the nodes from the grammar rules. Let\u0026rsquo;s have a look at how various rules shape these type definitions:\nParser Rules The simplest way to write a parser rule is as follows:\nX: name=ID; With this syntax, Langium will infer the type of the node to be generated when parsing the rule. By convention, the type of the node will be named after the name of the rule, resulting in this TypeScript interface in the semantic model:\ninterface X extends AstNode { name: string } It is also possible to control the naming of the interface by using the following syntax:\nX infers MyType: name=ID; resulting in the following interface in the semantic model:\ninterface MyType extends AstNode { name: string } Please note that an interface X is no longer present in the semantic model.\nIt is important to understand that the name of the parser rule and the name of the type it infers work on two separate abstraction levels. The name of the parser rule is used at the parsing level where types are ignored and only the parsing rule is considered, while the name of the type is used at the types level where both the type and the parser rule play a role. This means that the name of the type can be changed without affecting the parsing rules hierarchy, and that the name of the rule can be changed - if it explicitly infers or returns a given type - without affecting the semantic model.\nBy inferring types within the grammar, it is also possible to define several parser rules creating the same semantic model type. 
For example, the following grammar has two rules X and Y inferring a single semantic model type MyType:\nX infers MyType: name=ID; Y infers MyType: name=ID count=INT; This results in the creation of a single interface in the semantic model \u0026lsquo;merging\u0026rsquo; the two parser rules with non-common properties made optional:\ninterface MyType extends AstNode { count?: number name: string } Terminal Rules Terminal rules are linked to built-in types in the semantic model. They do not result in semantic model types on their own but determine the type of properties in semantic model types inferred from a parser rule:\nterminal INT returns number: /[0-9]+/; terminal ID returns string: /[a-zA-Z_][a-zA-Z0-9_]*/; X: name=ID count=INT; // generated interface interface X extends AstNode { name: string count: number } The property name is of type string because the terminal rule ID is linked to the built-in type string, and the property count is of type number because the terminal rule INT is linked to the built-in type number.\nData type rules Data type rules are similar to terminal rules in the sense that they determine the type of properties in semantic model types inferred from parser rules. However, they lead to the creation of type aliases for built-in types in the semantic model:\nQualifiedName returns string: ID '.' ID; X: name=QualifiedName; // generated types type QualifiedName = string; interface X extends AstNode { name: string } Assignments There are three available kinds of assignments in a parser rule:\n = for assigning a single value to a property, resulting in the property\u0026rsquo;s type being derived from the right-hand side of the assignment. += for assigning multiple values to a property, resulting in the property\u0026rsquo;s type being an array of the right-hand side of the assignment. ?= for assigning a boolean to a property, resulting in the property\u0026rsquo;s type being a boolean. X: name=ID numbers+=INT (numbers+=INT)* isValid?='valid'?; // generated interface interface X extends AstNode { name: string numbers: Array\u0026lt;number\u0026gt; isValid: boolean } The right-hand side of an assignment can be any of the following:\n A terminal rule or a data type rule, which results in the type of the property being a built-in type. A parser rule, which results in the type of the property being the type of the parser rule. A cross-reference, which results in the type of the property being a Reference to the type of the cross-reference. An alternative, which results in the type of the property being a type union of all the types in the alternative. X: 'x' name=ID; Y: crossValue=[X:ID] alt=(INT | X | [X:ID]); // generated types interface X extends AstNode { name: string } interface Y extends AstNode { crossValue: Reference\u0026lt;X\u0026gt; alt: number | X | Reference\u0026lt;X\u0026gt; } Unassigned Rule Calls A parser rule does not necessarily need to have assignments. It may also contain only unassigned rule calls. These kinds of rules can be used to change the type hierarchy.\nX: A | B; A: 'A' name=ID; B: 'B' name=ID count=INT; // generated types type X = A | B; interface A extends AstNode { name: string } interface B extends AstNode { name: string count: number } Simple Actions Actions can be used to change the type of a node inside of a parser rule to another semantic model type. 
For example, they allow you to simplify parser rules which would otherwise have to be split into multiple rules.\nX: {infer A} 'A' name=ID | {infer B} 'B' name=ID count=INT; // is equivalent to: X: A | B; A: 'A' name=ID; B: 'B' name=ID count=INT; // generated types type X = A | B; interface A extends AstNode { name: string } interface B extends AstNode { name: string count: number } Assigned actions Actions can also be used to control the structure of the semantic model types. This is a more advanced topic, so we recommend getting familiar with the rest of the documentation before diving into this section.\nLet\u0026rsquo;s consider two different grammars derived from the Arithmetics example. These grammars are designed to parse a document containing a single definition consisting of a name and an expression assignment, with an expression being any amount of additions or a numerical value.\nThe first one does not use assigned actions:\nDefinition: 'def' name=ID ':' expr=Expression; Expression: Addition; Addition infers Expression: left=Primary ('+' right=Expression)?; Primary infers Expression: '(' Expression ')' | {Literal} value=NUMBER; When parsing a document containing def x: (1 + 2) + 3, this is the shape of the semantic model node:\n graph TD; expr((expr)) --\u0026gt; left((left)) expr --\u0026gt; right((right)) left --\u0026gt; left_left((left)) left --\u0026gt; left_right((right)) right --\u0026gt; right_left((left)) left_left --\u0026gt; left_left_v{1} left_right --\u0026gt; left_right_v{2} right_left --\u0026gt; right_left_v{3} We can see that the nested right -\u0026gt; left nodes in the tree are unnecessary and we would like to remove one level of nesting from the tree.\nThis can be done by refactoring the grammar and adding an assigned action:\nDefinition: 'def' name=ID ':' expr=Addition ';'; Expression: Addition; Addition infers Expression: Primary ({infer Addition.left=current} '+' right=Primary)*; Primary infers Expression: '(' Expression ')' | {Literal} value=NUMBER; Parsing the same document now leads to this semantic model:\ngraph TD; expr((expr)) --\u0026gt; left((left)) expr --\u0026gt; right((right)) left --\u0026gt; left_left((left)) left --\u0026gt; left_right((right)) right --\u0026gt; right_v{3} left_left --\u0026gt; left_left_v{1} left_right --\u0026gt; left_right_v{2} While this is a fairly trivial example, adding more layers of expression types in your grammar massively degrades the quality of your syntax tree, as each layer will add another empty right property to the tree. Assigned actions alleviate this issue completely.\nDeclared Types Because type inference takes into account every entity of a parser rule, even the smallest changes can update your inferred types. This can lead to unwanted changes in your semantic model and incorrect behavior of services that depend on it. Declared types are a means to minimize the risk of introducing breaking changes when modifying the grammar.\nIn most cases, especially for early language designs, letting the type inference take care of generating your types will be your best choice. As your language starts to mature, it may then be of interest to fix parts of your semantic model using declared types.\nWith that aside, declared types can be especially helpful for more mature and complex languages, where a stable semantic model is key and breaking changes introduced by inferred types can break your language services. 
Declared types allow the user to fix the type of their parser rules and rely on the power of validation errors to detect breaking changes.\nLet\u0026rsquo;s look at the example from the previous section:\nX infers MyType: name=ID; Y infers MyType: name=ID count=INT; // should be replaced by: interface MyType { name: string count?: number } X returns MyType: name=ID; Y returns MyType: name=ID count=INT; We now explicitly declare MyType directly in the grammar with the keyword interface. The parser rules X and Y creating nodes of type MyType need to explicitly declare the type of the node they create with the keyword returns.\nContrary to inferred types, all properties must be explicitly declared in order to be valid inside of a parser rule. The following syntax:\nZ returns MyType: name=ID age=INT; will show the validation error \u0026quot;A property 'age' is not expected\u0026quot;, because the declaration of MyType does not include the property age. In short, declared types add a layer of safety via validation to the grammar that prevents mismatches between the expected semantic model types and the shape of the parsed nodes.\nA declared type can also extend types, such as other declared types or types inferred from parser rules:\ninterface MyType { name: string } interface MyOtherType extends MyType { count: number } Y returns MyOtherType: name=ID count=INT; Explicitly declaring union types in the grammar is achieved with the keyword type:\ntype X = A | B; // generates: type X = A | B; Using returns always expects a reference to an already existing type. To create a new type for your rule, use the infers keyword or explicitly declare an interface.\nCross-references, Arrays, and Alternatives Declared types come with special syntax to declare cross-references, arrays, and alternatives:\ninterface A { reference: @B array: B[] alternative: B | C } interface B { name: string } interface C { name: string count: number } X returns A: reference=[B:ID] array+=Y (array+=Y)* alternative=(Y | Z); Y returns B: 'Y' name=ID; Z returns C: 'Z' name=ID count=INT; Actions Actions referring to a declared type have the following syntax:\ninterface A { name: string } interface B { name: string count: number } X: {A} 'A' name=ID | {B} 'B' name=ID count=INT; Note the absence of the keyword infer compared to actions which infer a type.\nReference Unions Trying to reference different types of elements can be an error-prone process. Take a look at the following rule which tries to reference either a Function or a Variable:\nMemberCall: (element=[Function:ID] | element=[Variable:ID]); As both alternatives are only an ID from a parser perspective, this grammar is not decidable and the langium CLI script will throw an error during generation. Luckily, we can improve on this by adding a layer of indirection using an additional parser rule:\nNamedElement: Function | Variable; MemberCall: element=[NamedElement:ID]; This allows us to reference either Function or Variable using the common rule NamedElement. However, we have now introduced a rule which is never actually parsed, but only exists so that the type system can pick up the correct target types of the reference. 
Using declared types, we are able to refactor this unused rule, making our grammar more resilient in the process:\n// Note the `type` prefix here type NamedElement = Function | Variable; MemberCall: element=[NamedElement:ID]; We can also use interfaces in place of union types with similar results:\ninterface NamedElement { name: string } // Infers an interface `Function` that extends `NamedElement` Function returns NamedElement: {infer Function} \u0026quot;function\u0026quot; name=ID ...; // This also picks up on the `Function` elements MemberCall: element=[NamedElement:ID]; "},{"id":36,"href":"/showcase/sql/","title":"SQL","parent":"Langium Showcase","content":""},{"id":37,"href":"/docs/introduction/playground/","title":"Try it out!","parent":"What is Langium?","content":""},{"id":38,"href":"/docs/learn/workflow/generate_ast/","title":"4. Generate the AST","parent":"Langium's workflow","content":"After defining the grammar, you can generate the abstract syntax tree (AST) of your language. The AST is a tree representation of the source code that can be used to analyze and transform the code. The AST definition is generated by the Langium CLI. Simply call the following command in your terminal:\nnpm run langium:generate This line will call langium generate on your Langium project. The Langium CLI will generate the files in the src/generated directory. It will create the following files (depending on your given Langium configuration):\n a grammar file: which contains your entire grammar definition in JSON format. a module file: which contains language-specific setup objects for the final module definition of your language. an ast file: which contains the definition of your AST. several syntax highlighting files: like for PrismJS, TextMate or Monarch. The syntax tree An AST of your language can now be parsed from input files. One important concept in Langium is cross-references. With them you can reference other elements in your language. For example, you can reference a variable in a function call. The AST will contain a reference to the variable. This is useful for code analysis and transformation. Technologies like ANTLR or other parser-only generators do not support this feature. With those, you are forced to resolve these references in place every time you are confronted with them.\nAfter these generation steps, cross-references are not resolved yet. This is done in the next step.\nExample Imagine you are using the Hello-World example from the Yeoman generator. For an input file like this you will get the following syntax tree from Langium at runtime:\nperson John person Jane Hello John! Hello Jane! graph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] G1 --\u0026gt; EM1('!') PRef1 --\u0026gt; QM1{?} greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] G2 --\u0026gt; EM2('!') PRef2 --\u0026gt; QM2{?} Mind the gaps (question marks) for the cross-references inside the greetings. Filling these gaps is the developer\u0026rsquo;s job. Fortunately, Langium provides a default implementation for cross-reference resolution. You can also implement your own resolution strategy.\nHow to test the parser? You can test the parser by comparing the generated AST with the expected AST. 
Here is an example:\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! `); //assert const model = document.parseResult.value; expect(model.persons).toHaveLength(2); expect(model.persons[0].name).toBe(\u0026quot;John\u0026quot;); expect(model.persons[1].name).toBe(\u0026quot;Jane\u0026quot;); expect(model.greetings).toHaveLength(2); //be aware of the fact that the following checks will fail at this point, because the cross-references are not resolved yet expect(model.greetings[0].person.ref?.name).toBe(\u0026quot;John\u0026quot;); expect(model.greetings[1].person.ref?.name).toBe(\u0026quot;Jane\u0026quot;); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":39,"href":"/showcase/domainmodel/","title":"Domain Model","parent":"Langium Showcase","content":""},{"id":40,"href":"/docs/learn/workflow/resolve_cross_references/","title":"5. Resolve cross-references","parent":"Langium's workflow","content":"This step takes place after generating the AST. The AST definition was created and you are able to parse input files. But the AST is not complete yet. It contains cross-references that are not resolved. Cross-references are used to reference other elements in your language.\nProblem Let\u0026rsquo;s illustrate the problem using the Hello-World example from the Yeoman generator:\nperson John person Jane Hello John! Hello Jane! The following syntax tree is generated by the Langium parser at runtime. Mind the gaps with the question marks. These are the missing pieces you want to fill in in this step.\n graph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] PRef1 -- $refText --\u0026gt; RT1('John') G1 --\u0026gt; EM1('!') PRef1 --\u0026gt; QM1{?} greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] PRef2 -- $refText --\u0026gt; RT2('Jane') G2 --\u0026gt; EM2('!') PRef2 --\u0026gt; QM2{?} You can normally achieve cross-reference resolution by implementing a so-called scope provider and a scope computation. When set up correctly, the syntax tree above will change to this:\ngraph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] PRef1 -- $refText --\u0026gt; RT1('John') G1 --\u0026gt; EM1('!') PRef1 -..- P1 greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] PRef2 -- $refText --\u0026gt; RT2('Jane') G2 --\u0026gt; EM2('!') PRef2 -..- P2 Resolution of cross-references As already hinted, you can implement a scope provider and a scope computation. Fortunately, Langium comes with default implementations for both. But eventually, as your language grows, you might want to implement your own strategy because the default is not sufficient. 
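To make the gap concrete, this is roughly what such a cross-reference looks like at runtime - a sketch assuming the generated Greeting and Person types of the hello-world example:\nconst greeting = model.greetings[0];\ngreeting.person.$refText; //'John' - the parsed text, available right after parsing\ngreeting.person.ref; //the resolved Person node, or undefined while the gap is still open\ngreeting.person.error; //a linking error, set only if resolution failed\n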
In the following sections, the interpretation of the involved interfaces will be sketched.\nScope provider Terms The scope provider is responsible for providing a scope for a given cross-reference represented by the ReferenceInfo type.\nA scope is a collection of AST nodes that are represented by the AstNodeDescription type.\nThe description is like a (string) path through the AST of a document. It can also be seen as a tuple of document URI, JSON path, name and type of the AST node.\nA reference info contains the concrete AST reference (which points to nothing yet). The info also has the parent AST node (a so-called container) of the reference and the property name under which the reference can be found in its container. In the form of this tuple (container, property, reference), Langium visits all cross-references using the scope provider\u0026rsquo;s getScope method.\nexport interface ScopeProvider { getScope(context: ReferenceInfo): Scope; } export interface ReferenceInfo { reference: Reference container: AstNode property: string index?: number } export interface Scope { getElement(name: string): AstNodeDescription | undefined; getAllElements(): Stream\u0026lt;AstNodeDescription\u0026gt;; } Purpose So, what is the purpose of the scope provider? As mentioned above: it visits each cross-reference and tries to find all AST nodes across the entire workspace that are candidates for the cross-reference\u0026rsquo;s place. It is important to understand that we do not decide here which of these nodes is the perfect match! That decision is part of the so-called linker of the Langium architecture.\nWhether your cross-reference\u0026rsquo;s $refText contains the name Jane does not matter here. We need to provide all nodes that are possible at this position. So, as a result, you would return the Jane and John AST nodes - for both cross-references!\nThe background for this behavior is that this mechanism can be used for two things: the cross-reference resolution and the code completion. The code completion needs to know all possible candidates for a given cross-reference. The resolution of the cross-reference is done by the linker: Given a scope for a certain cross-reference, the linker decides which of the candidates is the right one - for example the first candidate with the same name.\nScope computation The scope computation is responsible for defining, per document file\u0026hellip;\n which AST nodes get exported to the global scope. These nodes will be collected by the so-called index manager. which AST nodes (as descriptions) are available in the local scope of a certain AST node. This is meant as a cached computation for the scope provider. The index manager keeps track of the global symbols of your language. It can be used by the scope provider to find the right candidates for a cross-reference.\nexport interface ScopeComputation { computeExports(document: LangiumDocument, cancelToken?: CancellationToken): Promise\u0026lt;AstNodeDescription[]\u0026gt;; computeLocalScopes(document: LangiumDocument, cancelToken?: CancellationToken): Promise\u0026lt;PrecomputedScopes\u0026gt;; } So, while the scope computation defines which symbols are globally exported (like using the export keyword in TypeScript), the scope provider is the place to implement the import of these symbols using the index manager and the semantics of your import logic.\nCross-reference resolution from a high-level perspective The AST gets generated by the parser for each document in the workspace. 
The scope computation is called for each document in the workspace. All exported AST nodes are collected by the index manager. The scope computation is then called again for each document in the workspace. All local scopes get computed and attached to the document. The linker and the scope provider are called for each cross-reference in the workspace. The scope provider uses the index manager to find candidates for each cross-reference. The linker decides which candidate is the right one for each cross-reference. Example For the Hello-World example, you can implement a scope provider and a scope computation like this (keep in mind that this is an alternative solution to the default implementation of Langium, which already works for most cases):\nimport { ReferenceInfo, Scope, ScopeProvider, AstUtils, LangiumCoreServices, AstNodeDescriptionProvider, MapScope, EMPTY_SCOPE } from \u0026quot;langium\u0026quot;; import { isGreeting, isModel } from \u0026quot;./generated/ast.js\u0026quot;; export class HelloWorldScopeProvider implements ScopeProvider { private astNodeDescriptionProvider: AstNodeDescriptionProvider; constructor(services: LangiumCoreServices) { //get some helper services this.astNodeDescriptionProvider = services.workspace.AstNodeDescriptionProvider; } getScope(context: ReferenceInfo): Scope { //check which cross-reference you are handling right now if(isGreeting(context.container) \u0026amp;\u0026amp; context.property === 'person') { //Success! We are handling the cross-reference of a greeting to a person! //get the root node of the document const model = AstUtils.getContainerOfType(context.container, isModel)!; //select all persons from this document const persons = model.persons; //transform them into node descriptions const descriptions = persons.map(p =\u0026gt; this.astNodeDescriptionProvider.createDescription(p, p.name)); //create the scope return new MapScope(descriptions); } return EMPTY_SCOPE; } } Please make sure to override the default scope provider in your language module file like this:\n//... export const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //validation: ... references: { ScopeProvider: (services) =\u0026gt; new HelloWorldScopeProvider(services) } }; //... How to test the linking? You can test the linking by comparing the resolved references with the expected references. Here is the example from the last step.\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! `); //assert const model = document.parseResult.value; expect(model.persons).toHaveLength(2); expect(model.greetings).toHaveLength(2); expect(model.greetings[0].person.ref).toBe(model.persons[0]); expect(model.greetings[1].person.ref).toBe(model.persons[1]); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":41,"href":"/docs/learn/workflow/create_validations/","title":"6. 
Create validations","parent":"Langium's workflow","content":"After resolving the cross-references, you can assume that the syntax tree is complete. Now you can start with the validation of the input files. The validation process is a crucial part of the language engineering workflow. The parser ensures the syntactic correctness of the input files. The validation process ensures the semantic correctness of the input files.\nExample Let\u0026rsquo;s consider the Hello-World example from the Yeoman generator. One semantic rule of this language could be that each declared person must be greeted at most once. To be clear, the following input file is invalid - we are greeting John twice:\nperson John person Jane Hello John! Hello Jane! Hello John! //should throw: You can greet each person at most once! This is the 2nd greeting to John. Implementation To accomplish this, you need to implement a validator. The validator is a visitor that traverses a certain part of the syntax tree and checks for semantic errors. The following code snippet shows how you can implement a validator for the Hello-World example. Mind that the Hello-World example already has a validator; you just need to add the following check.\nimport type { ValidationAcceptor, ValidationChecks } from 'langium'; import type { HelloWorldAstType, Model, Person } from './generated/ast.js'; import type { HelloWorldServices } from './hello-world-module.js'; export function registerValidationChecks(services: HelloWorldServices) { const registry = services.validation.ValidationRegistry; const validator = services.validation.HelloWorldValidator; const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { //registers a validator for all Model AST nodes Model: validator.checkPersonAreGreetedAtMostOnce }; registry.register(checks, validator); } export class HelloWorldValidator { checkPersonAreGreetedAtMostOnce(model: Model, accept: ValidationAcceptor): void { //create a multi-counter variable using a map const counts = new Map\u0026lt;Person, number\u0026gt;(); //initialize the counter for each person to zero model.persons.forEach(p =\u0026gt; counts.set(p, 0)); //iterate over all greetings and count the number of greetings for each person model.greetings.forEach(g =\u0026gt; { const person = g.person.ref; //Attention! if the linker was unsuccessful, person is undefined if(person) { //set the new value of the counter const newValue = counts.get(person)!+1; counts.set(person, newValue); //if the counter is greater than 1, create a helpful error if(newValue \u0026gt; 1) { accept('error', `You can greet each person at most once! This is the ${newValue}${newValue==2?'nd':newValue==3?'rd':'th'} greeting to ${person.name}.`, { node: g }); } } }); } } How to test the validator? To test the validator, we can simply use the parseHelper again. The following code snippet shows how you can test the validator:\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! Hello John! `, { validation: true }); //enable validation, otherwise the validator will not be called! 
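//note: with validation enabled, document.diagnostics now holds the LSP diagnostics produced by all registered checks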
//assert expect(document.diagnostics).toHaveLength(1); expect(document.diagnostics![0].message).toBe('You can greet each person at most once! This is the 2nd greeting to John.'); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":42,"href":"/docs/learn/workflow/generate_everything/","title":"7. Generate artifacts","parent":"Langium's workflow","content":"The syntax was ensured. The semantics were checked. Your workspace is free of errors. Now the AST is a valid representation of your input file written in your language. It is time to generate some cool stuff!\nDepending on your domain and your requirements, there are different ways to generate artifacts from your AST.\nHow to write the generator? The simplest way is to generate text into a string. Let\u0026rsquo;s print out every greeting from the hello-world example.\nimport type { Model } from '../language/generated/ast.js'; export function generateJavaScript(model: Model): string { return `\u0026quot;use strict\u0026quot;; ${model.greetings .map(greeting =\u0026gt; `console.log('Hello, ${greeting.person.ref?.name}!');`) .join(\u0026quot;\\n\u0026quot;) }`; } How to test the generator? You can test the generator by comparing the generated text with the expected text. Here is an example.\nimport { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { Model } from \u0026quot;./your-project/generated/ast.js\u0026quot;; import { generateJavaScript } from \u0026quot;./your-project/generator.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); const document = await parse(` person Langium Hello Langium! `, {validation: true}); expect(document.parseResult.lexerErrors).toHaveLength(0); expect(document.parseResult.parserErrors).toHaveLength(0); expect(document.diagnostics ?? []).toHaveLength(0); //act const javaScript = generateJavaScript(document.parseResult.value); //assert expect(javaScript).toBe(`\u0026quot;use strict\u0026quot;; console.log('Hello, Langium!');`); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":43,"href":"/docs/recipes/code-bundling/","title":"Code Bundling","parent":"Recipes","content":"When you first create a Langium project using the Yeoman generator, it will only contain a plain TypeScript configuration, without any additional build processes. However, if you want to make your language available for consumption in a non-development context, you\u0026rsquo;ll want to create a bundle. It is not absolutely necessary in a Node.js context, since you can always resolve local node_modules, but it\u0026rsquo;s still recommended for VS Code extensions. It improves performance and decreases file size by minifying your code and only including what you actually need.\nWe generally recommend using esbuild to bundle Langium-based language servers and extensions. 
To install it, simply run:\nnpm i --save-dev esbuild You can see a minimal configuration file below that bundles both your language server and your extension.\n//@ts-check import * as esbuild from 'esbuild'; const watch = process.argv.includes('--watch'); const minify = process.argv.includes('--minify'); const ctx = await esbuild.context({ entryPoints: ['src/extension.ts', 'src/language/main.ts'], outdir: 'out', bundle: true, target: \u0026quot;es6\u0026quot;, loader: { '.ts': 'ts' }, external: ['vscode'], // the vscode-module is created on-the-fly and must be excluded. platform: 'node', // VSCode extensions run in a node process sourcemap: !minify, minify }); if (watch) { await ctx.watch(); } else { await ctx.rebuild(); ctx.dispose(); } Store it in a module JavaScript file (.mjs) and create a corresponding script in your package.json file:\n\u0026quot;scripts\u0026quot;: { \u0026quot;build\u0026quot;: \u0026quot;node ./esbuild.mjs\u0026quot; } If you want to use a Langium language server in the browser, you can get away with an even smaller setup with the following script:\n\u0026quot;scripts\u0026quot;: { \u0026quot;build:worker\u0026quot;: \u0026quot;esbuild ./src/main.ts --bundle --format=iife --outfile=./public/languageServerWorker.js\u0026quot; } If you\u0026rsquo;re more inclined to use webpack, a configuration for an extension bundler can be seen below:\nconst path = require('path'); const commonConfig = { target: 'node', mode: 'none', devtool: 'nosources-source-map', externals: { vscode: 'commonjs vscode' // the vscode-module is created on-the-fly and must be excluded }, resolve: { extensions: ['.ts', '.js'] }, module: { rules: [ { test: /\\.js$/, enforce: 'pre', loader: 'source-map-loader', exclude: /vscode/ }, { test: /\\.ts$/, exclude: /node_modules/, use: [ { loader: 'ts-loader' } ] } ] } } const lspConfig = { ...commonConfig, entry: './src/language/main.ts', // the entry point of the language server output: { path: path.resolve(__dirname, 'out', 'language'), filename: 'main.js', libraryTarget: 'commonjs2', devtoolModuleFilenameTemplate: '../../[resource-path]', clean: true } }; const vscodeConfig = { ...commonConfig, entry: './src/extension.ts', // the entry point of this extension output: { path: path.resolve(__dirname, 'out'), filename: 'extension.js', libraryTarget: 'commonjs2', devtoolModuleFilenameTemplate: '../[resource-path]' } }; module.exports = [lspConfig, vscodeConfig]; "},{"id":44,"href":"/","title":"Langium","parent":"","content":" Built to bring language engineering to the next level _ Langium is an open source language engineering tool with first-class support for the Language Server Protocol, written in TypeScript and running in Node.js. This future-proof technology stack enables domain-specific languages\nin VS Code, Eclipse Theia, web applications, and more. Try it! Learn Why Langium? _ TypeScript integration Langium generates a typed abstract syntax tree (AST) definition that perfectly fits your grammar and provides utility functions to help you navigate and process the AST. _ Quality based on experience Langium was developed on the basis of years of practical use of Xtext, which is an integral part of numerous projects and products worldwide. We apply this experience to push language engineering to a new level. _ Low barrier to entry The main goal of Langium is to lower the barrier of creating a DSL or low-code platform. We achieve this by providing a special DSL that describes the syntax and structure of your language: the grammar language. 
_ Your language, everywhere Built exclusively on web technologies, Langium is not only available for Node.js based environments but works just as well in your browser. When packaged as a language server, you can connect it to most modern IDEs. _ Lean by default, customizable by design Exploiting the power of the Language Server Protocol, Langium provides useful default implementations for most features. If you are in need of something special, you can override the defaults with your custom implementation. _ Versatile use You can easily package a Langium-based DSL as a command line interface (CLI) to create a rich set of interconnected tools: validator, interpreter, code generators, service adapters, etc. Features _ Simple and direct integration .... with the VS Code extension API _ Well-known technology stack .... implemented in TypeScript, runs in Node.js _ Proven quality on a next level .... with a grammar declaration language similar to Xtext _ Declarative approach .... derives a parser and abstract syntax tree from a grammar declaration _ High performance ... by using Chevrotain\u0026mdash;the blazing fast parser library\u0026mdash;under the hood _ Scale it .... with high out-of-the-box functionality and high extensibility Langium vs. Xtext Despite its age, Xtext is still an excellent basis for building languages and related tools with a Java technology stack. In recent years, however, the VS Code extension API has become increasingly relevant, not only for VS Code itself, but also for other tools that support this format, such as Eclipse Theia. This is why Langium has been created. It enables language engineering in TypeScript, the same technology used for VS code extensions. The differences at a glance: _ Langium is clear Building a tool that uses an Xtext-based language server with VS Code or Theia means creating a hybrid technology stack where some parts are implemented in Java and others in TypeScript. Developing and maintaining such a mixed code base is more challenging for the engineers involved, and long-term maintenance is more difficult compared to Langium's coherent technology stack. _ Langium is simple Xtext is heavily based on the Eclipse Modeling Framework (EMF). This can be an advantage if you want to integrate with other Eclipse modeling tools (e.g. Sirius), but it can also be a burden due to its complexity. Langium uses the simplest possible solution to describe an AST (i.e. the parsed contents of a text document): TypeScript interfaces. By relying on the built-in language constructs, we avoid the additional abstraction layers and steep learning curve of a modeling framework. In short : Langium wants to keep the concepts that have made Xtext successful, but lift them onto another platform. "},{"id":45,"href":"/docs/","title":"Documentation","parent":"Langium","content":""},{"id":46,"href":"/docs/learn/workflow/","title":"Langium's workflow","parent":"Learn Langium","content":"Langium\u0026rsquo;s workflow can be expressed as a flow chart diagram, which boils down to the following steps in the diagram. Be aware of the fact that the possibilities go beyond this simple workflow. For more advanced topics, you can find answers in the recipes.\n flowchart TD A([\"1. Install Yeoman\"]); B([\"2. Scaffold a Langium project\"]); C([\"3. Write the grammar\"]); D([\"4. Generate the AST\"]); E([\"5. Resolve cross-references\"]); F([\"6. Create validations\"]); G([\"7. 
Generate artifacts\"]); H([\"Find advanced topics\"]); A --\u0026gt; B --\u0026gt; C --\u0026gt; D --\u0026gt; E --\u0026gt; F --\u0026gt; G ~~~ H; G -- for each additional\\ngrammar change --\u0026gt; C; click A \"/docs/learn/workflow/install\" click B \"/docs/learn/workflow/scaffold\" click C \"/docs/learn/workflow/write_grammar\" click D \"/docs/learn/workflow/generate_ast\" click E \"/docs/learn/workflow/resolve_cross_references\" click F \"/docs/learn/workflow/create_validations\" click G \"/docs/learn/workflow/generate_everything\" click H \"/docs/recipes\" Explanation This is the workflow we recommend for developing a language with Langium. It is a step-by-step guide that will help you to get started with Langium and to understand the basics of language development.\nThis simple introduction can be seen as three main parts:\n setting up your project environment (1.+2.): this is only done once; specifying the language features (3.-7.): you need to go through this cycle for each grammar change; everything advanced (8.): the limit of the common workflow is reached here. For specific questions you can find answers in the recipes. While the first part is straightforward, the last part is about advanced topics that differ from project to project. The middle part will be explained briefly in the following section.\nInitial setup 1. Install Yeoman This step ensures that you start a Langium project with the Yeoman generator. Yeoman is a scaffolding tool that helps you to start a new project with a predefined structure.\n2. Scaffold a Langium project After installing Yeoman, you can scaffold a new Langium project.\nCore workflow 3. Write the grammar The first step in the core workflow starts with the grammar. You will have some language feature in mind that you want to implement. The grammar is used to nail down the syntax of your features. You can use our Langium VS Code extension to get syntax highlighting and code completion for .langium files. If your grammar is free of errors, you can generate the files for the abstract syntax tree (AST).\n4. Generate the AST The AST is the backbone of your language. It is used to represent the structure of your language elements. The AST is generated from the grammar. One important part of the AST is the cross-references. They are used to resolve references between language elements. If you have cross-references in your language, you need to resolve them after this step. The actual generation is done by a call of the Langium CLI.\n5. Resolve cross-references The cross-references are used to resolve references between language elements (between different subtrees of one file or even elements of other files(!)). This step is quite important, because it is the basis for the next steps. You can also see it like this: Step 4 will generate an AST with gaps; this fifth step will fill those gaps.\n6. Create validations From here on we have a fully resolved AST. Now every input file that matches the syntax will be accepted. But we want to have more control over the input. We want to check if the input is semantically correct. This is done by creating validations. They are used to check the input against a set of rules. If the input does not match the rules, an error will be thrown.\n7. Generate artifacts Now you have a fully working language. You can generate whatever you want from the input. This can be code, documentation, or anything else. You can use the AST to traverse the input and generate the output.\nFind advanced topics Everything that is out of the scope of the common workflow is covered in the recipes. 
"},{"id":47,"href":"/docs/learn/","title":"Learn Langium","parent":"Documentation","content":""},{"id":48,"href":"/showcase/openapi/","title":"OpenAPI SL","parent":"Langium Showcase","content":""},{"id":49,"href":"/tags/","title":"Tags","parent":"Langium","content":""},{"id":50,"href":"/docs/learn/minilogo/writing_a_grammar/","title":"Writing a Grammar","parent":"Minilogo tutorial","content":" Planning Sketching the Grammar Adding Commands Adding Expressions Adding Terminals In this tutorial we will be talking about writing a grammar for your language in Langium. As a motivating example, we\u0026rsquo;ll be describing how to write a grammar for the MiniLogo language. If you\u0026rsquo;re not familiar with MiniLogo, it\u0026rsquo;s a smaller implementation of the Logo programming language. Logo itself is a lot like Turtle from Python. Ultimately, we\u0026rsquo;ll be using MiniLogo to express drawing instructions that can be used to draw on a canvas.\nWe\u0026rsquo;ve already written an implementation of MiniLogo on Github using Langium. This tutorial follows along with that project, walking through the grammar implementation step by step. Later tutorials will also follow along with MiniLogo to create an easy-to-follow series.\nPlanning Before we get started writing the grammar, we\u0026rsquo;ll want to first identify a couple of important aspects of our language. Namely, these are:\n The Semantic Domain The Concrete Syntax The Semantic Domain describes the types of values that will be produced by evaluating our language. In the case of MiniLogo our semantic domain is going to have a single part, an updated drawing state that contains information on:\n position whether we\u0026rsquo;re drawing or not color of the drawing stroke We\u0026rsquo;ll also be producing values and updating an environment along the way, both of which are important to keep in mind.\nBasically, a MiniLogo program can be considered equivalent to a series of transformations on some drawing context. This goal for MiniLogo will guide our design throughout these tutorials.\nIn addition, we\u0026rsquo;ll want to get an idea of what our concrete syntax will be. This step can be done on paper if you like, but the overall goal is to get a feel for how you want the language to look. Your choice of concrete syntax will also drive your grammar\u0026rsquo;s design. If your design is chosen well, it can simplify the way your grammar is constructed. If your syntax is complex, the grammar may be complex as well. Not only this, but it\u0026rsquo;s also important to try and strike a balance between syntax that is special to your language, and syntax that is at least somewhat shared with other languages. The more unfamiliar the language appears, the more likely your users will struggle trying to pick it up.\nIn our case, we\u0026rsquo;re going to use a C-like concrete syntax. This will make the structure of our programs easy to understand for most users. This choice also allows us to use curly braces to delimit blocks of code, which is quite easy to implement in Langium. You could also go for a Python-style language, where whitespace has significance in determining which block some code belongs to. 
Unfortunately, this is not as easy to do out of the box with Langium, since it ignores whitespace by default, but it can be configured to work for such languages.\nSketching the Grammar Now that we have an idea of our semantics and our concrete syntax, we can then start writing out a grammar. Conveniently, MiniLogo already has a grammar and concrete syntax described, and that in turn is based on the Logo programming language. MiniLogo itself was designed by Eric Walkingshaw at Oregon State University, and was used to teach students. It\u0026rsquo;s not something that we\u0026rsquo;ve created, but rather something that we found to be an ideal demonstration of Langium\u0026rsquo;s capabilities, while also remaining friendly for newcomers.\nAs an aside, our version of MiniLogo will be an approximation of Dr. Walkingshaw\u0026rsquo;s version. We won\u0026rsquo;t adhere to it completely, and we won\u0026rsquo;t be incorporating some elements, such as variable declarations.\nTo get started sketching the grammar we\u0026rsquo;ll be using the Hello World example from the yeoman generator. You can read about how to get this set up in the learning section of our docs. We\u0026rsquo;ll be working with a fresh project from the generator using only the defaults, and building up from that. We\u0026rsquo;ll begin by modifying the default grammar file, and updating it to work for MiniLogo. You can find this file under src/language/hello-world.langium in your new project. If you used a name other than the default, the file will still be there, but using your custom name instead.\nWe\u0026rsquo;ll be overriding the existing Langium grammar file completely, so delete the old contents before we begin.\nThe first line that we\u0026rsquo;ll then add is the declaration of our grammar.\ngrammar MiniLogo This simply declares the name of the grammar that follows, and is required.\nNext, we\u0026rsquo;ll need to describe an entry rule. This will be a parser rule that must be matched first when recognizing a MiniLogo program. This rule is particularly special, because it will become the root of the resulting abstract syntax tree, which captures the essential structure of our program. For MiniLogo, our entry rule will be Model. You could also make it Program, but whatever you choose it should capture the same notion. Regardless of your choice, this rule should match any number of Statements and/or Definitions to follow the MiniLogo specification.\nentry Model: (stmts+=Stmt | defs+=Def)*; Each instance of a statement will be stored under the stmts property as an element of an Array. The same will be done for Definitions using defs as well. Note the trailing * after the grouping, which means technically a program containing nothing is also a valid MiniLogo program.\nTo iterate on this a little bit further we\u0026rsquo;ll need to describe what a Statement (Stmt) and a Definition (Def) are in the context of MiniLogo.\nFirst, let\u0026rsquo;s talk about Definitions. A definition corresponds to:\n a name a list of parameters a block of statements And we want definitions to look like so in our concrete syntax:\ndef myDef() { ... } ... def anotherDef(x,y,z) { ... } We can recognize this concrete syntax, and capture the relevant information for our AST, with the following rule:\nDef: 'def' name=ID '(' (params+=Param (',' params+=Param)*)? 
')' Block; As an additional note, much like regular expressions we use modifiers in our grammar to indicate that definitions can take any number of comma-separated parameters.\nYou may be wondering what Block is as well. Block corresponds to a rule fragment, which is akin to a reusable rule body. It\u0026rsquo;s not a rule itself, but a reusable piece that can be used to complete other rules. It\u0026rsquo;s particularly handy when you find yourself writing the same pattern repeatedly, and want to factor it out.\nfragment Block: '{' body+=Stmt* '}'; Then we have Statements, which consist of Commands or Macros.\nStmt: Cmd | Macro; A Command describes an action that transforms the drawing state (which connects to our semantic domain from before). The commands in MiniLogo can be expressed like so:\nCmd: Pen | Move | Color | For; Where each command is also a separate rule:\n Pen: Corresponds to a command that turns on/off drawing Move: Updates the position of the pen (relatively) Color: Sets the stroke color of what is drawn For: A standard for loop control flow These commands describe the essential drawing instructions that we will be representing. We\u0026rsquo;ll go over those in a moment.\nA statement can also be a Macro. A Macro has 2 distinct parts:\n a reference to a Definition (more on this shortly, think of it like a \u0026lsquo;function\u0026rsquo; for now) a list of arguments to apply this definition to In our concrete syntax, we want macros to look like this:\nmyMacro() ... anotherMacro(1, 2, 3 * 3) We can encode this in MiniLogo like so:\nMacro: def=[Def:ID] '(' (args+=Expr (',' args+=Expr)*)? ')'; In this case def will be a Cross Reference to an existing Definition. This is a special syntax that says def will be assigned to a Definition object at runtime, identified by an ID terminal token. Although we haven\u0026rsquo;t introduced this terminal yet, it\u0026rsquo;s a simple rule that captures literal strings as tokens. It\u0026rsquo;s also important to note that cross-references implicitly utilize the name property to hook up the cross-reference to the target object.\nWe also want to add the notion of a Parameter, which is quite simple to write in:\nParam: name=ID; As you may have guessed, by using the name property for a parameter, we\u0026rsquo;re allowing Langium to automatically set up cross-references for parameters as well.\n
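For a sense of what these rules buy us, the interfaces that npm run langium:generate will later emit into the semantic model look roughly like the following TypeScript sketch (hedged: the actual generated ast.ts also carries $type tags, union types, and container references, so expect differences):\nimport type { AstNode, Reference } from 'langium';

// Stmt and Expr are generated as union types; simplified to placeholders here
type Stmt = AstNode;
type Expr = AstNode;

interface Model extends AstNode {
    stmts: Array\u0026lt;Stmt\u0026gt;;
    defs: Array\u0026lt;Def\u0026gt;;
}
interface Def extends AstNode {
    name: string;
    params: Array\u0026lt;Param\u0026gt;;
    body: Array\u0026lt;Stmt\u0026gt;; // contributed by the Block fragment
}
interface Macro extends AstNode {
    def: Reference\u0026lt;Def\u0026gt;; // cross-reference, linked via the target's name property
    args: Array\u0026lt;Expr\u0026gt;;
}
interface Param extends AstNode {
    name: string;
}\n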
Adding Commands For the commands, we\u0026rsquo;ll go through each one, and show examples of the concrete syntax we\u0026rsquo;re trying to capture:\nPen needs to have two modes, up and down. So it should capture syntax like this:\npen(up) ... pen(down) We can express this with the following parser rule.\nPen: 'pen' '(' mode=('up' | 'down') ')'; Move commands will take a pair of expressions, corresponding to the x and y components, and can look like so:\nmove(1,5) ... move(x * 10, y * 10) We haven\u0026rsquo;t defined it yet, but we can use an Expr rule to represent where our expressions will go, and capture this command like this:\nMove: 'move' '(' ex=Expr ',' ey=Expr ')'; We\u0026rsquo;ll define expressions shortly.\nSimple for loops can be defined too, which should look like this:\nfor x = 0 to 10 { ... } Again, we don\u0026rsquo;t have Expr defined yet, but we can still use it here. Also, since we have a block of statements, we can reuse that Block fragment that was defined earlier.\nFor: 'for' var=Param '=' e1=Expr 'to' e2=Expr Block; Color commands are the last one to add, and they\u0026rsquo;ll change the stroke color in a few ways. The first is by setting the RGB components as integers directly:\ncolor(128,64,255) The second is by passing in the name of a stroke color:\ncolor(blue) The last is by passing a hexadecimal value:\ncolor(#66CCFF) ... color(#6cf) The corresponding rule for this syntax is a special case where we have 3 different overloaded forms of the same command. To capture all of these forms, we can use two different sets of properties:\n r,g,b values for each color a single color value that can be either an ID or HEX We can encode this like so:\nColor: 'color' '(' ((r = Expr ',' g=Expr ',' b=Expr) | color=ID | color=HEX) ')'; What\u0026rsquo;s interesting here is that the color \u0026amp; r,g,b properties are both optional, since in either case only one set or the other will be defined. With these two property sets, we have enough information to quickly determine what kind of color command we have, and to handle it correctly later on.\nAdding Expressions Now we\u0026rsquo;re at the core of our language, Expressions. In MiniLogo we want to be able to express not only literal values, but also references and arithmetic operations such as addition, subtraction, multiplication, and division. When implementing expressions, we need to keep in mind that Langium is based on Chevrotain, which produces top-down parsers. This means we have to watch out for cases that lead to left-recursion. In order to avoid this, we need to be careful not to define a rule with itself on the left-hand side. For example, something like Expr: e1=Expr ... would not work, because the parser would try to parse another expression forever.\nHowever, we can work around this. We can introduce expressions and avoid left-recursion by writing them from the bottom up in terms of order of operations. We\u0026rsquo;ll start with Add (which also includes subtraction):\nExpr: Add; Then writing a rule to handle the addition (and subtraction) case.\nAdd infers Expr: Mult ({infer BinExpr.e1=current} op=('+'|'-') e2=Mult)*; To explain a bit, the Add rule introduces:\n a parser rule that produces an Expr instance (that\u0026rsquo;s what the infers is doing here) starts by recognizing a Mult instance then if there\u0026rsquo;s a binary operator to parse rewrite this parsed object into a BinExpr that will extend Expr (that\u0026rsquo;s what the second {infer ...} is doing) also capture the first Mult under the e1 property (that\u0026rsquo;s what the current keyword refers to) capture the operator +/- capture the following Mult instance (the right-hand side of our binary expression) else simply returns the result of Mult (the case where we don\u0026rsquo;t have a binary expression) We can then repeat this pattern with the Mult rule:\nMult infers Expr: PrimExpr ({infer BinExpr.e1=current} op=('*'|'/') e2=PrimExpr)*; Lastly, we can introduce primary expressions, or PrimExpr. This rule will match all the primitive cases, such as literals, references, groupings, and negation.\nPrimExpr: Lit | Ref | Group | NegExpr; // literal int Lit: val=INT; // cross-reference to a parameter Ref: val=[Param:ID]; // grouped expression with parentheses Group: '(' ge=Expr ')'; // negated expression NegExpr: '-' ne=Expr; By writing our parser rules first for Addition \u0026amp; Subtraction, and then later for Multiplication and Division, we can construct an abstract syntax tree that will correctly preserve order of operations.\nAs a note, we could also write these rules without using actions to rewrite our parse tree. When we\u0026rsquo;re talking about actions, we\u0026rsquo;re talking about those cases of {infer ...}. However, then we\u0026rsquo;ll get nodes like Add and Mult, instead of Expr and BinExpr. This is a tradeoff that is a bit tough to grasp at first in the grammar, but translates to a more sensible AST to work on later. This is especially helpful when we get to generation.\n
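For intuition about precedence, consider an input like 1 + 2 * 3. Under these rules the multiplication ends up nested below the addition. Sketched as plain TypeScript objects (property names from our grammar, $type tags as Langium assigns them; values illustrative):\n// parsing '1 + 2 * 3' yields a tree shaped like this, which evaluates to 7 rather than 9
const tree = {
    $type: 'BinExpr', op: '+',
    e1: { $type: 'Lit', val: 1 },
    e2: {
        $type: 'BinExpr', op: '*',
        e1: { $type: 'Lit', val: 2 },
        e2: { $type: 'Lit', val: 3 },
    },
};\n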
Adding Terminals Now that we\u0026rsquo;re almost done with our grammar, we need to add in the terminal rules. Conveniently, the body of a terminal rule can be defined as a JavaScript regular expression, sharing the same syntax. This makes it very easy to determine what our terminals should recognize.\n// recognize a hexadecimal sequence, used to recognize colors for the 'Color' command terminal HEX returns string: /#(\\d|[a-fA-F])+/; // recognize an identifier terminal ID returns string: /[_a-zA-Z][\\w_]*/; // recognize an Integer (but represented via a 'number' type) terminal INT returns number: /-?[0-9]+/; Then, lastly, we want to add hidden terminals. These will describe tokens that we want to parse and discard while parsing any input. Since we\u0026rsquo;re adding whitespace \u0026amp; comments as hidden terminals, it\u0026rsquo;s the same as saying we do not care about these tokens while parsing, but we do recognize that they are tokens; they just don\u0026rsquo;t play a role in capturing the structure of our language.\nhidden terminal WS: /\\s+/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; And that\u0026rsquo;s it, we\u0026rsquo;re all set writing up the grammar for MiniLogo. To verify that we correctly implemented the grammar with no problems, we can run the following command in the project root:\nnpm run langium:generate The generation should finish successfully, indicating that our grammar doesn\u0026rsquo;t have any errors in it. In some cases, you may get warnings \u0026ndash; such as from unreachable rules in your grammar \u0026ndash; but these won\u0026rsquo;t prevent the generation from completing successfully. Also, when we\u0026rsquo;re referring to the generation, we\u0026rsquo;re talking about the construction of the following from your grammar:\n a semantic model (that ASTs can be mapped onto) a parser that recognizes our language With that, we have the beginnings of our very own language! Hopefully this gives a good idea of how to express a grammar in Langium, particularly with consideration to your concrete syntax \u0026amp; semantic domain. It also shows how left-recursive cases, like expressions, can be expressed in an alternative fashion. Overall, our grammar should now be ready for the next step of validation in the following tutorial.\n
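As an optional sanity check (our own suggestion, not part of the tutorial; the service and module names below depend on your language\u0026rsquo;s name and project layout), you can also parse a sample program programmatically using the parseHelper utility from langium/test:\nimport { EmptyFileSystem } from 'langium';
import { parseHelper } from 'langium/test';
import { createMiniLogoServices } from './src/language/minilogo-module.js';
import type { Model } from './src/language/generated/ast.js';

async function main(): Promise\u0026lt;void\u0026gt; {
    // EmptyFileSystem is fine here, since we parse from a string rather than from disk
    const services = createMiniLogoServices(EmptyFileSystem).MiniLogo;
    const parse = parseHelper\u0026lt;Model\u0026gt;(services);
    const document = await parse('def test() { pen(down) move(10,10) pen(up) } test()');
    // both arrays should be empty for a syntactically valid program
    console.log(document.parseResult.lexerErrors, document.parseResult.parserErrors);
}
main();\n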
"}] \ No newline at end of file +[{"id":0,"href":"/docs/introduction/","title":"What is Langium?","parent":"Documentation","content":"Langium is an open source language engineering tool with first-class support for the Language Server Protocol, written in TypeScript and running in Node.js.\nWhere to go from here? Features If you need a more detailed list of Langium features, you can find them in the features section.\nTry it out If you want to see Langium in action, you can explore the showcases or even try the playground.\nLearn Langium If you are convinced by Langium and want to learn more about it, you can start with the learn section.\nMore details If you are looking for more details about Langium, you can find them in the reference section.\nIf you are searching for a certain guide or recipe, you can find them in the recipes section.\n"},{"id":1,"href":"/docs/learn/minilogo/validation/","title":"Validation","parent":"Minilogo tutorial","content":" Overview The Validation Registry Finding Nodes to Validate Registering Validations In this tutorial, we will be talking about implementing validation for your Langium-based language. We recommend first reading the previous tutorial about writing a grammar, as we will assume you\u0026rsquo;re familiar with the topics covered there. We\u0026rsquo;ll also assume that you have a working language to add validation to, so double check that npm run langium:generate succeeds without errors before you proceed.\nFor this tutorial, we\u0026rsquo;ll be implementing validation for the MiniLogo language, but you can use your own language to follow along as well.\nOverview Adding validation is an important step in building a language, as there are often invalid cases that cannot be filtered out through your grammar alone.\nConsider the case of having unique names for identifiers. In MiniLogo we have definitions with names, and we also have parameters that are identified by name. One problem arises if we have several definitions that share the same name. We could also have a similar problem with parameters, where perhaps the same name is used multiple times in the same definition. In the second case, this is most certainly undesirable, but in the first it depends on how you want your language to handle redefinitions.\nLet\u0026rsquo;s consider the case where you want to allow redeclaring a previous definition. This opens the door to redeclaring or shadowing definitions. If you ever wanted to extend your language down the road, such as by adding the ability to import other programs (along with their definitions), then you might consider allowing a definition to be redefined. However, it could also lead to unintended redeclarations that may be harder to track down. Ultimately, this choice depends on the desired semantics for your language, and is something you should consider carefully.\nIn this example we\u0026rsquo;re going to disallow names that are non-unique for definitions, and we\u0026rsquo;ll be doing the same for the parameters of a definition as well.\nThe Validation Registry In order to express these constraints, we need to modify our language\u0026rsquo;s validator. By default, this can be found in src/language/YOUR-LANGUAGE-validator.ts, with a name that corresponds to your language. This file begins with a validation registry that extends the default validation registry. The validation registry allows us to register validation checks for our language.\nThe constructor for the registry is of particular interest, as it allows associating validation functions with specific nodes in your AST. Here you can see an example of the constructor below for the default hello world language from the yeoman generator.\n/** * Registry for validation checks. 
*/ export class HelloWorldValidationRegistry extends ValidationRegistry { constructor(services: HelloWorldServices) { super(services); const validator = services.validation.HelloWorldValidator; const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { // we want to add checks here... Person: validator.checkPersonStartsWithCapital }; this.register(checks, validator); } } From this example, we have a single validation for the Person node.\nPerson: validator.checkPersonStartsWithCapital Before we changed our grammar in the last tutorial, the Person node corresponded with a parser rule named Person. Similarly, most nodes that we can validate will share the name of the parser rule that instantiates them. However, there are a couple of cases where this is different:\n when Rule infers AnotherName (or uses returns), the node\u0026rsquo;s type will be AnotherName when the body of a parser rule has an action (like {AnotherName}, possibly starting with infer) this new name will exist instead for this part of the rule body Finding Nodes to Validate With this in mind, we can look back at our grammar that we\u0026rsquo;ve written for MiniLogo (from the last tutorial), and find the parser rules that refer to the nodes we want to validate. For this language we have a pair of cases to check, as mentioned above:\n Validate that definitions have unique names in a Model Validate that parameters have unique names in a Definition In order to perform a validation, we need to know the type of the node to validate. Beyond checking our grammar to find this, we can also check the semantic model (akin to the abstract syntax) of our language. This was generated while running npm run langium:generate, and is located in src/language/generated/ast.ts. Peeking into this model, we can see that our rule for Model was written like so:\nentry Model: (stmts+=Stmt | defs+=Def)*; which produces the following node type in our semantic model:\nexport interface Model extends AstNode { defs: Array\u0026lt;Def\u0026gt; stmts: Array\u0026lt;Stmt\u0026gt; } Registering Validations So, we can register a validation on all nodes of type Model (which should be just the root), like so. Note the import coming from the generated file, which contains the definitions that compose our semantic model. The name ast.ts reflects its usage: identifying node types that constitute an AST in our language (akin to an abstract syntax).\nimport { Model } from './generated/ast'; ... const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { Model: (m: Model, accept: ValidationAcceptor) =\u0026gt; { // and validate the model 'm' here } }; We also have a perfectly good validator class just below this part of the file that we can use, but it\u0026rsquo;s still set up to perform validation on the old Person node. We can safely remove the old function, add our custom validation there, and associate it back with our validation registry checks.\nThe updated validator class looks like so:\n/** * Implementation of custom validations. 
*/ export class HelloWorldValidator { // our new validation function for defs checkUniqueDefs(model: Model, accept: ValidationAcceptor): void { // create a set of visited functions // and report an error when we see one we've already seen const reported = new Set(); model.defs.forEach(d =\u0026gt; { if (reported.has(d.name)) { accept('error', `Def has non-unique name '${d.name}'.`, {node: d, property: 'name'}); } reported.add(d.name); }); } } To call this validator in our registry, we can modify the check that is listed in our registry like so (removing the previously written lambda/arrow function).\nconst checks: ValidationChecks\u0026lt;MiniLogoAstType\u0026gt; = { Model: validator.checkUniqueDefs, }; Great! Now we have a simple validation in place to guard against duplicate definitions in MiniLogo.\nNow that we\u0026rsquo;ve shown how this can be done, we can implement this for parameters as well. Looking at our grammar, we can see params are contained as part of a Definition, so we\u0026rsquo;ll register validation for Definition nodes and report if any parameters are duplicated.\nconst checks: ValidationChecks\u0026lt;MiniLogoAstType\u0026gt; = { Model: validator.checkUniqueDefs, Def: validator.checkUniqueParams }; And we can define this new function in our validator class, which is very close in structure to our first function.\ncheckUniqueParams(def: Def, accept: ValidationAcceptor): void { const reported = new Set(); def.params.forEach(p =\u0026gt; { if (reported.has(p.name)) { accept('error', `Param ${p.name} is non-unique for Def '${def.name}'`, {node: p, property: 'name'}); } reported.add(p.name); }); } Although we\u0026rsquo;ve only implemented a pair of validations, hopefully this demonstrates the flexibility of the validator API. The validator can help enforce constraints or features of your language, and ensure that your programs are correct. You could also explore more customized validations for specific cases, perhaps where a parameter and a definition share the same name \u0026ndash; which is not handled here, but sketched below. So long as you can identify the AST node type that you need to validate, you can implement the logic here.\n
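As a hedged illustration of that last idea (our own sketch, not part of the MiniLogo implementation; the function name is ours), such a check could be added to the validator class and registered for Model alongside checkUniqueDefs:\n// warn when a parameter reuses the name of a definition (illustrative only)
checkParamsDontShadowDefs(model: Model, accept: ValidationAcceptor): void {
    const defNames = new Set(model.defs.map(d =\u0026gt; d.name));
    for (const def of model.defs) {
        for (const param of def.params) {
            if (defNames.has(param.name)) {
                accept('warning', `Param '${param.name}' shares its name with a definition.`, {node: param, property: 'name'});
            }
        }
    }
}\n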
That\u0026rsquo;s all for validation. Next we\u0026rsquo;ll be talking about how we can customize our CLI.\n"},{"id":2,"href":"/docs/learn/minilogo/customizing_cli/","title":"Customizing the CLI","parent":"Minilogo tutorial","content":" Overview About the Command Line Interface Adding a Parse and Validate Action Building and Running the CLI In this tutorial, we\u0026rsquo;ll be talking about customizing the command line interface for your language. We recommend reading through previous tutorials about writing a grammar and validation. Once you have a good grasp of those concepts, you should be all set for setting up a CLI. We will also continue to use the MiniLogo language as a motivating example.\nOverview Once you have a grammar and some validation in place, you may want to start configuring a basic CLI for your language. This is an important step where your language becomes more accessible to other programs. Having a CLI for your language is a powerful way to access functionality that is expressed through Langium, without having to interact directly with Langium. A well-designed CLI can be used by other applications to provide advanced language features, without making those other applications unnecessarily complex.\nAbout the Command Line Interface If you\u0026rsquo;ve been using a language built with the yeoman generator for Langium, you should be able to find your CLI defined in src/cli/index.ts. This file describes the general layout of your language\u0026rsquo;s command line interface, and lets you register specific commands. By default, you\u0026rsquo;re provided with a single command for your CLI, the generate command.\nMuch like the name implies, it allows you to take a program written in your DSL, parse it, and traverse the AST to produce some sort of generated output. We won\u0026rsquo;t talk about the generator itself in this tutorial (that will come in the next tutorial on generation). Instead we\u0026rsquo;ll focus on a simple example for parsing and validating a program, which lets us learn more about the CLI itself.\nAdding a Parse and Validate Action To start, let\u0026rsquo;s write up a custom action to allow us to parse and validate a program in our language. If we\u0026rsquo;ve already written up a grammar, and already added some basic validation, then all we have to do is hook up the CLI action here to get this to work. This action will help us verify that our MiniLogo programs have no syntax errors, and also pass our custom validations.\nFeel free to keep (or remove) the existing generate action, as we won\u0026rsquo;t be setting that up until the next tutorial. We\u0026rsquo;ll be sure to present example code for that as well, so don\u0026rsquo;t worry about deleting functions that you\u0026rsquo;ll need later.\nIn order to add our new command, we need to register it in the default export for the index.ts file. In this function, there\u0026rsquo;s a command object, which is a collection of commands for our CLI. Let\u0026rsquo;s call our command parseAndValidate, and give it some extra details, like:\n arguments: Indicating that it takes a single file a description detailing what this action does an action that performs the actual parsing and validation We could also add additional options, but we won\u0026rsquo;t be doing that for this action.\nWe can register our parse and validate action like so:\nprogram .command('parseAndValidate') .argument('\u0026lt;file\u0026gt;', `Source file to parse \u0026amp; validate (ending in ${fileExtensions})`) .description('Indicates whether a program parses \u0026amp; validates successfully, but produces no output code') .action(parseAndValidate) // we'll need to implement this function Finally, we need to implement the parseAndValidate function itself. This will allow us to parse \u0026amp; validate our programs without producing any output. We just want to know when our program is \u0026lsquo;correct\u0026rsquo; by the constraints of our language implementation.\nUsing parts of the existing generateAction function we got by default, we can do our parsing \u0026amp; validation without having to write too much new code at all.\nimport { extractDocument } from './cli-util'; ... /** * Parse and validate a program written in our language. * Verifies that no lexer or parser errors occur. 
* Implicitly also checks for validation errors while extracting the document * * @param fileName Program to validate */ export const parseAndValidate = async (fileName: string): Promise\u0026lt;void\u0026gt; =\u0026gt; { // retrieve the services for our language const services = createHelloWorldServices(NodeFileSystem).HelloWorld; // extract a document for our program const document = await extractDocument(fileName, services); // extract the parse result details const parseResult = document.parseResult; // verify no lexer, parser, or general diagnostic errors show up if (parseResult.lexerErrors.length === 0 \u0026amp;\u0026amp; parseResult.parserErrors.length === 0 ) { console.log(chalk.green(`Parsed and validated ${fileName} successfully!`)); } else { console.log(chalk.red(`Failed to parse and validate ${fileName}!`)); } }; Some of the contents of our custom action are shared with the generateAction function. This isn\u0026rsquo;t surprising given that we still need to set up our language\u0026rsquo;s services.\nBuilding and Running the CLI Now that we have our new action in place, we\u0026rsquo;ll want to build and verify the CLI works for a program written in our language.\nIf you\u0026rsquo;ve been following along from the hello world example produced by the yeoman generator, then you\u0026rsquo;ll have some errors at this point that need to be corrected as follows.\nIf you have errors regarding any imports of HelloWorld..., this is likely because the grammar NAME in your langium file differs from the original HelloWorld. The name of these imports will change based on your grammar file\u0026rsquo;s name after npm run langium:generate, so you should be able to change each import to MyLanguage... to resolve the issue.\nYou may also have build errors related to the generator logic, especially if it was written for the hello-world semantic model. For now, we can comment out the generator function\u0026rsquo;s contents in src/cli/generator.ts, return an empty string, and comment/remove the imports to make TypeScript happy. In the next tutorial, we\u0026rsquo;ll come back to it and implement an initial version of a generator for our language.\nIf you have any other errors while building, double check that the exported \u0026amp; imported names match up. More often than not there\u0026rsquo;s a small discrepancy here, especially when you use a different language name than the default.\nAt this point, you should be able to run the following with no errors from the project root.\nnpm run langium:generate npm run build If everything looks good, you should have access to the CLI in /bin/cli. We also need a program we can test and validate. For the MiniLogo language we have a simple example program that we can validate:\ndef test() { pen(down) move(10,10) pen(up) } test() We\u0026rsquo;ll save this under our project root as test.logo, and we can test that it\u0026rsquo;s correct using our CLI like so:\n./bin/cli parseAndValidate test.logo NOTE: The langium-minilogo repo places test.logo in an examples subdirectory under the project root. So, for that case, the CLI usage would be:\n./bin/cli parseAndValidate examples/test.logo It does not matter where you place your .logo files. 
Organize them as you see fit.\nWe should get output indicating that there were no errors with our program.\n Parsed and validated test.logo successfully!\n If you get a message that indicates you need to choose a file with a given extension, you\u0026rsquo;ll want to go back and update your list of extensions in your package.json and your langium-config.json in your project root. Then you\u0026rsquo;ll need to run npm run langium:generate followed by npm run build to get that change incorporated into your CLI.\nIf we wanted to verify that we can get errors, we can modify our program a bit to include a duplicate definition (which we should have a validation for, as we implemented in the validation tutorial).\ndef test() { pen(down) move(10,10) pen(up) } // redefinition of test, should 'not' validate def test() { pen(up) } test() Running the CLI again should show that this program has an error, and better yet it will show us exactly the error in question.\n There are validation errors:\nline 7: Def has non-unique name \u0026lsquo;test\u0026rsquo;. [test]\n This is perfect, as we didn\u0026rsquo;t have to implement too much more logic to get validation in our CLI. Since we already hooked up our validation service before, the CLI just handles the interaction with an external program. This separation of concerns makes for a very flexible implementation that is easy to adapt over time.\nThat sums up how to add basic CLI functionality. In the next tutorial, we will be talking about generation in more detail, specifically about techniques that you can use to traverse your AST and produce a generated output.\n"},{"id":3,"href":"/docs/learn/minilogo/generation/","title":"Generation","parent":"Minilogo tutorial","content":" Setting up the Generator API Deciding Output to Generate Generating from Statements Writing an Expression Evaluator Generating from Statements with the Evaluator Connecting the Generator to the CLI In this tutorial we\u0026rsquo;ll be showing how to implement basic generation for your language. When we\u0026rsquo;re talking about generation, we\u0026rsquo;re talking about transforming an AST from your Langium-based language into some output target. This could be another language of similar functionality (transpilation), a lower-level language (compilation), or generating some artifacts/data that will be consumed by another application. If you haven\u0026rsquo;t already, make sure to go back over and check out the tutorial on customizing your CLI, as it touches on details about how to implement endpoints for your application (like generation).\nPer usual, we\u0026rsquo;ll be using the MiniLogo language as a motivating example here.\nWe\u0026rsquo;ll be describing how to write a simple MiniLogo generator to output a JSON array of drawing instructions. This tutorial will give you a general idea of how you can traverse an AST to produce generated output.\nSetting up the Generator API To write the generator, we\u0026rsquo;re going to work in the src/cli/generator.ts file. If you\u0026rsquo;re using a language produced by the yeoman generator for Langium, then you should already have a function in here called generateJavascript. For MiniLogo, we\u0026rsquo;ll change this to generateCommands, which will generate drawing commands to be handled later. 
We will also change the function signature to take a Model, and return a string of the generated file path.\n// import the 'Model' type from our semantic model import { Model } from '../language/generated/ast.js'; export function generateCommands(model: Model, filePath: string, destination: string | undefined): string { // ... } This function will serve as our generator endpoint. All MiniLogo programs that we want to generate from will be processed from here.\nNow, our objective is to take a program like this:\ndef test() { pen(down) move(10,10) pen(up) } test() And translate it into a generated JSON-like list of drawing commands like so:\n[ { cmd: 'penDown' }, { cmd: 'move', x: 10, y: 10 }, { cmd: 'penUp' } ] Deciding Output to Generate Notice that there\u0026rsquo;s no notion of macros, definitions, for loops, or other constructs that are present in MiniLogo. We only need to produce a generated output that contains information relevant to our semantic domain. If you remember this term from the very beginning of writing our grammar, then you\u0026rsquo;ll likely also remember that our semantic domain is a series of transformations performed on a drawing context. With this in mind, we can safely reduce a MiniLogo program to such a series of transformations on the pen, position, and color. We don\u0026rsquo;t need to include anything else. In this context, you could think of it like a form of evaluation.\nTo be able to produce this output, we need to be able to traverse all nodes of our AST. We can perform such a traversal by creating functions that map from our AST to our generated output. This is as simple as accessing the properties stored on a node, and writing functions to process the types of those properties such that generation is defined for every type of node in your AST.\nAn example of this would be defining a generateStatements function that takes a list of Statements, and produces some generated result from those statements. Anytime we were working with a node that contained statements, we could invoke this function on it, and return the results.\nWe can call this function from our generateCommands function to begin generation from the top-level statements in our Model.\nexport function generateCommands(model: Model, filePath: string, destination: string | undefined): string { const result: Object[] = generateStatements(model.stmts); } ... function generateStatements(stmts: Stmt[]): Object[] { ... } As a side note, to support generation with string content (like for generating file/program contents) we\u0026rsquo;ve added a CompositeGeneratorNode that is designed to help collect generated output. This is located in our cli-util.ts, and provides more structure with constructing textual outputs, without resorting to direct manipulation of strings.\nGenerating from Statements Now, let\u0026rsquo;s expand on generateStatements. From our grammar, there are 5 types of statements:\n pen move macro for color We want to expand our function to handle each of these cases. This is easy to do using some special isTYPE functions made available from our semantic model. These are automatically generated from our grammar, and allow us to verify the type of a node from our AST at runtime.\nimport { isPen, isMove, isMacro, isFor, isColor } from '../language/generated/ast'; ... if(isPen(stmt)) { ... } else if(isMove(stmt)) { ... } else if(isMacro(stmt)) { ... } else if(isFor(stmt)) { ... } else if (isColor(stmt)) { ... 
} For isPen we have the easiest case where we could emit something like so:\n{ cmd: stmt.mode === 'up' ? 'penUp' : 'penDown' }; However, for the rest of the statements, we need to be able to evaluate expressions first.\nWriting an Expression Evaluator We need to evaluate our expressions to final values for statements, as we don\u0026rsquo;t want to emit literal expressions like 1 + x * 5, but rather their evaluated result. We\u0026rsquo;ll handle this in a new evalExprWithEnv function.\n// map of names to values type MiniLogoGenEnv = Map\u0026lt;string,number\u0026gt;; // evaluates exprs in the context of an env function evalExprWithEnv(e: Expr, env: MiniLogoGenEnv): number { ... } As we mentioned before, in order to perform generation in this context, we\u0026rsquo;re also writing an evaluator for our language. Thankfully, MiniLogo is relatively simple, especially since it doesn\u0026rsquo;t have variables outside of definitions and for loops.\nSo let\u0026rsquo;s write our expression evaluator. Assuming we have the function declaration from above, our first case to be added into that function is for Lit. Again, this is imported from our generated semantic model.\nif(isLit(e)) { return e.val; } Pretty easy. A literal returns its value. Now for references.\nif(isRef(e)) { const v = env.get(e.val.ref?.name ?? ''); if (v !== undefined) { return v; } // handle the error case... } Since we have cross references, we can retrieve the node in question (ref), and check if we have a value stored for its name. In the case that we do, we return the value, otherwise we would want to report an error.\nFor binary expressions, we can invoke evalExprWithEnv recursively on the left \u0026amp; right operands. Since we used actions to restructure our semantic model a bit, we have access to this isBinExpr function to find BinExpr nodes. It\u0026rsquo;s quite convenient, since we can now handle all 4 cases at once.\nif(isBinExpr(e)) { let opval = e.op; let v1 = evalExprWithEnv(e.e1, env); let v2 = evalExprWithEnv(e.e2, env); switch(opval) { case '+': return v1 + v2; case '-': return v1 - v2; case '*': return v1 * v2; case '/': return v1 / v2; default: throw new Error(`Unrecognized bin op passed: ${opval}`); } } For negated expressions, it\u0026rsquo;s also fairly straightforward. We invert whatever value we would get normally.\nif (isNegExpr(e)) { return -1 * evalExprWithEnv(e.ne, env); } Lastly, for groups we extract the \u0026lsquo;grouped\u0026rsquo; value and evaluate it.\nif(isGroup(e)) { return evalExprWithEnv(e.ge, env); } Finally, it\u0026rsquo;s always good practice to sanity check that you aren\u0026rsquo;t missing a case. Throwing an error is often much more desirable than having something silently fail, and produce strange results on generation. This means adding a default for your switches, and a final else clause to handle unexpected nodes.\nWith all those cases above, we can combine them into a series of else if clauses to have a clean case-by-case check.\n
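Assembled into one function, the evaluator could look like the following sketch (it simply combines the cases shown above, relying on the MiniLogoGenEnv type from before; the import path and error-message wording are our own choices):\nimport { type Expr, isLit, isRef, isBinExpr, isNegExpr, isGroup } from '../language/generated/ast.js';

function evalExprWithEnv(e: Expr, env: MiniLogoGenEnv): number {
    if (isLit(e)) {
        return e.val;
    } else if (isRef(e)) {
        const v = env.get(e.val.ref?.name ?? '');
        if (v !== undefined) {
            return v;
        }
        throw new Error(`Unresolved reference: '${e.val.$refText}'`);
    } else if (isBinExpr(e)) {
        const v1 = evalExprWithEnv(e.e1, env);
        const v2 = evalExprWithEnv(e.e2, env);
        switch (e.op) {
            case '+': return v1 + v2;
            case '-': return v1 - v2;
            case '*': return v1 * v2;
            case '/': return v1 / v2;
            default: throw new Error(`Unrecognized bin op passed: ${e.op}`);
        }
    } else if (isNegExpr(e)) {
        return -1 * evalExprWithEnv(e.ne, env);
    } else if (isGroup(e)) {
        return evalExprWithEnv(e.ge, env);
    }
    throw new Error('Unhandled Expression!');
}\n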
Generating from Statements with the Evaluator Now that we can evaluate expressions, we can handle the rest of our statement cases. In order to incorporate our env, we\u0026rsquo;ll also want to update our generateStatements function, and create a new evalStmt function to help out.\nfunction generateStatements(stmts: Stmt[]): Object[] { // minilogo evaluation env let env : MiniLogoGenEnv = new Map\u0026lt;string,number\u0026gt;(); // generate mini logo cmds off of statements return stmts.flatMap(s =\u0026gt; evalStmt(s,env)).filter(e =\u0026gt; e !== undefined) as Object[]; } /** * Takes a statement, an environment, and produces a list of generated objects */ function evalStmt(stmt: Stmt, env: MiniLogoGenEnv) : (Object | undefined)[] { if (isPen(stmt)) { return [{ cmd: stmt.mode === 'up' ? 'penUp' : 'penDown' }]; } // ... the rest of our cases will follow ... } This gives us an env that can be updated by evaluating each statement, and persists from one statement to the next, which is what we want for MiniLogo. Now, for isMove, we just need to evaluate the x \u0026amp; y arguments to their values using this env:\nif (isMove(stmt)) { return [{ cmd: 'move', x: evalExprWithEnv(stmt.ex, env), y: evalExprWithEnv(stmt.ey, env) }]; } For isMacro we need to save and restore our execution environment after the macro has been evaluated. We can do this by generating a new env, setting the parameters from the arguments, and passing that new env to the macro\u0026rsquo;s statements instead.\nKeep in mind arguments need to be evaluated before setting them into the env, and we want to carefully do this using the original env, not the new one being constructed. If there are names that already exist, and would be shadowed by this macro, then it could change the result of the macro (or even the value of subsequent arguments).\n// get the cross ref const macro: Def = stmt.def.ref as Def; // copied env let macroEnv = new Map(env); // produce pairs of string \u0026amp; exprs, using a tmp env // this is important to avoid mixing of params that are only present in the tmp env w/ our actual env let tmpEnv = new Map\u0026lt;string, number\u0026gt;(); // evaluate args independently, staying out of the environment macro.params.map((elm, idx) =\u0026gt; tmpEnv.set(elm.name, evalExprWithEnv(stmt.args[idx], macroEnv))); // add new params into our copied env tmpEnv.forEach((v,k) =\u0026gt; macroEnv.set(k,v)); // evaluate all statements under this macro return macro.body.flatMap(s =\u0026gt; evalStmt(s, macroEnv)); For isFor, we also use a copied env, so that we don\u0026rsquo;t alter the original env outside of the loop.\n// compute for loop bounds // start let vi = evalExprWithEnv(stmt.e1, env); // end let ve = evalExprWithEnv(stmt.e2, env); let results : (Object | undefined)[] = []; // perform loop const loopEnv = new Map(env); while(vi \u0026lt; ve) { loopEnv.set(stmt.var.name, vi++); stmt.body.forEach(s =\u0026gt; { results = results.concat(evalStmt(s, new Map(loopEnv))); }); } return results; Lastly, to handle isColor, check whether one set of properties is defined or the other (like color vs. any of the r,g,b properties).\nif (stmt.color) { // literal color text or hex return [{cmd:'color', color: stmt.color}] } else { // color as rgb const r = evalExprWithEnv(stmt.r!, env); const g = evalExprWithEnv(stmt.g!, env); const b = evalExprWithEnv(stmt.b!, env); return [{cmd:'color', r, g, b}] } With that, we\u0026rsquo;re effectively done writing the core of our generator! 
The last changes to make are to write the output to a file, and to connect what we\u0026rsquo;ve written here with a command in our CLI.\nConnecting the Generator to the CLI To do this, we can go back to the top of our generator, and update the generateCommands function to write the generated result to a file. Most of the structure here is carried over from the original code first set up by the yeoman generator, which makes it convenient to add in.\nexport function generateCommands(model: Model, filePath: string, destination: string | undefined): string { const data = extractDestinationAndName(filePath, destination); const generatedFilePath = `${path.join(data.destination, data.name)}.json`; if (!fs.existsSync(data.destination)) { fs.mkdirSync(data.destination, { recursive: true }); } const result = generateStatements(model.stmts); fs.writeFileSync(generatedFilePath, JSON.stringify(result, undefined, 2)); return generatedFilePath; } And to connect it to the CLI, which is set up in src/cli/index.ts, we can register it by slightly modifying the existing generateAction endpoint that was there by default.\nexport const generateAction = async (fileName: string, opts: GenerateOptions): Promise\u0026lt;void\u0026gt; =\u0026gt; { const services = createHelloWorldServices(NodeFileSystem).HelloWorld; const model = await extractAstNode\u0026lt;Model\u0026gt;(fileName, services); // now with 'generateCommands' instead const generatedFilePath = generateCommands(model, fileName, opts.destination); console.log(chalk.green(`MiniLogo commands generated successfully: ${generatedFilePath}`)); }; Towards the bottom of the same file, we\u0026rsquo;ll modify the description for the logic that registers this action:\nprogram .command('generate') .argument('\u0026lt;file\u0026gt;', `source file (possible file extensions: ${fileExtensions})`) .option('-d, --destination \u0026lt;dir\u0026gt;', 'destination directory of generating') // new description .description('generates MiniLogo commands that can be used as simple drawing instructions') .action(generateAction); And that\u0026rsquo;s it. Now we can run the following to generate commands from a MiniLogo file of our choice.\nnpm run build ./bin/cli generate test.logo This should produce generated/test.json, which contains a JSON array of the drawing commands generated by our program. For the following example program:\ndef test() { pen(down) move(10,10) pen(up) } test() our JSON output should be:\n[ { \u0026quot;cmd\u0026quot;: \u0026quot;penDown\u0026quot; }, { \u0026quot;cmd\u0026quot;: \u0026quot;move\u0026quot;, \u0026quot;x\u0026quot;: 10, \u0026quot;y\u0026quot;: 10 }, { \u0026quot;cmd\u0026quot;: \u0026quot;penUp\u0026quot; } ] If you\u0026rsquo;re looking at the implementation of MiniLogo that we\u0026rsquo;ve already written in the Langium organization on Github, you may notice that the program and output there are slightly different. This interpretation of MiniLogo has gone through some iterations, and so there are some slight differences here and there. What\u0026rsquo;s most important is that your version produces the generated output that you expect.\nWe could continue to extend this with new features, and generate new sorts of output using a given input language. In this tutorial, we\u0026rsquo;re able to take a MiniLogo program and convert it into some simple JSON drawing instructions that can be consumed by another program. This opens the door for us to write such a program in another language, such as Python or JavaScript, and draw with these results. In later tutorials, we\u0026rsquo;ll be talking about how to run Langium in the web with generation, so that we can immediately verify our results by drawing on an HTML5 canvas.\n
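To preview where that goes, consuming the generated JSON can be as small as the following hedged TypeScript sketch (our own illustration, not part of the tutorial; the command shapes mirror the output above, and move is treated as relative, matching the Move command description):\n// the shapes of the generated drawing commands
type MiniLogoCmd =
    | { cmd: 'penUp' }
    | { cmd: 'penDown' }
    | { cmd: 'move'; x: number; y: number }
    | { cmd: 'color'; color?: string; r?: number; g?: number; b?: number };

function drawCommands(ctx: CanvasRenderingContext2D, cmds: MiniLogoCmd[]): void {
    let drawing = false;
    let x = 0;
    let y = 0;
    for (const c of cmds) {
        if (c.cmd === 'penDown') {
            drawing = true;
            ctx.beginPath();
            ctx.moveTo(x, y);
        } else if (c.cmd === 'penUp') {
            drawing = false;
            ctx.stroke();
        } else if (c.cmd === 'move') {
            // move is relative to the current position
            x += c.x;
            y += c.y;
            if (drawing) { ctx.lineTo(x, y); } else { ctx.moveTo(x, y); }
        } else if (c.cmd === 'color') {
            ctx.strokeStyle = c.color ?? `rgb(${c.r}, ${c.g}, ${c.b})`;
        }
    }
}\n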
We recommend that you next read the guide on bundling your language with Langium to reduce its size, before moving on to the tutorial about bundling an extension. This is an important step before deployment as an extension for VSCode, and also if you\u0026rsquo;re planning to later deploy your language in the web.\n"},{"id":4,"href":"/docs/learn/minilogo/building_an_extension/","title":"Building an Extension","parent":"Minilogo tutorial","content":" Setting up the Scripts Generate an Extension Installing Adding an Icon Conclusion In this tutorial we\u0026rsquo;ll be going over how to build a VSIX extension (VSCode extension) for your Langium-based language. This will let you provide LSP support for your language in VSCode. We\u0026rsquo;ll assume that you\u0026rsquo;ve already looked at the previous tutorial, and have had time to read the guide on bundling, so that you\u0026rsquo;re ready to build an extension. At this point we assume that your language is also working, and there are no issues running npm run langium:generate or npm run build. If there are, you\u0026rsquo;ll want to correct those first.\nSetting up the Scripts To get started, you\u0026rsquo;ll want to have a language expressed in Langium, such as Lox or MiniLogo. If you have been following along with these tutorials, you should already have something ready. If you don\u0026rsquo;t, you can also use the default language generated by the yeoman generator for Langium, presented in the workflow section.\nRegardless of what you\u0026rsquo;re working with, you\u0026rsquo;ll want to make sure you have the following scripts in your package.json.\n{ ... \u0026quot;vscode:prepublish\u0026quot;: \u0026quot;npm run esbuild-base -- --minify \u0026amp;\u0026amp; npm run lint\u0026quot;, \u0026quot;esbuild-base\u0026quot;: \u0026quot;esbuild ./src/extension/main.ts --bundle --outfile=out/main.js --external:vscode --format=cjs --platform=node\u0026quot;, ... } The esbuild-base script is particularly important, as it will be constructing the extension itself.\nYou\u0026rsquo;ll also need to install esbuild if you haven\u0026rsquo;t already.\nnpm i --save-dev esbuild Generate an Extension At this point we\u0026rsquo;re ready to generate an extension. We need the VS Code Extension Manager (vsce) to do this, so make sure to download this from npm via npm install -g @vscode/vsce (or install locally, as per your preference). Once you have that installed, you can invoke it like so from the root of your project.\nvsce package You should now see a VSIX extension file in the root of your project. The name of this file will correspond with the name and version properties listed in your package.json. 
For MiniLogo, this produced minilogo-0.1.0.vsix.\n Installing For installing the extension, you can right-click the extension file, and select \u0026ldquo;Install VSIX Extension\u0026rdquo; at the bottom of the list.\n You should see a small indication at the bottom right of your screen that your VSIX extension has been successfully installed.\nYou can verify this by going to your extensions tab and looking at the enabled extensions, where you should find the name of your language (again corresponding to the name property in your package.json).\n Assuming the extension is enabled and working correctly, you can open any file that ends in one of the extensions registered for your language, and you should immediately observe the syntax highlighting kicking in. Interaction with your language should show that syntax errors are recognized, and other LSP functionalities are working as intended (such as renaming of symbols).\nAdding an Icon You may notice that your extension doesn\u0026rsquo;t have an icon to start with. This is a small thing that we can quickly fix: simply add a small PNG icon somewhere in your project repo, such as the root. You\u0026rsquo;ll also want to set the icon property in your package.json with the relative path to this icon.\n{ ... \u0026quot;name\u0026quot;: \u0026quot;minilogo\u0026quot;, \u0026quot;displayName\u0026quot;: \u0026quot;minilogo\u0026quot;, \u0026quot;icon\u0026quot;: \u0026quot;icon.png\u0026quot;, \u0026quot;publisher\u0026quot;: \u0026quot;TypeFox\u0026quot;, ... } In our example, we\u0026rsquo;re using a simple turtle icon from onlinewebfonts as a placeholder.\n When you regenerate your extension \u0026amp; reinstall it, you should get an icon that is the same as the one that you packaged it with.\n Conclusion And that\u0026rsquo;s it, at this point you have an extension for your language that you can use for development. After some testing and improvements, you could even publish it!\nAs a quick aside, it\u0026rsquo;s important to keep the extensions that your language recognizes synchronized in both your package.json and your langium-config.json. If you do make changes to your extensions, it\u0026rsquo;s a good idea to double check that these are both synced up, and to do a full rebuild to get those changes into your extension.\nAnd that\u0026rsquo;s it for building an extension. In the next tutorial, we\u0026rsquo;ll be setting up Langium + Monaco in the web.\n"},{"id":5,"href":"/docs/learn/minilogo/langium_and_monaco/","title":"Langium + Monaco Editor","parent":"Minilogo tutorial","content":" Technologies You\u0026rsquo;ll Need Getting your Language Setup for the Web Factoring out File System Dependencies Setting up Monaco Setting up a Static Page Serving via NodeJS Updated on Oct. 4th, 2023 for usage with monaco-editor-wrapper 3.1.0 \u0026amp; above, as well as Langium 2.0.2\nIn this tutorial we\u0026rsquo;ll be talking about running Langium in the web with the Monaco editor. If you\u0026rsquo;re not familiar with Monaco, it\u0026rsquo;s the editor that powers VS Code. We\u0026rsquo;re quite fond of it at TypeFox, so we\u0026rsquo;ve taken the time to write up this tutorial to explain how to integrate Langium in the web with Monaco, no backend required.\nAlthough we\u0026rsquo;re using Monaco in this tutorial, that does not mean that you cannot use another code editor of your choice. For example, you can use Code Mirror with Langium as well. 
Generally, if an editor has LSP support, it is very likely you can integrate it easily with Langium, since Langium is LSP compatible.\nWithout further ado, let\u0026rsquo;s jump into getting your web-based Langium experience set up!\nTechnologies You\u0026rsquo;ll Need Langium 2.0.2 or greater Monaco Editor Wrapper 3.1.0 or greater ESBuild 0.18.20 or greater Getting your Language Setup for the Web To begin, you\u0026rsquo;re going to need a Langium-based language to work with. We have already written MiniLogo in Langium as an example for deploying a language in the web. However, if you\u0026rsquo;ve been following along with these tutorials so far, you should be ready to move your own language into a web-based context.\nPer usual, we\u0026rsquo;ll be using MiniLogo as the motivating example here.\nFactoring out File System Dependencies In order to build for the browser, we need to create a bundle that is free of any browser-incompatible modules. To do this, let\u0026rsquo;s create a new entry point for our language server in src/language-server/main-browser.ts. This will mirror the regular entry point that we use to build already, but will target a browser-based context instead. We\u0026rsquo;ll start with the following content:\nimport { startLanguageServer, EmptyFileSystem } from 'langium'; import { BrowserMessageReader, BrowserMessageWriter, createConnection } from 'vscode-languageserver/browser.js'; // your services \u0026amp; module name may differ based on your language's name import { createMiniLogoServices } from './minilogo-module.js'; declare const self: DedicatedWorkerGlobalScope; /* browser specific setup code */ const messageReader = new BrowserMessageReader(self); const messageWriter = new BrowserMessageWriter(self); const connection = createConnection(messageReader, messageWriter); // Inject the shared services and language-specific services const { shared, MiniLogo } = createMiniLogoServices({connection, ...EmptyFileSystem }); // Start the language server with the shared services startLanguageServer(shared); Again, this is based on code that was originally produced by the yeoman generator, so it should look familiar.\nMost of this is in line with what\u0026rsquo;s contained in the main.ts file. The exceptions are the message readers \u0026amp; writers, and the notion of an EmptyFileSystem for the browser. There is a virtual file system API that we could utilize on most modern browsers, but for this tutorial we\u0026rsquo;ll assume we aren\u0026rsquo;t using any file system. Instead we\u0026rsquo;ll have a single source \u0026lsquo;file\u0026rsquo; located in memory.\nWe\u0026rsquo;ll also need to include a library to resolve the missing DedicatedWorkerGlobalScope, which is normally not accessible until we update our tsconfig.json in our project root. We need to supplement the lib entry with DOM and WebWorker. From the yeoman generator example, the lib entry usually has just [\u0026quot;ESNext\u0026quot;].\n{ \u0026quot;compilerOptions\u0026quot;: { ... \u0026quot;lib\u0026quot;: [\u0026quot;ESNext\u0026quot;, \u0026quot;DOM\u0026quot;, \u0026quot;WebWorker\u0026quot;] } } Now that we have a new entry point for the browser, we need to add a script to our package.json to build a web worker for this language. The bundle this script produces will contain the language server for your language. The following script example is specific to MiniLogo, but should capture the general approach quite nicely:\n{ ... 
\u0026quot;build:worker\u0026quot;: \u0026quot;esbuild --minify ./out/language-server/main-browser.js --bundle --format=iife --outfile=./public/minilogo-server-worker.js\u0026quot;, } Assuming esbuild is installed, and we\u0026rsquo;ve properly factored out any modules that are not suitable for a browser-based context, we should be good to go!\nRunning npm run build:worker, we should see the bundle successfully generated without issue. If you\u0026rsquo;re still having problems building the worker, double-check that you\u0026rsquo;re not importing fs or other file-system-dependent modules anywhere the bundle can reach.\nNote that although our generator is still connected to the file system, that\u0026rsquo;s not relevant for the worker bundle to function.\nSetting up Monaco Now we\u0026rsquo;re going to set up Monaco, but not with Langium yet, as we want to be sure it\u0026rsquo;s working first before connecting the two.\nFor convenience, we\u0026rsquo;re going to use the Monaco Editor Wrapper (MER) to wrap around some of Monaco\u0026rsquo;s core functionality, along with the Monaco Editor Workers package to assist. These packages are both maintained by TypeFox, and are designed to make it easier to use Monaco in a web-based context. We\u0026rsquo;ll be using the following versions of these packages:\n Monaco Editor Wrapper version 3.1.0 monaco-editor-workers version 0.39.0 Both of these packages should be installed as dependencies for your language; this guide assumes you\u0026rsquo;re using monaco-editor-wrapper 3.1.0 or later and monaco-editor-workers 0.39.0.\nAdditionally, we\u0026rsquo;ll want a way to serve this bundled language server. The choice of how you want to go about this is ultimately up to you. Previously we\u0026rsquo;ve recommended express as a development dependency (don\u0026rsquo;t forget to also add @types/express), as a powerful \u0026amp; lightweight NodeJS server framework. Here, however, we\u0026rsquo;ll be going with the built-in NodeJS support for standing up a web server; again, the choice is yours.\nWe\u0026rsquo;ll also want to add some more scripts to our package.json to copy over the necessary files from the monaco-editor-wrapper \u0026amp; monaco-editor-workers packages into the public folder. We\u0026rsquo;ll be referencing these library assets to set up the webpage for Langium + Monaco.\n{ ... 
\u0026quot;prepare:public\u0026quot;: \u0026quot;node scripts/prepare-public.mjs\u0026quot;, \u0026quot;build:web\u0026quot;: \u0026quot;npm run build \u0026amp;\u0026amp; npm run prepare:public \u0026amp;\u0026amp; npm run build:worker \u0026amp;\u0026amp; node scripts/copy-monaco-assets.mjs\u0026quot;, } Both scripts reference mjs files that also need to be added, in the scripts folder:\nscripts/prepare-public.mjs\nimport * as esbuild from 'esbuild' import shell from 'shelljs' // setup \u0026amp; copy over css \u0026amp; html to public shell.mkdir('-p', './public'); shell.cp('-fr', './src/static/*.css', './public/'); shell.cp('-fr', './src/static/*.html', './public'); // bundle minilogo.ts, and also copy to public await esbuild.build({ entryPoints: ['./src/static/minilogo.ts'], minify: true, sourcemap: true, bundle: true, outfile: './public/minilogo.js', }); scripts/copy-monaco-assets.mjs\nimport shell from 'shelljs' // copy workers to public shell.mkdir('-p', './public/monaco-editor-workers/workers'); shell.cp( '-fr', './node_modules/monaco-editor-workers/dist/index.js', './public/monaco-editor-workers/index.js' ); shell.cp( '-fr', './node_modules/monaco-editor-workers/dist/workers/editorWorker-es.js', './public/monaco-editor-workers/workers/editorWorker-es.js' ); shell.cp( '-fr', './node_modules/monaco-editor-workers/dist/workers/editorWorker-iife.js', './public/monaco-editor-workers/workers/editorWorker-iife.js' ); This keeps these extra details out of our package.json, letting each script focus on its overall goal.\nThe last script, build:web, provides a convenient way to invoke all the intermediate build steps in sequence. However, hold off on running build:web for now, as we still need to add our static assets to make it work; that comes in the next step.\nAs a quick note, if you went with another editor, you would want to make sure that the assets required for that editor are also copied into the public folder as part of your output.\nSetting up a Static Page And now for the actual HTML page itself, plus its supporting assets. To keep things organized, we\u0026rsquo;re splitting up the JS and CSS. We\u0026rsquo;ll be putting all of these files into a new location under our project root, src/static/.\nHere are the raw contents of the HTML stored in src/static/index.html. 
This will serve as a frame for Monaco to be set up within.\n\u0026lt;!DOCTYPE html\u0026gt; \u0026lt;html\u0026gt; \u0026lt;head\u0026gt; \u0026lt;meta charset='utf-8'\u0026gt; \u0026lt;!-- Page \u0026amp; Monaco styling --\u0026gt; \u0026lt;link href=\u0026quot;styles.css\u0026quot; rel=\u0026quot;stylesheet\u0026quot;/\u0026gt; \u0026lt;title\u0026gt;MiniLogo in Langium\u0026lt;/title\u0026gt; \u0026lt;/head\u0026gt; \u0026lt;body\u0026gt; \u0026lt;h1\u0026gt;MiniLogo in Langium\u0026lt;/h1\u0026gt; \u0026lt;!-- Use a wrapper to display Monaco + Canvas side-by-side --\u0026gt; \u0026lt;div id=\u0026quot;page-wrapper\u0026quot;\u0026gt; \u0026lt;!-- Monaco half --\u0026gt; \u0026lt;div class=\u0026quot;half\u0026quot;\u0026gt; \u0026lt;div class=\u0026quot;wrapper\u0026quot;\u0026gt; \u0026lt;div id=\u0026quot;monaco-editor-root\u0026quot;\u0026gt;\u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;!-- Canvas half --\u0026gt; \u0026lt;div class=\u0026quot;half\u0026quot;\u0026gt; \u0026lt;canvas id='minilogo-canvas' width=500 height=600\u0026gt;\u0026lt;/canvas\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;!-- Status message location --\u0026gt; \u0026lt;div style=\u0026quot;text-align:center\u0026quot;\u0026gt; \u0026lt;span id=\u0026quot;status-msg\u0026quot;\u0026gt;\u0026lt;/span\u0026gt; \u0026lt;/div\u0026gt; \u0026lt;br/\u0026gt; \u0026lt;footer\u0026gt; \u0026lt;br/\u0026gt; \u0026lt;p style=\u0026quot;font-style:italic\u0026quot;\u0026gt;Powered by\u0026lt;/p\u0026gt; \u0026lt;img width=\u0026quot;125\u0026quot; src=\u0026quot;https://langium.org/assets/langium_logo_w_nib.svg\u0026quot; alt=\u0026quot;Langium\u0026quot;\u0026gt; \u0026lt;/footer\u0026gt; \u0026lt;!-- Monaco Configuration --\u0026gt; \u0026lt;script type=\u0026quot;module\u0026quot; src=\u0026quot;minilogo.js\u0026quot;\u0026gt;\u0026lt;/script\u0026gt; \u0026lt;/body\u0026gt; \u0026lt;/html\u0026gt; And here\u0026rsquo;s the associated CSS stored in src/static/styles.css. This will style Monaco correctly so it renders as expected.\nhtml,body { background: rgb(33,33,33); font-family: 'Lucida Sans', 'Lucida Sans Regular', 'Lucida Grande', 'Lucida Sans Unicode', Geneva, Verdana, sans-serif; color: white; /* for monaco */ margin: 0; padding: 0; width: 100%; height: 100%; } h1 { text-align: center; } #minilogo-canvas { display: block; margin: 8px auto; text-align: center; } #page-wrapper { display: flex; max-width: 2000px; margin: 4px auto; padding: 4px; min-height: 75vh; justify-content: center; } #page-wrapper .half { display: flex; width: 40vw; } .build { display: block; margin: 8px auto; width: 300px; height: 30px; background: none; border: 2px #fff solid; color: #fff; transition: 0.3s; font-size: 1.2rem; border-radius: 4px; } .build:hover { border-color: #6cf; color: #6cf; cursor: pointer; } .build:active { color: #fff; border-color: #fff; } footer { text-align: center; color: #444; font-size: 1.2rem; margin-bottom: 16px; } @media(max-width: 1000px) { #page-wrapper { display: block; } #page-wrapper .half { display: block; width: auto; } #minilogo-canvas { margin-top: 32px; } #page-wrapper { min-height: auto; } } /* for monaco */ .wrapper { display: flex; flex-direction: column; height: 100%; width: 100%; } #monaco-editor-root { flex-grow: 1; } #status-msg { color: red; } Finally, there\u0026rsquo;s the actual code that sets up our Monaco instance (stored in src/static/minilogo.ts), and sets up Langium as well. 
This is the most complex part of setting up Langium + Monaco in the web, so we\u0026rsquo;ll walk through the file in parts.\n(Update on Oct. 4th, 2023: Previously we wrote this as src/static/setup.js. This new file can be considered the same, but reworked into TypeScript \u0026amp; updated for the new versions of Langium \u0026amp; the MER.)\nFirst, we need to import and set up the worker, as well as some language client wrapper configuration.\nimport { MonacoEditorLanguageClientWrapper, UserConfig } from \u0026quot;monaco-editor-wrapper/bundle\u0026quot;; import { buildWorkerDefinition } from \u0026quot;monaco-editor-workers\u0026quot;; import { addMonacoStyles } from 'monaco-editor-wrapper/styles'; /** * Setup Monaco's own workers and also incorporate the necessary styles for the monaco-editor */ function setup() { buildWorkerDefinition( './monaco-editor-workers/workers', new URL('', window.location.href).href, false ); addMonacoStyles('monaco-editor-styles'); } Then, we\u0026rsquo;ll want to instantiate our language client wrapper. In previous versions of the monaco-editor-wrapper package (before 2.0.0), configuration was performed by manually setting properties on the MonacoEditorLanguageClientWrapper instance. However, as of 3.1.0 (at the time of writing this), the constructor for MonacoEditorLanguageClientWrapper now takes a configuration object as its first argument. This configuration object allows us to set the same properties as before, but with more fine-grained control.\nWe\u0026rsquo;re going to walk through the parts that will be used to build up this configuration first, and then join the actual configuration object together afterwards.\nTo start, let\u0026rsquo;s keep in mind that our current language id will be minilogo. This should match the id of the language that will be recognized by our language server.\nThen, we\u0026rsquo;ll want to add some static syntax highlighting. To do this we have a couple of choices: using a TextMate or a Monarch grammar. Both will provide us with the ability to tokenize our language and apply styling to our tokens. However, we have to choose one; we cannot use both simultaneously. This is related to how Monaco itself is configured with regard to whether we\u0026rsquo;re using the VSCode API config, or the classic editor config. 
This makes sense to a degree, as we can only prepare the editor one way or the other.\nFor MiniLogo, our monarch grammar will look like so:\n/** * Returns a Monarch grammar definition for MiniLogo */ function getMonarchGrammar() { return { keywords: [ 'color','def','down','for','move','pen','to','up' ], operators: [ '-',',','*','/','+','=' ], symbols: /-|,|\\(|\\)|\\{|\\}|\\*|\\/|\\+|=/, tokenizer: { initial: [ { regex: /#(\\d|[a-fA-F]){3,6}/, action: {\u0026quot;token\u0026quot;:\u0026quot;string\u0026quot;} }, { regex: /[_a-zA-Z][\\w_]*/, action: { cases: { '@keywords': {\u0026quot;token\u0026quot;:\u0026quot;keyword\u0026quot;}, '@default': {\u0026quot;token\u0026quot;:\u0026quot;string\u0026quot;} }} }, { regex: /(?:(?:-?[0-9]+)?\\.[0-9]+)|-?[0-9]+/, action: {\u0026quot;token\u0026quot;:\u0026quot;number\u0026quot;} }, { include: '@whitespace' }, { regex: /@symbols/, action: { cases: { '@operators': {\u0026quot;token\u0026quot;:\u0026quot;operator\u0026quot;}, '@default': {\u0026quot;token\u0026quot;:\u0026quot;\u0026quot;} }} }, ], whitespace: [ { regex: /\\s+/, action: {\u0026quot;token\u0026quot;:\u0026quot;white\u0026quot;} }, { regex: /\\/\\*/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;,\u0026quot;next\u0026quot;:\u0026quot;@comment\u0026quot;} }, { regex: /\\/\\/[^\\n\\r]*/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, ], comment: [ { regex: /[^\\/\\*]+/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, { regex: /\\*\\//, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;,\u0026quot;next\u0026quot;:\u0026quot;@pop\u0026quot;} }, { regex: /[\\/\\*]/, action: {\u0026quot;token\u0026quot;:\u0026quot;comment\u0026quot;} }, ], } }; } We can produce this Monarch grammar by updating our langium-config.json to produce a Monarch file as output. Note that although we\u0026rsquo;re talking about MiniLogo here, we based this example on the hello-world example produced by the yeoman generator. As such, hello-world names still crop up here and there, and for this tutorial we\u0026rsquo;ll just reuse the same file name as for the TextMate grammar.\n... \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/minilogo.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/minilogo.monarch.ts\u0026quot; } To generate this file, run npm run langium:generate. You can then copy over the definition of the grammar from syntaxes/hello-world.monarch.ts (or whatever other name you have given this file). Keep in mind that this generated monarch grammar is very simple. If you want more complex highlighting, we recommend writing your own custom monarch grammar and storing it somewhere else to prevent it from being overridden. If you\u0026rsquo;re interested, you can find more details about the Monarch grammar highlighting language here.\nThen, we want to set up the code that shows up by default. 
The following is a fixed MiniLogo program that should display a white diamond in the top left corner of the screen.\n/** * Retrieves the program code to display, either a default or from local storage */ function getMainCode() { let mainCode = ` def test() { move(100, 0) pen(down) move(100, 100) move(-100, 100) move(-100, -100) move(100, -100) pen(up) } color(white) test() `; // optionally: use local storage to save the code // and seek to restore any previous code from our last session if (window.localStorage) { const storedCode = window.localStorage.getItem('mainCode'); if (storedCode !== null) { mainCode = storedCode; } } return mainCode; } Since we\u0026rsquo;re planning to use a language server with Monaco, we\u0026rsquo;ll need to set up a language client config too. To do this we\u0026rsquo;ll also need to generate a worker using our language server worker file, but that\u0026rsquo;s fairly straightforward to set up here. Keep in mind that you\u0026rsquo;ll need to have access to the bundle produced from your main-browser.ts from before. Here the built result is copied over as public/minilogo-server-worker.js.\n/** * Creates \u0026amp; returns a fresh worker using the MiniLogo language server */ function getWorker() { const workerURL = new URL('minilogo-server-worker.js', window.location.href); return new Worker(workerURL.href, { type: 'module', name: 'MiniLogoLS' }); } By creating the worker in advance, we give ourselves the ability to directly interact with the worker/LS independently of the wrapper itself, and to even pre-configure it before use. This can be hugely beneficial, especially if we expect to customize our LS on the fly.\nLastly, let\u0026rsquo;s set up the user config, which will be used to start up the wrapper.\ntype WorkerUrl = string; /** * Classic configuration for the monaco editor (for use with a Monarch grammar) */ interface ClassicConfig { code: string, htmlElement: HTMLElement, languageId: string, worker: WorkerUrl | Worker, monarchGrammar: any; } /** * Generates a valid UserConfig for a given Langium example * * @param config An extended or classic editor config to generate a UserConfig from * @returns A completed UserConfig */ function createUserConfig(config: ClassicConfig): UserConfig { // setup urls for config \u0026amp; grammar const id = config.languageId; // generate langium config return { htmlElement: config.htmlElement, wrapperConfig: { editorAppConfig: { $type: 'classic', languageId: id, useDiffEditor: false, code: config.code, theme: 'vs-dark', languageDef: config.monarchGrammar }, serviceConfig: { enableModelService: true, configureConfigurationService: { defaultWorkspaceUri: '/tmp/' }, enableKeybindingsService: true, enableLanguagesService: true, debugLogging: false } }, languageClientConfig: { options: { $type: 'WorkerDirect', worker: config.worker as Worker, name: `${id}-language-server-worker` } } }; } This particular UserConfig will be for configuring a classic editor, rather than a VSCode extension-based editor. This is because we\u0026rsquo;re using a Monarch grammar, which is not supported by the extension configuration. However, if we wanted to use a TextMate grammar, we could use the extension-based configuration instead.\neditorAppConfig: { $type: 'vscodeApi', languageId: id, useDiffEditor: false, code: config.code, ... } You would just need to fill in the rest of the details for associating a TextMate grammar and so on. 
Here\u0026rsquo;s an example from the monaco-components repo.\nRegardless of how the user config is set up, we can now invoke that helper function with a handful of configuration details, and have a working UserConfig to pass to the wrapper.\n// create a wrapper instance const wrapper = new MonacoEditorLanguageClientWrapper(); // start up with a user config await wrapper.start(createUserConfig({ htmlElement: document.getElementById(\u0026quot;monaco-editor-root\u0026quot;)!, languageId: 'minilogo', code: getMainCode(), worker: getWorker(), monarchGrammar: getMonarchGrammar() })); That\u0026rsquo;s it! Now if everything was configured correctly, we should have a valid wrapper that will display the code we want in our browser.\nServing via NodeJS Now that we have our files all set up, and our build process prepared, we can put together a mini server application to make viewing our public assets easy. We\u0026rsquo;ll do this by adding src/web/app.ts to our project, and giving it the following contents:\n/** * Simple server app for serving generated examples locally * Based on: https://developer.mozilla.org/en-US/docs/Learn/Server-side/Node_server_without_framework */ import * as fs from \u0026quot;node:fs\u0026quot;; import * as http from \u0026quot;node:http\u0026quot;; import * as path from \u0026quot;node:path\u0026quot;; const port = 3000; const MIME_TYPES: Record\u0026lt;string,string\u0026gt; = { default: \u0026quot;application/octet-stream\u0026quot;, html: \u0026quot;text/html; charset=UTF-8\u0026quot;, js: \u0026quot;application/javascript\u0026quot;, css: \u0026quot;text/css\u0026quot;, }; const STATIC_PATH = path.join(process.cwd(), \u0026quot;./public\u0026quot;); const toBool = [() =\u0026gt; true, () =\u0026gt; false]; const prepareFile = async (url: string) =\u0026gt; { const paths = [STATIC_PATH, url]; if (url.endsWith(\u0026quot;/\u0026quot;)) { paths.push(\u0026quot;index.html\u0026quot;); } const filePath = path.join(...paths); const pathTraversal = !filePath.startsWith(STATIC_PATH); const exists = await fs.promises.access(filePath).then(...toBool); const found = !pathTraversal \u0026amp;\u0026amp; exists; // there's no dedicated 404 page; serve index.html in all other cases const streamPath = found ? filePath : STATIC_PATH + \u0026quot;/index.html\u0026quot;; const ext = path.extname(streamPath).substring(1).toLowerCase(); const stream = fs.createReadStream(streamPath); return { found, ext, stream }; }; http .createServer(async (req, res) =\u0026gt; { const file = await prepareFile(req.url!); const statusCode = file.found ? 200 : 404; const mimeType: string = MIME_TYPES[file.ext] || MIME_TYPES.default; res.writeHead(statusCode, { \u0026quot;Content-Type\u0026quot;: mimeType }); file.stream.pipe(res); console.log(`${req.method} ${req.url} ${statusCode}`); }) .listen(port); console.log(`Server for MiniLogo assets listening on http://localhost:${port}`); If you would like something more compact, and don\u0026rsquo;t mind adding additional dependencies to your project, you can add express and @types/express to your project and use the following code instead:\n/** * Simple express app for serving generated examples */ import express from 'express'; const app = express(); const port = 3000; app.use(express.static('./public')); app.listen(port, () =\u0026gt; { console.log(`Server for MiniLogo assets listening on http://localhost:${port}`); }); And to invoke the server, we need to add one more script to our package.json.\n{ ... 
\u0026quot;serve\u0026quot;: \u0026quot;node ./out/web/app.js\u0026quot; } That\u0026rsquo;s it! Now we can build all the assets and run our server to view our demo of Langium in the web at localhost:3000.\nnpm run build:web npm run serve You should be greeted with a page that contains a working Monaco instance and a small MiniLogo program in the editor. This editor has the highlighting we would expect, and is also fully connected to the language server for our language. This means we have full LSP support for operations that we would expect to have in a native IDE, such as VSCode.\nAnd that\u0026rsquo;s it: we have successfully implemented Langium + Monaco in the web for our language. It\u0026rsquo;s not doing much at this time besides presenting us with an editor, but in the next tutorial we\u0026rsquo;ll talk about using the same setup to add generation in the web. Since our generation has already been configured natively in prior tutorials, we can use what we\u0026rsquo;ve written to quickly implement a web application that translates MiniLogo programs into drawing instructions for an HTML5 canvas.\n"},{"id":6,"href":"/docs/learn/minilogo/generation_in_the_web/","title":"Generation in the Web","parent":"Minilogo tutorial","content":" Handling Document Validations Listening for Notifications in the Client Interpreting Draw Commands (Drawing) Updated on Oct. 4th, 2023 for usage with monaco-editor-wrapper 3.1.0 \u0026amp; above.\nIn this tutorial we\u0026rsquo;ll be talking about how to perform generation in the web by listening for document builder notifications. There are multiple ways to hook into Langium to utilize the generator, such as by directly exporting the generator API. However, by listening to notifications from the document builder, we can do this with less code. This lets us quickly integrate new functionality into our existing Langium + Monaco integration, and focus more on what we would want to do with the generated output.\n(This tutorial previously utilized custom LSP commands to achieve the same goal of generation. This is still a valid approach, but we\u0026rsquo;ve found that listening for notifications this way is much more straightforward. We\u0026rsquo;ve implemented this in our own example languages as well, and would recommend it going forward.)\nWe\u0026rsquo;ll assume that you\u0026rsquo;ve already looked over most of the other tutorials at this point. It is particularly important that you have a language with working generation, and have a working instance of Langium + Monaco for your language (or another editor of your choice). In the case that you don\u0026rsquo;t have a language to work with, you can follow along with MiniLogo, which is the example language used throughout many of these tutorials.\nSince we\u0026rsquo;re working with MiniLogo here, we already know that our generated output is in the form of drawing instructions that transform some drawing context. The generated output that we\u0026rsquo;ve implemented so far consists of a JSON array of commands, making it very easy to interpret. 
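To make that concrete, here\u0026rsquo;s roughly what such output looks like for a few statements. The name/args shape below matches the commands our generator emits, but the exact values are only illustrative, a sketch rather than captured generator output:\n// hypothetical generator output for: pen(down) move(100, 0) pen(up) const commands = [ { name: 'penDown' }, { name: 'move', args: { x: 100, y: 0 } }, { name: 'penUp' } ]; Each entry is a plain JSON object, which is what makes the interpretation step later on so simple.\n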
Now that we\u0026rsquo;re working in a web-based context, this approach lends itself naturally towards manipulating an HTML5 canvas.\nThe parts that we still need to set up are:\n handle document validations, and generate notifications with our generator output listen for these notifications in the client, and extract the generated output interpret the generated output as drawing commands, and update the canvas Handling Document Validations This is the first step we\u0026rsquo;ll need, since without being able to generate notifications in the first place we would have nothing to listen to.\nThankfully a lot of the groundwork has already been done in previous tutorials, as well as within Langium itself. We just need to set up an onBuildPhase listener for the document builder in our LS. Using the LS entry point main-browser.ts that we set up in the last tutorial on Langium + Monaco, we can add the following code to the end of our startLanguageServer function.\n// modified import from the previous tutorial: Langium + Monaco import { BrowserMessageReader, BrowserMessageWriter, Diagnostic, NotificationType, createConnection } from 'vscode-languageserver/browser.js'; // additional imports (DocumentState is needed for the onBuildPhase hook) import { DocumentState } from 'langium'; import { Model } from './generated/ast.js'; import { Command, getCommands } from './minilogo-actions.js'; import { generateStatements } from '../generator/generator.js'; // startLanguageServer... // Send a notification with the serialized AST after every document change type DocumentChange = { uri: string, content: string, diagnostics: Diagnostic[] }; const documentChangeNotification = new NotificationType\u0026lt;DocumentChange\u0026gt;('browser/DocumentChange'); // use the built-in AST serializer const jsonSerializer = MiniLogo.serializer.JsonSerializer; // listen on fully validated documents shared.workspace.DocumentBuilder.onBuildPhase(DocumentState.Validated, documents =\u0026gt; { // perform this for every validated document in this build phase batch for (const document of documents) { const model = document.parseResult.value as Model; let json: Command[] = []; // only generate commands if there are no errors if(document.diagnostics === undefined || document.diagnostics.filter((i) =\u0026gt; i.severity === 1).length === 0 ) { json = generateStatements(model.stmts); } // inject the commands into the model // this is safe so long as you're careful not to clobber existing properties // and is incredibly helpful to enrich the feedback you get from the LS per document (model as unknown as {$commands: Command[]}).$commands = json; // send the notification for this validated document, // with the serialized AST + generated commands as the content connection.sendNotification(documentChangeNotification, { uri: document.uri.toString(), content: jsonSerializer.serialize(model, { sourceText: true, textRegions: true }), diagnostics: document.diagnostics ?? [] }); } }); And that\u0026rsquo;s it for setting up the onBuildPhase listener itself. We still need to address the usage of generateStatements, which comes from our existing generator.\nBased on the work done in previous tutorials, we already have a working generator set up for MiniLogo. If you haven\u0026rsquo;t already set this up, you can go back to the tutorial on generation and give it a look over. Ideally, we\u0026rsquo;ll already have set up our generateStatements function for MiniLogo, meaning that so long as the imported module doesn\u0026rsquo;t pull in anything browser-incompatible, we should be able to use it as is. 
Based on the previous setup however, we should have a generator.js file that is free of such conflicts, as much of them should be separated into the cli directly.\nThis saves us quite a bit of time, since we don\u0026rsquo;t need to handle setting up \u0026amp; dispatching a document for validation, we simply tap into the existing workflow and collect the result when it\u0026rsquo;s ready. This is a great example of how Langium\u0026rsquo;s architecture allows us to easily extend existing functionality, and add new features without having to rewrite existing code.\nAs a concluding note for this section, don\u0026rsquo;t forget to rebuild your language server bundle! It might not be a bad idea to clean as well, just to be sure everything is working as expected at this step.\nListening for Notifications in the Client The next step we need to make is to actually listen for these notifications from the client\u0026rsquo;s end. This takes us back to the Langium + Monaco setup in the previous tutorial.\nAfter starting the wrapper successfully, we want to retrieve the MonacoLanguageClient instance (a wrapper around the language client itself) and listen for browser/DocumentChange notifications.\n// wrapper has started... // get the language client const client = wrapper.getLanguageClient(); if (!client) { throw new Error('Unable to obtain language client!'); } // listen for document change notifications client.onNotification('browser/DocumentChange', onDocumentChange); function onDocumentChange(resp: any) { let commands = JSON.parse(resp.content).$commands; // ... do something with these commands } Now this works, but when do we receive notifications, and how often? Well a good thing you asked, because if you started this up and began editing your program, you would be receiving a notification for every single change! Including whitespace changes. Now that\u0026rsquo;s probably not what we\u0026rsquo;re looking for, but the content is correct, we just want to slow it down a bit. We can do this by setting a timeout and a semaphore to prevent multiple notifications from being processed at once.\nlet running = false; let timeout: number | null = null; function onDocumentChange(resp: any) { // block until we're finished with a given run if (running) { return; } // clear previous timeouts if (timeout) { clearTimeout(timeout); } timeout = window.setTimeout(async () =\u0026gt; { running = true; let commands = JSON.parse(resp.content).$commands; await updateMiniLogoCanvas(commands); running = false; }, 200); // delay of 200ms is arbitrary, choose what makes the most sense in your use case } And now we have a nice delay where repeated updates are discarded, until we have about 200ms without a subsequent update. That allows us to take the commands we\u0026rsquo;re working with, and start doing something with them. The semaphore will prevent following updates from overriding the current run, allowing it to finish before starting a new execution.\nYou may have also noticed we added updateMiniLogoCanvas as the action to perform with our commands. This will be implemented in the next step, where we interpret our drawing commands.\nThat\u0026rsquo;s it for listening for notifications! Now that we have our commands extracted, we\u0026rsquo;ll can actually perform a series of drawing actions on an HTML5 canvas.\nInterpreting Draw Commands (Drawing) If you\u0026rsquo;ve gotten to this point then you\u0026rsquo;re on the final stretch! 
Interpreting Draw Commands (Drawing) If you\u0026rsquo;ve gotten to this point, then you\u0026rsquo;re on the final stretch! The last part we need to implement is the actual logic that takes our drawing commands and updates the canvas. This logic will be the content of the updateMiniLogoCanvas function, and we\u0026rsquo;ll walk through each step here.\nFirst, let\u0026rsquo;s get a handle on our canvas, as well as the associated 2D context.\nconst canvas : HTMLCanvasElement | null = document.getElementById('minilogo-canvas') as HTMLCanvasElement | null; if (!canvas) { throw new Error('Unable to find canvas element!'); } const context = canvas.getContext('2d'); if (!context) { throw new Error('Unable to get canvas context!'); } We\u0026rsquo;ll also want to clear the context, in case we already drew something there before. This will be relevant when we\u0026rsquo;re updating the canvas multiple times with a new program.\ncontext.clearRect(0, 0, canvas.width, canvas.height); Next, we want to set up a background grid to display. It\u0026rsquo;s not essential for drawing, but it looks nicer than an empty canvas.\ncontext.beginPath(); context.strokeStyle = '#333'; for (let x = 0; x \u0026lt;= canvas.width; x+=(canvas.width / 10)) { context.moveTo(x, 0); context.lineTo(x, canvas.height); } for (let y = 0; y \u0026lt;= canvas.height; y+=(canvas.height / 10)) { context.moveTo(0, y); context.lineTo(canvas.width, y); } context.stroke(); After drawing a grid, let\u0026rsquo;s reset the stroke to a white color.\ncontext.strokeStyle = 'white'; Let\u0026rsquo;s also set up some initial drawing state. This will be used to keep track of the pen state, and where we are on the canvas.\n// maintain some state about our drawing context let drawing = false; let posX = 0; let posY = 0; And let\u0026rsquo;s begin evaluating each of our commands. To do this, we\u0026rsquo;ll set up an interval that repeatedly shifts the first element off our list of commands, evaluates it, and repeats. Once we\u0026rsquo;re out of commands to evaluate, we\u0026rsquo;ll clear the interval. The whole invocation will be wrapped in a promise, to make it easy to await later on. Feel free to adjust the delay (or remove it entirely) in your version.\nconst doneDrawingPromise = new Promise((resolve) =\u0026gt; { // use the command list to execute each command with a small delay const id = setInterval(() =\u0026gt; { if (cmds.length \u0026gt; 0) { dispatchCommand(cmds.shift() as MiniLogoCommand, context); } else { // finish existing draw if (drawing) { context.stroke(); } clearInterval(id); resolve(''); } }, 1); }); dispatchCommand itself only needs to handle 4 cases:\n penUp penDown move color Knowing this, and the details about what properties each command type can have, we can evaluate each command and update our context. 
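The dispatch logic below relies on a MiniLogoCommand type. As a rough sketch of what such a type could look like, derived from these four commands and their arguments (treat this as an assumption, not the canonical definition from the MiniLogo sources):\ntype MiniLogoCommand = { name: 'penUp' } | { name: 'penDown' } | { name: 'move', args: { x: number, y: number } } | { name: 'color', args: { color: string } | { r: number, g: number, b: number } };\n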
This can be done with a switch and a case for each command type.\nBe sure to add this function inside the updateMiniLogoCanvas function, otherwise it will not have access to the necessary state!\n// dispatches a single command in the current context function dispatchCommand(cmd: MiniLogoCommand, context: CanvasRenderingContext2D) { if (cmd.name) { switch (cmd.name) { // pen is lifted off the canvas case 'penUp': drawing = false; context.stroke(); break; // pen is put down onto the canvas case 'penDown': drawing = true; context.beginPath(); context.moveTo(posX, posY); break; // move across the canvas // will draw only if the pen is 'down' case 'move': const x = cmd.args.x; const y = cmd.args.y; posX += x; posY += y; if (!drawing) { // move, no draw context.moveTo(posX, posY); } else { // move \u0026amp; draw context.lineTo(posX, posY); } break; // set the color of the stroke case 'color': if ((cmd.args as { color: string }).color) { // literal color or hex context.strokeStyle = (cmd.args as { color: string }).color; } else { // literal r,g,b components const args = cmd.args as { r: number, g: number, b: number }; context.strokeStyle = `rgb(${args.r},${args.g},${args.b})`; } break; // fallback in case we missed an instruction default: throw new Error('Unrecognized command received: ' + JSON.stringify(cmd)); } } } Now that we can interpret commands into drawing instructions, we\u0026rsquo;re effectively done with setting up the last part of MiniLogo. Since we\u0026rsquo;re listening to document updates, we don\u0026rsquo;t need to do anything else: just start it up and begin with an example program.\nThat\u0026rsquo;s it: we\u0026rsquo;re all done writing our TS file. We should now be able to run the following (assuming the generator script is also executed by build:web) and see our results at localhost:3000.\nnpm run build:web npm run serve If all went well, you should see a white diamond sketched out on the canvas when the page loads. If not, double-check that you receive \u0026amp; use the code value correctly in your createUserConfig function. You can also add the program yourself from here:\ndef test() { move(100, 0) pen(down) move(100, 100) move(-100, 100) move(-100, -100) move(100, -100) pen(up) } color(white) test() Once you have something drawing on the screen, you\u0026rsquo;re all set, congratulations! You\u0026rsquo;ve just successfully written your own Langium-based language, deployed it in the web, and hooked up generation to boot. In fact, you\u0026rsquo;ve done quite a lot if you\u0026rsquo;ve gone through all of these tutorials so far.\n writing your own grammar implementing custom validation customizing your CLI adding generation configuring code bundling building an extension setting up Langium + Monaco in the web adding a document build phase listener listening for notifications in the client, and using the results And the concepts that we\u0026rsquo;ve gone over from the beginning to now are not just for MiniLogo, of course; they can easily be generalized to work for your own language as well. As you\u0026rsquo;ve been going through these tutorials, we hope that you\u0026rsquo;ve been thinking about how you could have done things differently too. Whether it\u0026rsquo;s a simple improvement or another approach entirely, we believe it\u0026rsquo;s this kind of creative thinking that takes the idea of a language and really allows it to grow into something great.\nOne note worth making is that the example code shown in these tutorials was designed to be easy to demonstrate. 
It could definitely be improved with better error checking, better logic, generator optimizations, and so on; something to keep in mind.\nIt\u0026rsquo;s also easy to imagine how one could extend the generator to produce other functionality besides drawing. For example, you might have multiple generator targets, as there is no requirement to have a single generator output form like we\u0026rsquo;ve used in these tutorials. You could add as many different output forms as you need for each specific target, and even share some functionality between generators.\nWe hope that these tutorials have given you a practical demonstration of how to construct a language in Langium, and facilitated further exploration into more advanced topics \u0026amp; customizations. If you\u0026rsquo;re interested in learning more about Langium, you can continue through our other tutorials, reach out to us via discussions on GitHub, or continue working on your Langium-based language.\n"},{"id":7,"href":"/docs/reference/glossary/","title":"Glossary","parent":"Reference","content":"Anyone who is new to DSL development should carefully read the following primer on the terms we are using in our documentation:\nabstract syntax tree: A tree of elements that represents a text document. Each element is a simple JS object that combines multiple input tokens into a single object. Commonly abbreviated as AST.\ndocument: An abstract term to refer to a text file on your file system or an open editor document in your IDE.\ngrammar: Defines the form of your language. In Langium, a grammar is also responsible for describing how the AST is built.\nparser: A program that takes a document as its input and computes an abstract syntax tree as its output.\nparser rule: A parser rule describes how a certain AST element is supposed to be parsed. This is done by invoking other parser rules or terminals.\nterminal: A terminal is the smallest parsable part of a document. It usually represents small pieces of text like names, numbers, keywords or comments.\ntoken: A token is a substring of the document that matches a certain terminal. It contains information about which kind of terminal it represents as well as its location in the document.\n"},{"id":8,"href":"/docs/recipes/lexing/","title":"Lexing","parent":"Recipes","content":""},{"id":9,"href":"/docs/recipes/lexing/case-insensitive-languages/","title":"Case-insensitive languages","parent":"Lexing","content":"Some programming languages such as SQL or Structured Text use case insensitivity to provide more flexibility when writing code. For example, most SQL databases accept select statements starting with select, SELECT, or even SeLeCt.\nIf you want to provide your users the same flexibility in your language, there are different levels of case-insensitivity in Langium:\n You can make Langium\u0026rsquo;s parser completely case insensitive using the language configuration You can include case-insensitivity for specific terminal rules You can make cross references case insensitive All of these options can be enabled independently of one another.\nCase-insensitivity by configuration To make Langium case-insensitive, you have to set the caseInsensitive option to true in the LangiumConfig object, which is located in the langium-config.json file at the root of your Langium project. This can be configured for each individual language.\n{ ... 
\u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;hello-world\u0026quot;, \u0026quot;caseInsensitive\u0026quot;: true, // \u0026lt;-- makes the specified language case insensitive ... }, ... ], ... } Case-insensitivity on demand If you want to include case-insensitivity only where you need it, you can use the i flag inside of your grammar\u0026rsquo;s regular expressions:\n// append `i` to any regex to make it case insensitive terminal ID: /[A-Z]/i; Note that regular expressions can only be used inside of terminal rules.\nCase-insensitivity for identifiers and cross-references Be aware that both of these approaches only take care of the keywords in your grammar. If you want identifiers and cross-references to be case-insensitive as well, you have to adjust your scoping for each cross-reference case. This can be accomplished by setting the caseInsensitive option to true within the options when you are creating a new scope object.\nThere are several implementations of scopes. MapScope is very commonly used:\nnew MapScope(descriptions, parentScope, { caseInsensitive: true }); "},{"id":10,"href":"/docs/reference/grammar-language/","title":"Grammar Language","parent":"Reference","content":" Language Declaration Import of other grammar languages Terminal Rules Return Types Hidden Terminal Rules Parser Rules Declaration The Entry Rule Extended Backus-Naur Form Expressions Cardinalities Groups Alternatives Keywords Assignments Cross-References Unassigned Rule Calls Unordered Groups Simple Actions Tree-Rewriting Actions Data Type Rules Rule Fragments Guard Conditions More Examples More on Terminal Rules Extended Backus-Naur Form Terminals Terminal Groups Terminal Alternatives Character Range Wildcard Token Until Token Negated Token Terminal Rule Calls Terminal Fragments The grammar language describes the syntax and structure of your language. The Langium grammar language is implemented using Langium itself and therefore follows the same syntactic rules as any language created with Langium. The grammar language defines the structure of the abstract syntax tree (AST), which in Langium is a collection of TypeScript types describing the content of a parsed document, organized hierarchically. The individual nodes of the tree are then represented with JavaScript objects at runtime.\nIn the following, we describe the Langium syntax and document structure.\nLanguage Declaration An entry Langium grammar file (i.e. a grammar which contains an entry rule) always starts with a header which declares the name of the language. For example, a language named MyLanguage would be declared with:\ngrammar MyLanguage Every grammar file has a .langium extension and the entry grammar file needs to be referenced in langium-config.json. If you used the Yeoman generator to start your project, the configuration is already prepared.\nImport of other grammar languages It is possible to reuse grammar rules from other .langium files by importing them into your own grammar file.\nimport './path/to/an/other/langium/grammar'; This will import all grammar rules from the imported grammar file. It is therefore crucial to ensure that there are no duplicate rules between the different grammar files.\nContrary to entry grammars, imported grammars do not need to start with the keyword grammar.\nTerminal Rules The first step in parsing your language is lexing, which transforms a stream of characters into a stream of tokens. 
A token is a sequence of one or many characters which is matched by a terminal rule, creating an atomic symbol. The names of terminal rules are conventionally written in upper case.\nThe Langium parser is created using Chevrotain, which has a built-in lexer based on JavaScript Regular Expressions.\nLangium also allows the use of Extended Backus-Naur Form (EBNF) Expressions for terminals, but we highly recommend that you write your terminals using Regular Expressions instead. EBNF expressions are internally translated by Langium into Regular Expressions; they are mainly intended to allow porting Xtext grammars into Langium grammars, given their similarity.\nWith that said, both types of expressions can be used jointly in the same grammar.\nThe declaration of a terminal rule starts with the keyword terminal:\nterminal ID: /[_a-zA-Z][\\w_]*/; Here, the token ID will match a stream of characters starting with the character _, a lowercase letter, or an uppercase letter, followed by a sequence of zero or many (cardinality *) alphanumeric characters (\\w) or _.\nThe order in which terminal rules are defined is critical as the lexer will always return the first match.\nReturn Types A terminal rule returns an instance of a TypeScript primitive type. If no return type is specified, the terminal rule will return a string by default.\nterminal ID: /[_a-zA-Z][\\w_]*/; terminal INT returns number: /[0-9]+/; Here, the terminal rule ID will return an instance of string while the terminal rule INT will return an instance of number.\nThe available return types in Langium are:\n string number boolean bigint Date Hidden Terminal Rules The lexer tries to match every character in the document to a terminal rule or a keyword. It is therefore necessary to specify which characters or sequences of characters need to be ignored during lexing and parsing. Generally, you would want to ignore whitespaces and comments. This is achieved by adding the keyword hidden when defining a terminal rule. These hidden terminal rules are global and will be valid for all parser rules in the document.\nhidden terminal WS: /\\s+/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; Parser Rules While terminal rules indicate to the lexer what sequences of characters are valid tokens, parser rules indicate to the parser what sequences of tokens are valid. Parser rules lay out the structure of objects to be created by the parser and result in the creation of the abstract syntax tree (AST), which represents the syntactic structure of your language. In Langium, parser rules are also responsible for defining the type of objects to be parsed.\nDeclaration A parser rule always starts with the name of the rule followed by a colon.\nPerson: 'person' name=ID; In this example, the parser will create an object of type Person. This object will have a property name whose value and type must match the terminal rule ID (i.e. the property name is of type string and cannot start with a digit or special character).\nBy default, the parser will create an object with an inferred type corresponding to the parser rule name. 
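To make this concrete: for the Person rule above, Langium generates a corresponding TypeScript interface in the generated AST file. Roughly sketched (the actual code generated into src/language/generated/ast.ts carries additional bookkeeping, such as a $type property):\nimport { AstNode } from 'langium'; // inferred from the rule: Person: 'person' name=ID; interface Person extends AstNode { name: string } \n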
It is possible to override this behavior by explicitly defining the type of the object to be created. This is done by adding the keyword returns followed by a separately declared type, or the keyword infers followed by the name of the type to be inferred for this rule (more about this in the next chapter):\nPerson infers OtherType: 'person' name=ID; The parser rule Person will now lead to the creation of objects of type OtherType instead of Person.\nThe Entry Rule The entry rule is a parser rule that defines the starting point of the parsing step. The entry rule starts with the keyword entry and matches other parser rules.\nentry Model: (persons+=Person | greetings+=Greeting)*; In this example, the entry rule Model defines a group of alternatives. The parser will go through the input document and try to parse Person or Greeting objects and add them to the persons or greetings arrays, respectively. The parser reads the token stream until all inputs have been consumed.\nExtended Backus-Naur Form Expressions Parser rules are defined using Extended Backus-Naur Form-like (EBNF) expressions similar to the Xtext notation.\nCardinalities A cardinality defines the number of elements in a given set. Four different cardinalities can be defined for any expression:\n exactly one (no operator) zero or one (operator ?) zero or many (operator *) one or many (operator +) Groups Expressions can be put in sequence specifying the order they have to appear:\nPerson: 'person' name=ID address=Address; In this example, the rule Person must start with the person keyword followed by an ID token and an instance of the Address rule.\nAlternatives It is possible to match one of multiple valid options by using the pipe operator |. The already mentioned Model example specifies parsing either Person or Greeting, zero or many times (cardinality *):\nentry Model: (persons+=Person | greetings+=Greeting)*; Keywords Keywords are inline terminals which need to match a character sequence surrounded by single or double quotes, for example 'person' or \u0026quot;person\u0026quot;. Keywords must not be empty and must not contain white space.\nAssignments Assignments define properties on the type returned by the surrounding rule. There are three different ways to assign an expression (right side) to a property (left side).\n = is used for assigning a single value to a property.\nPerson: 'person' name=ID Here, the property name will accept only one expression matching the terminal rule ID.\n += is used to assign multiple values to an array property.\nContact: addresses+=STRING addresses+=STRING; Here, the array property addresses will accept two expressions matching the terminal rule STRING.\n ?= is used to assign a value to a property of type boolean. The property is set to true if the right part of the assignment is consumed by the parser.\nEmployee: 'employee' name=ID (remote?='remote')? Here the value of the property remote will be set to true if the keyword remote was successfully parsed as a part of the rule call. If the keyword remote is not consumed (cardinality is ?), the property remote is set to false.\n 
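Connecting these operators back to the AST: =, +=, and ?= produce a plain property, an array property, and a boolean property on the inferred type, respectively. A sketch of the shapes the three examples above would yield (assumed, not the literal generated file):\nimport { AstNode } from 'langium'; // name=ID interface Person extends AstNode { name: string } // addresses+=STRING addresses+=STRING interface Contact extends AstNode { addresses: string[] } // (remote?='remote')? interface Employee extends AstNode { name: string, remote: boolean }\n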
Cross-References With Langium, you can declare cross-references directly in the grammar. A cross-reference allows you to reference an object of a given type. The syntax is:\nproperty=[Type:TOKEN] The property will be a reference to an object of type Type identified by the token TOKEN. If the TOKEN is omitted, the parser will use the terminal or data type rule associated with the name assignment of the Type rule. If no such rule exists, then the token is mandatory.\nPerson: 'person' name=ID; Greeting: 'Hello' person=[Person:ID] '!'; The Person in square brackets does not refer to the parser rule Person but instead refers to an object of type Person. It will successfully parse a document like:\nperson Bob Hello Bob ! but the following:\nperson Bob Hello Sara ! will result in an error message since the cross-reference resolution will fail because a Person object with the name \u0026lsquo;Sara\u0026rsquo; has not been defined, even though \u0026lsquo;Sara\u0026rsquo; is a valid ID.\nUnassigned Rule Calls Parser rules do not necessarily need to create an object; they can also refer to other parser rules, which in turn will be responsible for returning the object. For example, in the Arithmetics example:\nAbstractDefinition: Definition | DeclaredParameter; The parser rule AbstractDefinition will not create an object of type AbstractDefinition. Instead, it calls either the Definition or DeclaredParameter parser rule which will be responsible for creating an object of a given type (or call other parser rules if they are unassigned rule calls themselves).\nIn contrast, an assigned rule call such as parameter=DeclaredParameter means that an object is created in the current parser rule and assigns the result of the DeclaredParameter parser rule to the specified property parameter of that object.\nUnordered Groups In regular groups, expressions must occur in the exact order they are declared.\nPerson: 'person' name=ID age=INT Here a Person object needs to first declare the property name then age.\nperson Bob 25 will successfully be parsed to an object of type Person while\nperson 25 Bob will throw an error.\nHowever, it is possible to declare a group of properties in an unordered fashion using the \u0026amp; operator\nPerson: 'person' name=ID \u0026amp; age=INT will now allow name and age to be declared in any order.\nperson 25 Bob will then successfully create an object of type Person.\nCardinalities (?, *, + operators) also apply to unordered groups. Please note that assignments with a cardinality of + or * have to appear continuously and cannot be interrupted by another assignment and resumed later.\nSimple Actions It is possible for a rule to return different types depending on its declaration:\ninterface TypeOne { name: string } RuleOne returns TypeOne: 'keywordOne' name=ID | RuleTwo; interface TypeTwo extends TypeOne {} RuleTwo returns TypeTwo: 'keywordTwo' name=ID; A rule call is one of the ways to specify the return type. With more complex rules, readability suffers. Actions allow us to improve the readability of the grammar by explicitly defining the return type. Actions are declared inside of curly braces {}:\nRuleOne returns TypeOne: 'keywordOne' name=ID | {TypeTwo} 'keywordTwo' name=ID; The example above requires that the return types TypeOne and TypeTwo are declared separately (see the next chapter). If the type returned by the action is created on-the-fly, the keyword infer needs to be added:\nRuleOne infers TypeOne: 'keywordOne' name=ID | {infer TypeTwo} 'keywordTwo' name=ID; Now both TypeOne and TypeTwo are inferred from the rule definition. Note that we use the keyword infers (declarative) for the grammar rule, but infer (imperative) for the action.\nTree-Rewriting Actions The parser is built using Chevrotain, which implements an LL(k) parsing algorithm (left-to-right, with k tokens of lookahead). 
Conceptually, an LL(k) grammar cannot have rules containing left recursion.\nConsider the following:\nAddition: Addition '+' Addition | '(' Addition ')' | value=INT; The parser rule Addition is left-recursive and will not be parseable. We can get around this issue by left-factoring the rule, i.e. by factoring out the common left factor. We introduce a new rule SimpleExpression:\nAddition: SimpleExpression ('+' right=SimpleExpression)*; SimpleExpression: '(' Addition ')' | value=INT; Unfortunately, left-factoring does not come without consequences and can lead to the generation of unwanted nodes. It is possible to \u0026ldquo;clean\u0026rdquo; the tree by using tree-rewriting actions.\nAddition returns Expression: SimpleExpression ({Addition.left=current} '+' right=SimpleExpression)*; SimpleExpression: '(' Addition ')' | value=INT; Essentially this means that when a + keyword is found, a new object of type Addition is created and the current object is assigned to the left property of the new object. The Addition then becomes the new current object. In imperative pseudo code it may look like this:\nfunction Addition() { let current = SimpleExpression() while (nextToken == '+') { let newObject = new Addition newObject.left = current current = newObject current.right = SimpleExpression() } } Please refer to this blog post for further details.\nData Type Rules Data type rules are similar to terminal rules as they match a sequence of characters. However, they are parser rules and are therefore context-dependent. This allows for more flexible parsing, as they can be interspersed with hidden terminals, such as whitespaces or comments. Contrary to terminal rules, they cannot use regular expressions to match a stream of characters, so they have to be composed of keywords, terminal rules or other data type rules.\nThe following example from the domain model example uses the QualifiedName data type rule to enable references to other elements using their fully qualified name.\nQualifiedName returns string: ID ('.' ID)*; Data type rules need to specify a primitive return type.\nRule Fragments If you are facing repetitive patterns in your grammar definition, you can take advantage of Rule Fragments to improve the grammar\u0026rsquo;s maintainability.\nStudent: 'student' firstName=ID lastName=ID address=STRING phoneNumber=STRING grades=Grades; Teacher: 'teacher' firstName=ID lastName=ID address=STRING phoneNumber=STRING classes=Classes; TechnicalStaff: 'tech' firstName=ID lastName=ID address=STRING phoneNumber=STRING; The parser rules Student, Teacher, and TechnicalStaff partly share the same syntax. If, for example, the assignment for phoneNumber had to be updated, we would need to make changes everywhere the phoneNumber assignment was used. We can introduce Rule Fragments to extract similar patterns and improve maintainability:\nfragment Details: firstName=ID lastName=ID address=STRING phoneNumber=STRING; Student: 'student' Details grades=Grades; Teacher: 'teacher' Details classes=Classes; TechnicalStaff: 'tech' Details; Fragment rules are not part of the AST and will therefore never create an object; instead, they can be understood as being textually inserted where they are referenced.\n
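Because fragments are textually inserted, each rule that uses Details ends up with the fragment\u0026rsquo;s properties on its own inferred type. A sketch of the resulting shapes (assumed; Grades and Classes stand for the types inferred from their own rules, and the generated file will differ in detail):\nimport { AstNode } from 'langium'; interface Student extends AstNode { firstName: string, lastName: string, address: string, phoneNumber: string, grades: Grades } interface Teacher extends AstNode { firstName: string, lastName: string, address: string, phoneNumber: string, classes: Classes } interface TechnicalStaff extends AstNode { firstName: string, lastName: string, address: string, phoneNumber: string }\n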
Guard Conditions
It may be useful to group parser rules with small variations inside of a single parser rule. Given the following example:

entry Model: element+=RootElement;
RootElement infers Element: isPublic?='public'? 'element' name=ID '{' elements+=Element* '}';
Element: 'element' name=ID '{' elements+=Element* '}';

The only difference between RootElement and Element is that the former has the boolean property isPublic. We can refactor the grammar so that only Element remains, with a guard condition that determines which concrete syntax the parser should use:

entry Model: element+=Element<true>;
Element<isRoot>: (<isRoot> isPublic?='public')? 'element' name=ID '{' elements+=Element<false>* '}';

Element has the guard isRoot, which determines whether the optional group containing the isPublic property is allowed to be parsed.
The entry rule Model sets the value of isRoot to true with element+=Element<true>, while isRoot is set to false inside of the Element<isRoot> parser rule with elements+=Element<false>.
In general, a guard condition on a group decides whether the parser is allowed to parse the group, depending on the result of the evaluated condition. Logical operations can be applied, such as & (and), | (or) and ! (not), to fine-tune the exact conditions under which the group is supposed to be parsed.
Additionally, guard conditions can also be used inside of alternatives. See the following example:

entry Model: element+=Element<true>;
Element<isRoot>: (<isRoot> 'root' | <!isRoot> 'element') name=ID '{' elements+=Element<false>* '}';

The parser will always exclude alternatives whose guard conditions evaluate to false. All other alternatives remain possible options for the parser to choose from.

More Examples
Not all parser rules need to be mentioned in the entry rule, as shown in this example:

entry Model: (persons+=Person | greetings+=Greeting)*;
Person: 'person' name=ID address=Address;
Greeting: 'Hello' person=[Person] '!';
Address: street=STRING city=ID postcode=INT;

Here the Person parser rule includes a property address which matches the parser rule Address. We decided that an Address will never be present in the input document on its own and will always be parsed in relation to a Person. It is therefore not necessary to include an array of Address inside of the entry rule.
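Nested rules like Address still produce fully typed nodes in the generated AST, reachable through their container. A minimal sketch of traversing such a model, assuming the usual generated ast.ts (names follow the grammar above):

import type { Model } from './generated/ast.js';

export function listAddresses(model: Model): string[] {
    // `persons` mirrors the `persons+=Person` assignment in the entry rule
    return model.persons.map(person =>
        `${person.name} lives at ${person.address.street}, ${person.address.city}`);
}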
Keywords are meant to provide a visible structure to the language and guide the parser in deciding what type of object needs to be parsed. Consider the following:

Student: name=ID;
Teacher: name=ID;
Person: Student | Teacher;

In this example, a Person can either be a Student or a Teacher. This grammar is ambiguous because the parser rules Student and Teacher are identical: the parser cannot differentiate between them when trying to parse a Person. Keywords can help remove such ambiguity and guide the parser in deciding whether a Student or a Teacher needs to be parsed. We can add a keyword to the parser rule Student, Teacher, or to both of them:

Student: 'student' name=ID;
Teacher: 'teacher' name=ID;
Person: Student | Teacher;

Now the ambiguity is resolved and the parser is able to differentiate between the two parser rules.
Parser rules can also contain multiple keywords:

Person: 'person' name=ID 'age' age=INT;

If an assignment has a cardinality of + or *, then the expressions belong to a single group and must not be interrupted by other expressions.

Paragraph: 'paragraph' (sentences+=STRING)+ id=INT;

Here, the property sentences will accept one or many expressions matching the terminal rule STRING, followed by an INT. Parsing a document containing:

paragraph "The expression group " 3 "was interrupted"

will throw an error, since the STRING expressions are not continuous. It is, however, possible to interrupt and resume a sequence of expressions by using hidden terminal symbols:

paragraph "expression one" /* comment */ "expression two" 3

The above example will be successfully parsed.

More on Terminal Rules

Extended Backus-Naur Form Terminals
For full disclosure, we recommend using regular expressions when writing your terminals, as EBNF expressions are translated to regular expressions internally anyway. EBNF support is primarily intended for grammars that were originally written in Xtext and are being ported to Langium.
As mentioned earlier, terminal rules can be described using regular expressions or EBNF expressions. EBNF expressions are very similar to the parser rules described above. In this section, we describe which EBNF expressions are supported for terminals and their equivalents in JavaScript regular expressions where possible.

Terminal Groups
Tokens can be put in sequence, specifying the order in which they have to appear:

terminal FLIGHT_NUMBER: ('A'..'Z')('A'..'Z')('0'..'9')('0'..'9')('0'..'9')('0'..'9')?;

In this example, the token FLIGHT_NUMBER must start with two capital letters followed by three or four digits.

Terminal Alternatives
It is possible to match one of multiple valid options by using the pipe operator |. The terminal rule STRING can use alternatives to match a sequence of characters between double quotes "" or single quotes '':

terminal STRING: '"' !('"')* '"' | "'" !("'")* "'";

In regular expressions, alternatives are also expressed with the pipe operator |:

terminal STRING: /"[^"]*"|'[^']*'/;

Character Range
The operator .. is used to declare a character range. It is equivalent to the operator - within a character class in a regular expression. It matches any character between the left character and the right character (inclusive on both ends).

terminal INT returns number: ('0'..'9')+;

is equivalent to the regular expression:

terminal INT returns number: /[0-9]+/;

Here, INT is matched to one or more characters (using the operator +, which defines a cardinality of 'one or many') between 0 and 9 (inclusive on both ends).
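Since Langium compiles such terminals down to JavaScript regular expressions internally, you can sanity-check an equivalence claim like the one above directly in TypeScript:

// The regex form of the INT terminal, anchored to check a whole token
const INT_PATTERN = /^[0-9]+$/;

console.log(INT_PATTERN.test('2024')); // true – every character is in the 0..9 range
console.log(INT_PATTERN.test('x42')); // false – 'x' falls outside the range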
Wildcard Token
The operator . is used to match any single character, just like in regular expressions.

terminal HASHTAG: '#'.+;

In this example, the terminal rule HASHTAG matches a sequence of characters starting with # followed by one or many (cardinality +) characters. The equivalent regular expression:

terminal HASHTAG: /#.+/;

Until Token
The operator -> indicates that all characters should be consumed from the left token until the right token occurs. For example, the terminal rule for multi-line comments can be implemented as:

terminal ML_COMMENT: '/*' -> '*/';

Langium will transform the until token into the regular expression [\s\S]*?, which matches any character non-greedily:

terminal ML_COMMENT: /\/\*[\s\S]*?\*\//;

Negated Token
It is possible to negate tokens using the operator !. In Langium this produces a negative lookahead, i.e. it does not consume tokens but acts as a 'guard' for what the following expression can recognize.
For example, if you want to recognize a word that doesn't start with no, you could write such an expression in EBNF like so:

terminal NONO: (!'no')('a'..'z'|'A'..'Z')+;

For reference, this corresponds to the following regular expression:

terminal NONO: /(?!no)[a-zA-Z]+/;

Note that if you're coming from Xtext, negated tokens work differently here. In Xtext, negated tokens recognize the complement of a set of characters (anything 'but' what is listed in the negation), very much akin to a negated character class in regular expressions. This is important to keep in mind if you're porting a grammar from Xtext, as Langium's interpretation of negated tokens deviates from Xtext's.

Terminal Rule Calls
A terminal rule can include other terminal rules in its definition.

terminal DOUBLE returns number: INT '.' INT;

Note that it is easy to create conflicts between terminal rules when using terminal rule calls. See Data Type Rules for further details.

Terminal Fragments
Fragments allow sub-definitions of terminal rules to be extracted. They are not turned into tokens by the lexer themselves and have to be consumed by other terminal rules.

terminal fragment CAPITAL_LETTER: ('A'..'Z');
terminal fragment SMALL_LETTER: ('a'..'z');
terminal NAME: CAPITAL_LETTER SMALL_LETTER+;

In this example, the lexer will not turn a single capital or small letter into a valid token, but will match a sequence of one capital letter followed by one or many small letters.

"},{"id":11,"href":"/docs/recipes/scoping/qualified-name/","title":"Qualified Name Scoping","parent":"Scoping","content":"

Qualified name scoping refers to a style of referencing elements using a fully qualified name. Such a fully qualified name is usually composed of the original name of the target element and the names of its container elements. You will usually see this method of scoping in C-like languages using namespaces or in Java using packages. The following code snippet shows how qualified name scoping works from an end-user perspective, using a function in a C++ namespace:

namespace Langium {
    void getDocumentation();
}

void main() {
    // Should call the `getDocumentation` function defined in the `Langium` namespace
    Langium::getDocumentation();
}

As can be seen, qualified name scoping is quite helpful in this case.
It allows us to reference the getDocumentation function through the scope computed and made available by the Langium namespace, even though it's not directly accessible within the scope of main by itself.
Note that similar behavior can also be accomplished using class member scoping. However, there is one core advantage to using globally available elements: compared to member scoping, this type of scoping requires comparatively few resources. The lookup required for qualified name scoping can be done in near-constant time with just a bit of additional computation on a per-document basis, whereas member scoping needs to do a lot of computation on a per-reference basis. With large workspaces, complex scoping can become a performance bottleneck.
This behavior can be achieved in Langium by exporting the getDocumentation function under the name Langium::getDocumentation. To do this, we first set up a new scope computation class that extends the DefaultScopeComputation; it will be responsible for our custom scope computation. Then we bind our custom scope computation class in our module:

// Scope computation for our C++-like language
export class CppScopeComputation extends DefaultScopeComputation {
    constructor(services: LangiumServices) {
        super(services);
    }
}

// Services module for overriding the scope computation
// Your language module is usually placed in your `<dsl-name>-module.ts` file
export const CppModule: Module<CppServices, PartialLangiumServices & CppAddedServices> = {
    references: {
        ScopeComputation: (services) => new CppScopeComputation(services)
    }
}

Next, we can start implementing our custom scoping by overriding the computeExports function. This function is particularly important, as it allows us to export nodes of our model under their qualified names. We also annotate this function with override, since a default implementation is already provided.

export class CppScopeComputation extends DefaultScopeComputation {

    // Omitting the previous implementation for brevity

    /**
     * Export all functions using their fully qualified name
     */
    override async computeExports(document: LangiumDocument): Promise<AstNodeDescription[]> {
        const exportedDescriptions: AstNodeDescription[] = [];
        for (const childNode of streamAllContents(document.parseResult.value)) {
            if (isFunctionDeclaration(childNode)) {
                const fullyQualifiedName = this.getQualifiedName(childNode, childNode.name);
                // `descriptions` is our `AstNodeDescriptionProvider` defined in `DefaultScopeComputation`
                // It allows us to easily create descriptions that point to elements using a name.
                exportedDescriptions.push(this.descriptions.createDescription(childNode, fullyQualifiedName, document));
            }
        }
        return exportedDescriptions;
    }

    /**
     * Build a qualified name for a model node
     */
    private getQualifiedName(node: AstNode, name: string): string {
        let parent: AstNode | undefined = node.$container;
        while (isNamespace(parent)) {
            // Iteratively prepend the name of the parent namespace
            // This allows us to work with nested namespaces
            name = `${parent.name}::${name}`;
            parent = parent.$container;
        }
        return name;
    }
}

Once we start exporting functions using their fully qualified name, references such as QualifiedName::target will start working correctly. We can even nest multiple namespaces to create Fully::Qualified::Name::target.
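The name-building loop in getQualifiedName is easy to reason about in isolation. A standalone sketch of the same logic (the helper name qualify is hypothetical):

// Prepend container names, innermost last – mirrors the `while (isNamespace(parent))` loop
function qualify(namespaces: string[], name: string): string {
    return [...namespaces, name].join('::');
}

console.log(qualify(['Fully', 'Qualified', 'Name'], 'target')); // 'Fully::Qualified::Name::target'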
However, this leads us to another problem. We can now only reference functions using their fully qualified names, even if they're locally available:

namespace QualifiedName {
    void target();
    void test() {
        // Will not link correctly
        target();
        // Requires the new fully qualified name
        QualifiedName::target();
    }
}

To rectify this problem, we have to override the computeLocalScopes method, which provides access to elements that aren't exported globally. We can also use this method to provide secondary access to globally available objects under a local name.

export class CppScopeComputation extends DefaultScopeComputation {

    // Omitting the previous implementation for brevity

    override async computeLocalScopes(document: LangiumDocument): Promise<PrecomputedScopes> {
        const model = document.parseResult.value as CppProgram;
        // This map stores a list of descriptions for each node in our document
        const scopes = new MultiMap<AstNode, AstNodeDescription>();
        this.processContainer(model, scopes, document);
        return scopes;
    }

    private processContainer(
        container: CppProgram | Namespace,
        scopes: PrecomputedScopes,
        document: LangiumDocument
    ): AstNodeDescription[] {
        const localDescriptions: AstNodeDescription[] = [];
        for (const element of container.elements) {
            if (isFunctionDeclaration(element)) {
                // Create a simple local name for the function
                const description = this.descriptions.createDescription(element, element.name, document);
                localDescriptions.push(description);
            } else if (isNamespace(element)) {
                const nestedDescriptions = this.processContainer(element, scopes, document);
                for (const description of nestedDescriptions) {
                    // Add qualified names to the container
                    // This could also be a partially qualified name
                    const qualified = this.createQualifiedDescription(element, description, document);
                    localDescriptions.push(qualified);
                }
            }
        }
        scopes.addAll(container, localDescriptions);
        return localDescriptions;
    }

    private createQualifiedDescription(
        container: Namespace,
        description: AstNodeDescription,
        document: LangiumDocument
    ): AstNodeDescription {
        // Prepend the immediate container's name; deeper nesting is handled
        // by the recursion in `processContainer`
        const name = `${container.name}::${description.name}`;
        return this.descriptions.createDescription(description.node!, name, document);
    }
}

This change allows us to use local names of functions in the local scope, while they are still exported to the global scope under their fully qualified name. Another example of this style of scoping can be seen in the domain-model example language. The full implementation of the scope computation service follows below.

Full Implementation

export class CppScopeComputation extends DefaultScopeComputation {

    /**
     * Export all functions using their fully qualified name
     */
    override async computeExports(document: LangiumDocument): Promise<AstNodeDescription[]> {
        const exportedDescriptions: AstNodeDescription[] = [];
        for (const childNode of streamAllContents(document.parseResult.value)) {
            if (isFunctionDeclaration(childNode)) {
                const fullyQualifiedName = this.getQualifiedName(childNode, childNode.name);
                // `descriptions` is our `AstNodeDescriptionProvider` defined in `DefaultScopeComputation`
                // It allows us to easily create descriptions that point to elements using a name.
                exportedDescriptions.push(this.descriptions.createDescription(childNode, fullyQualifiedName, document));
            }
        }
        return exportedDescriptions;
    }

    override async computeLocalScopes(document: LangiumDocument): Promise<PrecomputedScopes> {
        const model = document.parseResult.value as CppProgram;
        // This multi-map stores a list of descriptions for each node in our document
        const scopes = new MultiMap<AstNode, AstNodeDescription>();
        this.processContainer(model, scopes, document);
        return scopes;
    }

    private processContainer(
        container: CppProgram | Namespace,
        scopes: PrecomputedScopes,
        document: LangiumDocument
    ): AstNodeDescription[] {
        const localDescriptions: AstNodeDescription[] = [];
        for (const element of container.elements) {
            if (isFunctionDeclaration(element)) {
                // Create a simple local name for the function
                const description = this.descriptions.createDescription(element, element.name, document);
                localDescriptions.push(description);
            } else if (isNamespace(element)) {
                const nestedDescriptions = this.processContainer(element, scopes, document);
                for (const description of nestedDescriptions) {
                    // Add qualified names to the container
                    // This could also be a partially qualified name
                    const qualified = this.createQualifiedDescription(element, description, document);
                    localDescriptions.push(qualified);
                }
            }
        }
        scopes.addAll(container, localDescriptions);
        return localDescriptions;
    }

    private createQualifiedDescription(
        container: Namespace,
        description: AstNodeDescription,
        document: LangiumDocument
    ): AstNodeDescription {
        // Prepend the immediate container's name; deeper nesting is handled
        // by the recursion in `processContainer`
        const name = `${container.name}::${description.name}`;
        return this.descriptions.createDescription(description.node!, name, document);
    }

    /**
     * Build a qualified name for a model node
     */
    private getQualifiedName(node: AstNode, name: string): string {
        let parent: AstNode | undefined = node.$container;
        while (isNamespace(parent)) {
            // Iteratively prepend the name of the parent namespace
            // This allows us to work with nested namespaces
            name = `${parent.name}::${name}`;
            parent = parent.$container;
        }
        return name;
    }
}

"},{"id":12,"href":"/docs/recipes/scoping/","title":"Scoping","parent":"Recipes","content":"

You likely know scopes from programming, where some variables are only available from certain areas (such as blocks) in your program. For example, take the short TypeScript snippet below. Based on the block (scope) where a variable is declared, it may or may not be available at another location in the same program.

let x = 42;
x = 3; // References the `x` defined in the previous line
if (condition) {
    let y = 42;
}
y = 3; // Cannot link, `y` isn't in any of the available scopes

This kind of behavior is called lexical scoping. Although this default scoping implementation is suitable for prototyping – and even for some simple languages once finished – this behavior can easily be modified to fit the needs of your language's domain.
In general, the way we resolve references is split into three phases of the document lifecycle:

Symbol indexing is responsible for making objects globally available for referencing.
Scope computation determines which elements are reachable from a given position in your document.
Finally, the linking phase eagerly links each reference within a document to its target using your language's scoping rules.
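Each of these phases is backed by a service you can override. As a minimal sketch of the pattern the recipes below flesh out (the class name is assumed), a custom provider typically special-cases a few reference types and falls back to the default lexical behavior:

import { DefaultScopeProvider, type ReferenceInfo, type Scope } from 'langium';

export class MyDslScopeProvider extends DefaultScopeProvider {
    override getScope(context: ReferenceInfo): Scope {
        // Inspect `context.container` and `context.property` to decide whether
        // this particular cross-reference needs special treatment...
        // ...and otherwise fall back to the default lexical scoping.
        return super.getScope(context);
    }
}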
In this recipe, we'll look at different scoping kinds and styles and see how we can achieve them using Langium:

Qualified Name Scoping
Class Member Scoping
File-Based Scoping

Note that these are just example implementations of commonly used scoping methods. The scoping API of Langium is designed to be flexible and extensible for any kind of use case.

"},{"id":13,"href":"/showcase/statemachine/","title":"State Machine","parent":"Langium Showcase","content":""},{"id":14,"href":"/docs/learn/workflow/install/","title":"1. Install Yeoman","parent":"Langium's workflow","content":"

Before diving into Langium itself, let's get your environment ready for development:

Make sure you have a working Node environment with version 16 or higher.
Install Yeoman and the Langium extension generator:

npm i -g yo generator-langium

For our getting started example, we also recommend installing the latest version of vscode.

"},{"id":15,"href":"/docs/recipes/builtin-library/","title":"Builtin Libraries","parent":"Recipes","content":"

Languages usually offer their users some high-level programming features that they do not have to define themselves. For example, TypeScript provides users with typings for globally accessible variables such as the window, process or console objects. They are part of the JavaScript runtime, and not defined by any user or a package they might import. Instead, these features are contributed through what we call builtin libraries.
Loading a builtin library in Langium is very simple. We first define the source code of the library, using the hello world language from the getting started guide:

export const builtinHelloWorld = `
person Jane
person John
`.trimLeft();

Next, we load our builtin library code through the loadAdditionalDocuments method provided by the DefaultWorkspaceManager:

import { AstNode, DefaultWorkspaceManager, LangiumDocument, LangiumDocumentFactory, LangiumSharedServices } from "langium";
import { WorkspaceFolder } from 'vscode-languageserver';
import { URI } from "vscode-uri";
import { builtinHelloWorld } from './builtins';

export class HelloWorldWorkspaceManager extends DefaultWorkspaceManager {

    private documentFactory: LangiumDocumentFactory;

    constructor(services: LangiumSharedServices) {
        super(services);
        this.documentFactory = services.workspace.LangiumDocumentFactory;
    }

    protected override async loadAdditionalDocuments(
        folders: WorkspaceFolder[],
        collector: (document: LangiumDocument<AstNode>) => void
    ): Promise<void> {
        await super.loadAdditionalDocuments(folders, collector);
        // Load our library using the `builtin` URI scheme
        collector(this.documentFactory.fromString(builtinHelloWorld, URI.parse('builtin:///library.hello')));
    }
}

As a last step, we have to bind our newly created workspace manager:

// Add this to the `hello-world-module.ts` included in the yeoman-generated project
export type HelloWorldSharedServices = LangiumSharedServices;

export const HelloWorldSharedModule: Module<HelloWorldSharedServices, DeepPartial<HelloWorldSharedServices>> = {
    workspace: {
        WorkspaceManager: (services) => new HelloWorldWorkspaceManager(services)
    }
}

Be aware that this shared module is not injected by default.
You have to add it manually to the inject call for the shared injection container:

export function createHelloWorldServices(context: DefaultSharedModuleContext): {
    shared: LangiumSharedServices,
    services: HelloWorldServices
} {
    const shared = inject(
        createDefaultSharedModule(context),
        HelloWorldGeneratedSharedModule,
        HelloWorldSharedModule
    );
    const services = inject(
        createDefaultModule({ shared }),
        HelloWorldGeneratedModule,
        HelloWorldModule
    );
    shared.ServiceRegistry.register(services);
    return { shared, services };
}

Once everything is wired together, we are done from the perspective of our DSL. At startup, our language server will run the loadAdditionalDocuments method, which makes our library available for any workspace documents of the user.
However, when trying to navigate to the builtin library elements, vscode will show users an error message, complaining that it cannot find the builtin library file. This is expected, as the builtin library only lives in memory. To fix this issue, we need to implement a custom FileSystemProvider on the client (src/extension.ts in the hello world example) that allows navigation to the builtin library files:

import * as vscode from 'vscode';
import { builtinHelloWorld } from './language/builtins';

export class DslLibraryFileSystemProvider implements vscode.FileSystemProvider {

    static register(context: vscode.ExtensionContext) {
        context.subscriptions.push(
            vscode.workspace.registerFileSystemProvider('builtin', new DslLibraryFileSystemProvider(context), {
                isReadonly: true,
                isCaseSensitive: false
            }));
    }

    stat(uri: vscode.Uri): vscode.FileStat {
        const date = Date.now();
        return {
            ctime: date,
            mtime: date,
            size: Buffer.from(builtinHelloWorld).length,
            type: vscode.FileType.File
        };
    }

    readFile(uri: vscode.Uri): Uint8Array {
        // We could return different libraries based on the URI
        // We have only one, so we always return the same
        return new Uint8Array(Buffer.from(builtinHelloWorld));
    }

    // The following class members only serve to satisfy the interface
    private readonly didChangeFile = new vscode.EventEmitter<vscode.FileChangeEvent[]>();
    onDidChangeFile = this.didChangeFile.event;
    watch() { return { dispose: () => {} }; }
    readDirectory(): [] { throw vscode.FileSystemError.NoPermissions(); }
    createDirectory() { throw vscode.FileSystemError.NoPermissions(); }
    writeFile() { throw vscode.FileSystemError.NoPermissions(); }
    delete() { throw vscode.FileSystemError.NoPermissions(); }
    rename() { throw vscode.FileSystemError.NoPermissions(); }
}

...

// register the file system provider on extension activation
export function activate(context: vscode.ExtensionContext) {
    DslLibraryFileSystemProvider.register(context);
}

This registers an in-memory file system for vscode to use for the builtin URI scheme. Every time vscode is supposed to open a file with this scheme, it will invoke the stat and readFile methods of the registered file system provider.

"},{"id":16,"href":"/docs/recipes/scoping/class-member/","title":"Class Member Scoping","parent":"Scoping","content":"

In this guide we will take a look at member-based scoping.
It's a mechanism you are likely familiar with from object-oriented languages such as Java, C# and JavaScript:

class A {
    b: B;
}
class B {
    value: string;
}

function test(): void {
    const a = new A();
    const b = a.b; // Refers to the `b` defined in class `A`
    const value = b.value; // Refers to the `value` defined in class `B`
}

Member-based scoping like this requires not only a modification of the default scope provider, but also some other prerequisites. These include adding a member-call mechanism to your grammar and a rudimentary type system. For this guide, we will use excerpts from the langium-lox project to demonstrate how you can set this up yourself. This project implements a strongly-typed version of the Lox language from the popular book Crafting Interpreters.
We'll first start with the MemberCall grammar rule, which references one of our NamedElements. These elements could be variable declarations, functions, classes, or methods and fields of those classes. Additionally, we want to allow function calls on elements. Note that the grammar has no notion of whether these elements can actually be executed as functions. Instead, we always allow function calls on every named element, and simply provide validation errors in case an element is called erroneously. After parsing the first member call, we continue parsing further members as long as the input text provides us with further references to elements, separated by dots.

type NamedElement = FunctionDeclaration | VariableDeclaration | MethodMember | FieldMember | Class;

MemberCall:
    // Reference a named element of our grammar
    // Variables, functions, etc.
    element=[NamedElement:ID]
    // Parse an operation call on this element
    (explicitOperationCall?='(' (
        // Parse any arguments for the operation call
        arguments+=Expression (',' arguments+=Expression)*
    )? ')')?
    // Create a new `MemberCall` and assign the old one to the `previous` property
    // The previous member call can either be the member call that was parsed in the previous section
    // Or one that is parsed in the next section due to the repetition at the end of this group
    ({infer MemberCall.previous=current}
        // We repeat the named element reference
        ("." element=[NamedElement:ID] (
            // Parse an operation call again
            explicitOperationCall?='(' (
                arguments+=Expression (',' arguments+=Expression)*
            )? ')')?
        // Our language allows functions to return functions
        // So we need to be able to call multiple functions without any element references
        | (
            explicitOperationCall?='(' (
                arguments+=Expression (',' arguments+=Expression)*
            )? ')'))
    )*;

A very important aspect of these chained member calls is the action ({infer MemberCall.previous=current}), which rewrites the resulting AST. In this case, it reverses the direction of the member-call AST nodes. Instead of starting with the first encountered member call and traversing down to the last, we start with the last and traverse the list of member calls up through the previous property.
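To make the rewrite concrete, here is an illustration (a plain TypeScript object, with most fields elided) of the AST produced for the input a.b():

// The *last* segment is the outermost node; earlier segments are reached via `previous`
const ast = {
    $type: 'MemberCall',
    element: { $refText: 'b' },
    explicitOperationCall: true,
    previous: {
        $type: 'MemberCall',
        element: { $refText: 'a' }
    }
};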
The reason for doing this becomes clear when looking at the scope provider for the Lox language:

export class LoxScopeProvider extends DefaultScopeProvider {

    override getScope(context: ReferenceInfo): Scope {
        // Target element of member calls
        if (context.property === 'element' && isMemberCall(context.container)) {
            const memberCall = context.container;
            const previous = memberCall.previous;
            if (!previous) {
                return super.getScope(context);
            }
            const previousType = inferType(previous);
            if (isClassType(previousType)) {
                return this.scopeClassMembers(previousType.literal);
            }
            // When the target of our member call isn't a class,
            // it is either a primitive type or a type resolution error
            // Simply return an empty scope
            return EMPTY_SCOPE;
        }
        return super.getScope(context);
    }

    private scopeClassMembers(classItem: Class): Scope {
        // Since Lox allows class inheritance,
        // we also need to look at all members of possible super classes for scoping
        const allMembers = getClassChain(classItem).flatMap(e => e.members);
        return this.createScopeForNodes(allMembers);
    }
}

When trying to compute the type of an expression, we are only interested in the final piece of the member call. However, to derive the type and scope of the final member call, we have to recursively identify the type of the previous member call. This is done by looking at the member call stored in the previous property and inferring its type. See here for the full implementation of the type inference system in Lox. This kind of type inference requires scoping.
To illustrate this behavior a bit better, take a look at the following code snippet:

class Container {
    sub: SubContainer
}
class SubContainer {
    name: string
}

// Constructor call
var element = Container();
// Member access
println(element.sub.name);

We recursively alternate between the scope provider and the type inference system until we arrive at a member call without any previous member call. At this point we resolve the reference using the default lexical scoping built into Langium. With our scope provider in place, we can visualize how it interacts with Langium's implementation of Lox in the following sequence diagram:

sequenceDiagram
    participant R as Language Runtime
    participant T as Type System
    participant S as Scope Provider
    participant L as Lexical Scope
    R->>T: (1) Query type of expression `element.sub.name`
    T->>S: (2) Query `name` node
    S->>T: (3) Query `sub` type
    T->>S: (4) Query `sub` node
    S->>T: (5) Query `element` type
    T->>S: (6) Query `element` node
    S->>L: (7) Query `element` node
    L-->>S: (7) Return `element` node
    S-->>T: (6) Return `element` node
    T->>S: (8) Query `Container` node
    S->>L: (9) Query `Container` node
    L-->>S: (9) Return `Container` node
    S-->>T: (8) Return `Container` node
    T-->>S: (5) Return `Container` type result
    S-->>T: (4) Return `sub` node
    T-->>S: (3) Return `SubContainer` type result
    S-->>T: (2) Return `name` node
    T-->>R: (1) Return `string` type result

When trying to infer the type of the expression element.sub.name, we can see that this results in quite a lot of computation throughout the scoping and type systems. It is therefore recommended to cache type inference information, as this naive approach to inference can quickly lead to performance issues.
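A minimal sketch of such caching, memoizing inferred types per AST node (inferType and TypeDescription stand in for the langium-lox helpers referenced above; invalidating the cache when documents change is left out):

import type { AstNode } from 'langium';
import { inferType, type TypeDescription } from './type-system.js'; // assumed module

const typeCache = new Map<AstNode, TypeDescription>();

export function inferTypeCached(node: AstNode): TypeDescription {
    let type = typeCache.get(node);
    if (!type) {
        type = inferType(node);
        typeCache.set(node, type);
    }
    return type;
}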
"},{"id":17,"href":"/docs/reference/configuration-services/","title":"Configuration via Services","parent":"Reference","content":"

Langium supports the configuration of most aspects of your language and language server via a set of services. Those services are configured by modules, which are essentially mappings from a service name to its implementation.
We can separate services and modules into two main categories:

Shared Services
The shared services are services that are shared across all Langium languages. Many applications have only one Langium language, but the overall structure of the services is the same.

The ServiceRegistry is responsible for registering and accessing the different languages and their services.
The Connection service is used in a language server context; it sends messages to the client and registers message handlers for incoming messages.
The AstReflection service provides access to the structure of the AST types.
Shared services involved in the document lifecycle (future documentation)

Language-Specific Services
The language-specific services are services specific to one Langium language and isolated from other languages.

Services for LSP features
Services involved in the document lifecycle (future documentation)
Utility services (e.g. References, JsonSerializer)

Customization
If you have used the Yeoman generator, the entry point to service customization is found in the src/language/...-module.ts file, where '…' is the name of your language. There you can register new services or override the default implementations of services. Langium implements the Inversion of Control principle via the Dependency Injection pattern, which promotes loosely coupled architectures, maintainability, and extensibility.
For the following sections, we will use the arithmetics example to describe the procedure for replacing or adding services. Note that all names prefixed with Arithmetics should be understood as being specific to the language named Arithmetics; in your project, those services' names will be prefixed with your own language name.
Please note that it is not mandatory to implement all custom code via dependency injection. The main reason for using dependency injection is when your custom code depends on other services. In many cases you can use plain functions instead of service classes to implement your application logic.

Overriding and Extending Services
Thanks to the dependency injection pattern used in Langium, you can change the behavior of a service or add to its functionality in one place, without having to modify every piece of code that depends on the service to be overridden or extended.
The arithmetics example provides a custom implementation of the ScopeProvider service, which overrides functionality of the default implementation DefaultScopeProvider.
First, we need to register the new implementation of ScopeProvider inside of the ArithmeticsModule:

export const ArithmeticsModule: Module<ArithmeticsServices, PartialLangiumServices & ArithmeticsAddedServices> = {
    references: {
        ScopeProvider: (services) => new ArithmeticsScopeProvider(services)
    }
};

In the ArithmeticsModule singleton instance, we map a property with the name of our service (here ScopeProvider) to a concrete implementation of the service. This means that the first time we access the service named ScopeProvider, a new instance of the class ArithmeticsScopeProvider will be created instead of the default implementation DefaultScopeProvider.
The provided factory function is invoked only once, which means that all services are handled as singletons.
In order to successfully override an existing service, the property name (here ScopeProvider) must exactly match that of the default implementation.
The ArithmeticsScopeProvider overrides two methods of DefaultScopeProvider:

export class ArithmeticsScopeProvider extends DefaultScopeProvider {
    protected createScope(elements: Stream<AstNodeDescription>, outerScope: Scope): Scope {
        return new StreamScope(elements, outerScope, { caseInsensitive: true });
    }
    protected getGlobalScope(referenceType: string): Scope {
        return new StreamScope(this.indexManager.allElements(referenceType), undefined, { caseInsensitive: true });
    }
}

The functions createScope and getGlobalScope are already defined in DefaultScopeProvider, but needed to be overridden to add the option { caseInsensitive: true }. This is achieved through inheritance: by using the keyword extends, ArithmeticsScopeProvider inherits from DefaultScopeProvider, which means that it can access properties and methods as well as override methods declared in the superclass.
In the DefaultScopeProvider, those two methods are declared as:

protected createScope(elements: Stream<AstNodeDescription>, outerScope: Scope): Scope {
    return new StreamScope(elements, outerScope);
}
protected getGlobalScope(referenceType: string): Scope {
    return new StreamScope(this.indexManager.allElements(referenceType));
}

Now, when we call either createScope or getGlobalScope on the ScopeProvider service, the call will be dispatched to the ArithmeticsScopeProvider instead of the DefaultScopeProvider. Functions that were not overridden will still be executed in the DefaultScopeProvider via inheritance.
Of course it is also possible to replace the default implementation with a completely separate one that does not inherit from the default service class.

Adding New Services
To add services that are not available by default in Langium, e.g. application-specific ones, we first need to edit the type ArithmeticsAddedServices. By default, the Yeoman-based generator adds a validator service where you can implement validation rules specific to your language. New services are added as properties to the type declaration:

export type ArithmeticsAddedServices = {
    ArithmeticsValidator: ArithmeticsValidator
}

The ArithmeticsAddedServices type now has a property ArithmeticsValidator of type ArithmeticsValidator.
For the sake of organization and clarity, services can be nested inside of other properties acting as "groups":

export type ArithmeticsAddedServices = {
    validation: {
        ArithmeticsValidator: ArithmeticsValidator
    },
    secondGroup: {
        AnotherServiceName: AnotherServiceType
    },
    nthGroup: {
        withASubGroup: {
            YetAnotherServiceName: YetAnotherServiceType
        }
    }
}

Now that we have declared our new services inside of the ArithmeticsAddedServices type definition, we need to specify to the module how we want them to be implemented.
To do so, we need to update the ArithmeticsModule:

export const ArithmeticsModule: Module<ArithmeticsServices, PartialLangiumServices & ArithmeticsAddedServices> = {
    validation: {
        ArithmeticsValidator: () => new ArithmeticsValidator()
    }
};

Similarly to overridden services, the first access to the ArithmeticsValidator property will create a new instance of the class ArithmeticsValidator.
The ArithmeticsValidator service does not depend on other services, so no argument is passed during the instantiation of the class. If you implement a service that depends on other services, the constructor of your service should expect <yourDslName>Services as an argument. The factory function receives that object as its argument and can pass it on to your service's constructor, such as:

export const ArithmeticsModule: Module<ArithmeticsServices, PartialLangiumServices & ArithmeticsAddedServices> = {
    ServiceWithDependencies: (services) => new ServiceClass(services)
};

The services which ServiceClass depends on are then assigned in its constructor:

export class ServiceClass {
    private readonly serviceOne: ServiceOne;
    private readonly serviceTwo: ServiceTwo;
    private readonly serviceN: ServiceN;
    constructor(services: ArithmeticsServices) {
        this.serviceOne = services.ServiceOne;
        this.serviceTwo = services.Group.ServiceTwo;
        this.serviceN = services.Group.SubGroup.ServiceN;
    }
    /* service logic */
}

Resolving cyclic dependencies
In case one of the services that ServiceClass depends on also has a dependency back on ServiceClass, your module will throw an error similar to this: Cycle detected. Please make "ServiceClass" lazy. Ideally, such cyclic dependencies between services should be avoided; sometimes, however, cycles are unavoidable. In order to make a dependency lazy, assign a lambda function that returns the service in the constructor. You can then invoke this function in your service logic to get access to the depended-upon service:

export class ServiceClass {
    private readonly serviceOne: () => ServiceOne;
    constructor(services: ArithmeticsServices) {
        this.serviceOne = () => services.ServiceOne; // <-- lazily evaluated service
    }
    /* service logic */
    method() {
        this.serviceOne().methodOne();
    }
}

Using ArithmeticsValidator in other services
The ArithmeticsValidator needs to be registered inside of the ValidationRegistry. This is done by overriding ValidationRegistry with ArithmeticsValidationRegistry.
Briefly, ArithmeticsValidator implements two checks, checkDivByZero and checkNormalisable:

export class ArithmeticsValidator {
    checkDivByZero(binExpr: BinaryExpression, accept: ValidationAcceptor): void {
        ...
    }
    checkNormalisable(def: Definition, accept: ValidationAcceptor): void {
        ...
    }
}

These two new checks need to be registered inside of the ValidationRegistry.
We extend ValidationRegistry with ArithmeticsValidationRegistry to implement our new functionality:

export class ArithmeticsValidationRegistry extends ValidationRegistry {
    constructor(services: ArithmeticsServices) {
        super(services);
        const validator = services.validation.ArithmeticsValidator;
        const checks: ArithmeticsChecks = {
            BinaryExpression: validator.checkDivByZero,
            Definition: validator.checkNormalisable
        };
        this.register(checks, validator);
    }
}

Inside of the ArithmeticsValidationRegistry, we obtain our ArithmeticsValidator with const validator = services.validation.ArithmeticsValidator, which creates a new instance of ArithmeticsValidator. Then we declare the checks to be registered and register them in the registry via the function register, which is declared in the superclass. The ArithmeticsValidationRegistry only adds validation checks to the ValidationRegistry and does not override any of its functionality.
The implementation of ArithmeticsValidationRegistry needs to be registered in the ArithmeticsModule. The complete ArithmeticsModule is:

export const ArithmeticsModule: Module<ArithmeticsServices, PartialLangiumServices & ArithmeticsAddedServices> = {
    references: {
        ScopeProvider: (services) => new ArithmeticsScopeProvider(services)
    },
    validation: {
        ValidationRegistry: (services) => new ArithmeticsValidationRegistry(services),
        ArithmeticsValidator: () => new ArithmeticsValidator()
    }
};

Language Server Protocol
If you want to modify aspects of the language server, this section will help you find the relevant service for handling a given LSP request.

CompletionProvider
The CompletionProvider service is responsible for handling a Completion Request at a given cursor position. When a Completion Request is submitted by the client to the server, the CompletionProvider will create a CompletionList of all possible CompletionItems to be presented in the editor. The CompletionProvider service computes a new CompletionList after each keystroke.

DocumentSymbolProvider
The DocumentSymbolProvider service is responsible for handling a Document Symbols Request. The DocumentSymbolProvider is used to return a hierarchy of all symbols found in a document as an array of DocumentSymbols.

HoverProvider
The HoverProvider service is responsible for handling a Hover Request at a given text document position. By default, Langium implements tooltips with the content of the preceding multiline comment when hovering over a symbol.

FoldingRangeProvider
The FoldingRangeProvider service is responsible for handling a Folding Range Request. This service identifies all the blocks that can be folded in a document.

ReferenceFinder
The ReferenceFinder service is responsible for handling a Find References Request. This service is used to find all references to a given symbol inside of a document.

DocumentHighlighter
The DocumentHighlighter service is responsible for handling a Document Highlights Request. This service will find all references to a symbol at a given position (via the References service) and highlight all these references in a given document.

RenameHandler
The RenameHandler service is responsible for handling a Rename Request or a Prepare Rename Request. First, the service will check the validity of the Prepare Rename Request.
If the request is valid, the service will find all references to the selected symbol inside of the document and replace all occurrences with the new value.

"},{"id":18,"href":"/docs/features/","title":"Features","parent":"What is Langium?","content":"

Designing programming languages from the ground up is hard, independent of whether your language is a "simple" domain-specific language or a full-fledged general-purpose programming language. Not only do you have to keep up with the requirements of your domain experts, but you also have to deal with all the technical complexity that comes with building a language, including questions such as:

How do I get from a string to a semantic model which I can work with?
How do I resolve references to other parts of my model, even if they are located in a separate file?
How do I provide a great editing experience to users of my language?

This is the point where Langium comes into play. Langium aims to lower the barrier to entry for creating a language by removing the technical complexity, allowing you to focus on your domain's requirements.
In this chapter, you'll get a closer look at the requirements developers usually have to implement by themselves when building a programming language:

Language Parsing
Semantic Models
Cross References and Linking
Workspace Management
Editing Support
Try it out

Langium provides out-of-the-box solutions for these problems, with the ability to fine-tune every part of it to fit your domain requirements.

Language Parsing
Programming languages and domain-specific languages (DSLs) cannot be parsed using simple regular expressions (RegExp); instead, they require a more sophisticated parsing strategy. To define a custom language in Langium, you interact with a high-level representation of your context-free grammar using the Langium grammar language, in a similar fashion to EBNF.
Based on the grammar, Langium is then able to construct a parser which transforms an input string into a semantic model representation. Just as the name suggests, this model captures the essential structure needed to describe your language.

Semantic Models
Langium grammars are not only used to parse your language, but also to generate a semantic model for your language as TypeScript interfaces. When a program in your language is parsed, the resulting AST is automatically typed using these interfaces. The following grammar snippet parses input such as person John into a simple object { name: 'John' }.

Person:
    'person' // keyword
    name=ID  // semantic assignment
;

To interact with the semantic model in a type-safe manner, the langium-cli tool generates TypeScript type definitions from your parser rules. The Person parser rule generates the following interface:

interface Person {
    name: string
}

These interfaces allow you to safely traverse your abstract syntax tree. In case your grammar changes, they will also notify you of any breaking changes which might require you to adapt your domain logic.

Cross References and Linking
To express any kind of relationship between elements in your language, you will need to reference them. The process of resolving these references, i.e. identifying which element of your language hides behind a certain name, is called linking. Performing the linking process in a deterministic manner with a lot of objects in your project requires sound linking design.
Langium accomplishes this feature by using the concept of 'scoping'.
You likely know scopes from programming, where some variables are only available from certain scopes:

let x = 42;
x = 3; // References the `x` defined in the previous line
if (something) {
    let y = 42;
}
y = 3; // Cannot link, `y` isn't in any of the available scopes

The same occurs in Langium. To enable more complex scoping behavior, you can add custom domain scoping. For example, common object-oriented languages need a more involved scoping mechanism to resolve references to fields and methods of a class:

class X {
    y(): void { ... }
}
const instance = new X(); // Symbol `X` is in the local scope
instance.y(); // Symbol `y` exists in the scope of the `X` class

Once your domain-specific scoping rules have been defined, Langium will take care of linking by itself, reporting any errors.

Workspace Management
As with common modularized programming languages, domain logic written in your DSL will usually be split across multiple files to facilitate ease of use and maintenance. This is also possible using Langium, which automatically tries to pick up any files belonging to your current project.
When running a Langium-based language in a language server, all files in your workspace (the folder containing your current project) belonging to your DSL will automatically be picked up and processed. In addition, any changes in your workspace will be handled as well. Dealing with added, changed or deleted files in a workspace with multiple hundreds of files can become complicated and decrease performance drastically if not done correctly. Langium employs heuristics to only invalidate and recompute what is actually necessary.
The workspace management also keeps track of the global scope. This allows users of your DSL to reference elements across files within the same workspace.

Editing Support
The Langium framework is deeply integrated with the Language Server Protocol (LSP). The LSP aims to reduce integration efforts when designing a language by providing an interface that all IDEs can use to provide editing support. This allows Langium-based languages to easily interact with common IDEs and editors with LSP support, including Visual Studio Code, Eclipse, IntelliJ and many more.
The LSP includes commonly used language features, such as code completion, custom validations/diagnostics, finding references, formatting and many more. This allows for deep IDE integration without binding your language to a single IDE. Langium offers out-of-the-box support for most of these language features, with additional extension points for your domain-specific requirements.

Try it out
You can try out most of these features using our showcase and playground. The languages shown there are written using Langium and integrated into the monaco-editor.
If you're interested in Langium, you can check out our learning page next. There you'll learn how to get started writing your first language, and more about how Langium can help you achieve your language design goals.

"},{"id":19,"href":"/docs/learn/minilogo/","title":"Minilogo tutorial","parent":"Learn Langium","content":"

In this section you'll find helpful tutorials aimed at teaching you how to implement Langium for concrete applications.
These tutorials revolve around the MiniLogo language, implemented in Langium.
They describe how to implement MiniLogo from the ground up, and how to customize the various features of MiniLogo.
By working through each of these tutorials, you'll be learning about:

writing a grammar in Langium
implementing validation for your language
customizing a CLI for your tooling
writing up a simple generator
building a VSCode extension
running Langium in the web with the Monaco editor
implementing generation in the web

By the end of this series, you should be equipped to start working on your own language, and also have a pretty good idea of how you can integrate Langium into other projects as well.
With that being said, hop on in to the first guide on Writing a Grammar in Langium!

"},{"id":20,"href":"/docs/learn/workflow/scaffold/","title":"2. Scaffold a Langium project","parent":"Langium's workflow","content":"

To create your first working DSL, execute the Yeoman generator:

> yo langium

(ASCII-art Langium banner)

Welcome to Langium!
This tool generates a VS Code extension with a "Hello World" language to get started quickly.
The extension name is an identifier used in the extension marketplace or package registry.
❓ Your extension name: hello-world
The language name is used to identify your language in VS Code. Please provide a name to be shown in the UI. CamelCase and kebab-case variants will be created and used in different parts of the extension and language server.
❓ Your language name: Hello World
Source files of your language are identified by their file name extension. You can specify multiple file extensions separated by commas.
❓ File extensions: .hello
Your language can be run inside of a VSCode extension.
❓ Include VSCode extension? Yes
You can add a CLI to your language.
❓ Include CLI? Yes
You can run the language server in your web browser.
❓ Include Web worker? Yes
You can add the setup for language tests using Vitest.
❓ Include language tests? Yes

Yeoman will prompt you with a few basic questions about your DSL:

Extension name: will be used as the folder name of your extension and in its package.json.
Language name: will be used as the name of the grammar and as a prefix for some generated files and service classes.
File extensions: a comma-separated list of file extensions for your DSL.

The following questions are about the project parts you want to include in your project:

VS Code extension: runs your language inside of a VS Code extension.
CLI: adds a CLI to your language.
Web worker: adds the setup for running the language server in your web browser.
Language tests: adds the setup for language tests.

Afterwards, the generator will create a new project and start installing all dependencies, including the langium framework as well as the langium-cli command line tool required for generating code based on your grammar definition.
After everything has finished successfully, open your newly created Langium project with vscode via the UI (File > Open Folder…) or execute the following command, replacing hello-world with your chosen project name:

code hello-world

Sneak peek using the VS Code extension
Press F5 or open the debug view and start the available debug configuration to launch the extension in a new Extension Development Host window. Open a folder and create a file with your chosen file extension (.hello is the default).
The hello-world language accepts two kinds of entities: person and Hello. Here's a quick example of how to use them both:

person Alice
Hello Alice!

person Bob
Hello Bob!

The file src/language/hello-world.langium in your newly created project contains your grammar.

"},{"id":21,"href":"/showcase/arithmetics/","title":"Arithmetics","parent":"Langium Showcase","content":""},{"id":22,"href":"/docs/reference/document-lifecycle/","title":"Document Lifecycle","parent":"Reference","content":"

LangiumDocument is the central data structure in Langium: it represents a text file of your DSL. Its main purpose is to hold the parsed Abstract Syntax Tree (AST) plus additional information derived from it. After its creation, a LangiumDocument must be "built" before it can be used in any way. The service responsible for building documents is called DocumentBuilder.
A LangiumDocument goes through seven different states during its lifecycle:

Parsed when an AST has been generated from the content of the document.
IndexedContent when the AST nodes have been processed by the IndexManager.
ComputedScopes when local scopes have been prepared by the ScopeComputation.
Linked when the Linker has resolved cross-references.
IndexedReferences when the references have been indexed by the IndexManager.
Validated when the document has been validated by the DocumentValidator.
Changed when the document has been modified.

State 1 is the initial state after creation of a document, and states 2 to 6 are part of its build process. State 7 is a final state used to mark the document as invalid due to a change in the source text.
The following diagram depicts how the DocumentBuilder processes LangiumDocuments depending on their state. More details about each step of the lifecycle can be found below.

graph TD;
    N(LangiumDocumentFactory) -.->|Creation of LangiumDocuments| C{{Parsed}}
    A(DocumentBuilder) -->|Indexing of symbols| D(IndexManager) -.- E{{IndexedContent}}
    A -->|Computing scopes| F(ScopeComputation) -.- G{{ComputedScopes}}
    A -->|Linking| H(Linker) -.- I{{Linked}}
    A -->|Indexing of cross-references| J(IndexManager) -.- K{{IndexedReferences}}
    A -->|Validation| L(DocumentValidator) -.- M{{Validated}}
    click N "./#creation-of-langiumdocuments"
    click D "./#indexing-of-symbols"
    click F "./#computing-scopes"
    click H "./#linking"
    click J "./#indexing-of-cross-references"
    click L "./#validation"

Creation of LangiumDocuments
When the workspace is initialized, all files having an extension matching those defined in langium-config.json are collected by the WorkspaceManager service. The LangiumDocumentFactory service creates a new instance of LangiumDocument for each source file. Those documents are then stored in memory by the LangiumDocuments service so they can be accessed later.
Files in the workspace are mapped to instances of TextDocument, as implemented by the vscode-languageserver package. Such a TextDocument holds the content of the respective file as a string. In contrast, a LangiumDocument represents the file content as an AST. This means that the creation of a LangiumDocument by the LangiumDocumentFactory service is accompanied by the parsing of the content of a TextDocument into an AST.
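Outside of a workspace, you can trigger this creation step programmatically. A minimal sketch, assuming the services object created by the hello-world example's generated create...Services function (names follow the Yeoman-generated project; import paths may vary with your Langium version):

import { EmptyFileSystem } from 'langium';
import { URI } from 'vscode-uri';
import { createHelloWorldServices } from './hello-world-module.js';

async function demo(): Promise<void> {
    const { shared } = createHelloWorldServices(EmptyFileSystem);
    // Creates a document in state `Parsed`, without touching the file system
    const document = shared.workspace.LangiumDocumentFactory.fromString(
        'person Alice', URI.parse('memory://demo.hello'));
    // Runs the build phases described below (indexing, scoping, linking, validation)
    await shared.workspace.DocumentBuilder.build([document]);
}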
after the document has been parsed), its state is set to Parsed.\ngraph LR; A(LangiumDocuments\nmanages LangiumDocument instances) -- B(LangiumDocumentFactory\ncreates a LangiumDocument) B -- C(LangiumParser\nparses a string into an AST) Once all LangiumDocuments have been created, the DocumentBuilder service will sequentially process each LangiumDocument as described below.\nIndexing of Symbols Symbols are AST nodes that can be identified with a name and hence can be referenced from a cross-reference. Symbols that are exported can be referenced from other documents, while non-exported symbols are local to the document containing them. The IndexManager service keeps an index of all symbols that are exported from documents in the workspace. The set of all these exported symbols is called the global scope.\nIndexing of the exported symbols of an AST is executed on documents with the state Parsed. The default ScopeComputation service creates an AstNodeDescription for the root node (i.e. the node created by parsing the entry rule) and each named AstNode directly descending from the root node. This AstNodeDescription contains the type of the node, its identifier (i.e. the name property), the URI of the document where the node is located, and the location of the node inside of the document. The generated set of AstNodeDescriptions makes symbols from a LangiumDocument accessible to other documents in the same workspace.\nThe default ScopeComputation can be overridden to change the selection of exported symbols, or to export them with different names than the plain value of their name property. However, keep in mind that you cannot access any cross-references in this phase because that requires the document state to be at least ComputedScopes, which happens later in the build process.\nOnce the initial indexing is done, the document\u0026rsquo;s state is set to IndexedContent.\ngraph LR; A(IndexManager\nmanages exported content\nof LangiumDocuments) -- B(ScopeComputation\ncreates descriptions\nof all exported symbols) B -- C(NameProvider\nresolves the name of an AstNode) B -- D(AstNodeLocator\ngets the path of an AstNode) Computing Scopes This phase is executed on documents with the state IndexedContent and is required to complete prior to resolving cross-references.\nLocal scope computation consists of gathering all symbols contained in the AST, done by the ScopeComputation service (in addition to the indexing explained in the previous section). Metadata of the gathered symbols are represented with AstNodeDescription like in the initial indexing phase. These metadata are attached to the LangiumDocument in a multi-map structure that associates a (possibly empty) set of symbol descriptions to each container node of the AST, called the precomputed scopes. These are used in the linking phase to construct the actual scope of a cross-reference, i.e. all possible symbols that are reachable. A symbol in the precomputed scopes is reachable from a specific cross-reference if it is associated with a direct or indirect container of that reference. Symbols associated to the root node are reachable from the whole AST, while symbols associated with an inner node are reachable from the respective sub-tree.\nThe default implementation of the ScopeComputation service attaches the description of every symbol to its direct container. This means that the container holds information about which named nodes are nested inside of it. 
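To make this more tangible, the default behavior corresponds roughly to the following simplified sketch (an approximation for illustration, not the exact Langium source; the class name is made up):\nimport { AstNode, DefaultScopeComputation, LangiumDocument, PrecomputedScopes } from 'langium'; export class SketchedScopeComputation extends DefaultScopeComputation { protected override processNode(node: AstNode, document: LangiumDocument, scopes: PrecomputedScopes): void { const container = node.$container; const name = this.nameProvider.getName(node); if (container \u0026amp;\u0026amp; name) { /* attach the description of the symbol to its direct container */ scopes.add(container, this.descriptions.createDescription(node, name, document)); } } } 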
You can override this default behavior to change the position where a symbol is reachable, or to change the name by which it can be referenced. It is even possible to associate the same symbol to multiple container nodes, possibly with different names, to control precisely where and how references to it can be resolved. However, keep in mind that you cannot access any cross-references in this phase. More complex, context-dependent scope mechanisms can be implemented in the ScopeProvider (see next section).\nThe \u0026ldquo;Domainmodel\u0026rdquo; example includes a customization of the scope precomputation where every entity contained in a package declaration is exposed using its qualified name, that is, the concatenation of the package name and entity name separated with . (similar to Java).\nIn languages with a type system, you would typically implement computation of types in an additional pre-processing step in order to make type information available in the document. This additional step can be registered to run after scope computation with the onBuildPhase method of DocumentBuilder. How types are computed heavily depends on the kind of type system, so there is no default implementation for it.\nOnce local scopes are computed and attached to the document, the document\u0026rsquo;s state is set to ComputedScopes.\ngraph LR; A(ScopeComputation\ngathers all symbols from the AST and\nstores their metadata in a MultiMap) -- B(NameProvider\nresolves the name of an AST node) A -- C(AstNodeDescriptionProvider\ncreates descriptions of the\ngathered symbols) C -- D(AstNodeLocator\ngets the path of an AstNode) Linking Once local scopes have been prepared, cross-references are resolved via the Linker service. The Linker retrieves all cross-references in a LangiumDocument and tries to resolve them. Reference resolution consists of three main steps:\n Query the ScopeProvider to obtain a scope. A scope describes all symbols that are reachable from the AST node holding the cross-reference. In the obtained scope, find the description of a symbol whose name matches the identifier given at the cross-reference. Load the AST node for that description. The AST node is given either directly (for a local symbol) or indirectly through a path string (for a symbol exported from another document). The default implementation of the ScopeProvider service creates a hierarchical scope by traveling from the given cross-reference via its container nodes up to the root of the AST, and collecting symbol descriptions from the precomputed scopes (created in the preceding phase). The symbols are filtered to match the type of the cross-reference target. Symbols that are closer to the cross-reference shadow those that are further above in the AST, which means they have higher priority to be chosen as cross-reference targets. As a last resort, the global scope computed in the initial indexing phase is included in the hierarchical scope. Symbols that cannot be found locally are looked up in the global scope.\nThe ScopeProvider can be overridden to implement complex scenarios for scoping and cross-reference resolution. 
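As a minimal illustration, such an override could filter the default scope before handing it to the linker. The following is a hedged sketch; the property name 'person' and the underscore convention are made-up assumptions, not part of any shipped example:\nimport { DefaultScopeProvider, ReferenceInfo, Scope } from 'langium'; export class FilteringScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { const defaultScope = super.getScope(context); /* hypothetical rule: hide symbols whose name starts with an underscore */ if (context.property === 'person') { return this.createScope(defaultScope.getAllElements().filter(d =\u0026gt; !d.name.startsWith('_'))); } return defaultScope; } } 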
Since cross-references can be linked lazily in this phase, it is possible to create a scope for a cross-reference depending on the resolved target of another cross-reference.\nOnce the linking is complete, the document\u0026rsquo;s state is set to Linked.\ngraph LR; A(Linker\nlinks references to their target AstNodes) -- B(ScopeProvider\ncreates a Scope for the context of a Reference) A -- C(AstNodeLocator\nresolves an AstNode from its path) Indexing of Cross-References Once the cross-references have been resolved by the linker, the IndexManager kicks in a second time to create descriptions of cross-references between different documents. Such a ReferenceDescription implies a dependency from its source document to its target document. This information ensures an efficient lookup to identify which other documents may be impacted by a change in a LangiumDocument.\nAfter the cross-references have been indexed, the document\u0026rsquo;s state is set to IndexedReferences.\ngraph LR; A(IndexManager\nmanages metadata of cross-references\nbetween documents) -- B(ReferenceDescriptionProvider\ncreates descriptions of all cross-references) B -- C(AstNodeLocator\ngets the path of an AstNode) Validation The DocumentValidator creates an array of Diagnostics from a LangiumDocument. This array contains all errors that have occurred during lexing, parsing, and linking, and the results of a set of custom validations with varying severity (error, warning, info). The custom validations are registered with the ValidationRegistry service.\nAfter the diagnostics have been created, the document\u0026rsquo;s state is set to Validated.\ngraph LR; A(DocumentValidator\ntranslates parser and linker errors to Diagnostics,\nand executes custom validation checks) -- B(ValidationRegistry\nmanages custom validation checks for each AST node type) At this point, all documents have been processed by the DocumentBuilder and the workspace is ready to process requests from the editor (e.g. completion).\nModifications of a document When a TextDocument is modified, the language client (IDE) notifies the language server, which triggers corresponding events. In Langium, a change in a TextDocument\u0026rsquo;s content leads to the invalidation of the associated LangiumDocument. The document\u0026rsquo;s state is set to Changed and the document\u0026rsquo;s entry is removed from the LangiumDocuments service. If the TextDocument was deleted, the corresponding LangiumDocument is removed from the index in the IndexManager service. If the document\u0026rsquo;s content was modified, a new instance of LangiumDocument is created as described above. All other documents that may have been affected as a result of the modification get their references unlinked and their state is modified such that they run through the linking phase again. The DocumentBuilder then processes the newly created document along with all other documents that have not reached the Validated state yet.\nTo determine which documents are affected by a change, the IndexManager uses the reference descriptions gathered in the reference indexing phase.\n"},{"id":23,"href":"/docs/recipes/scoping/file-based/","title":"File-based scoping","parent":"Scoping","content":"Goal By default, Langium will always expose all top-level AST elements to the global scope. That means they are visible to all other documents in your workspace. 
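To make the default concrete: the built-in export logic behaves roughly like the following simplified sketch (an approximation for illustration, not the exact built-in implementation):\nimport { AstNode, AstNodeDescription, AstUtils, DefaultScopeComputation, LangiumDocument } from 'langium'; export class DefaultLikeScopeComputation extends DefaultScopeComputation { override async computeExports(document: LangiumDocument\u0026lt;AstNode\u0026gt;): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const exports: AstNodeDescription[] = []; /* every named element directly below the root node is exported to the global scope */ for (const node of AstUtils.streamContents(document.parseResult.value)) { const name = this.nameProvider.getName(node); if (name) { exports.push(this.descriptions.createDescription(node, name, document)); } } return exports; } } 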
However, a lot of languages are better served with a JavaScript-like import/export mechanism:\n Using export makes a symbol from the current file available for referencing from another file. Using import allows you to reference symbols from a different file. To make things easier, I will modify the \u0026ldquo;Hello World\u0026rdquo; example from the learning section.\nStep 1: Change the grammar First off, we are changing the grammar to support the export and the import statements. Let\u0026rsquo;s take a look at the modified grammar:\ngrammar HelloWorld entry Model: ( fileImports+=FileImport //NEW: imports per file | persons+=Person | greetings+=Greeting )*; FileImport: //NEW: imports of the same file are gathered in a list 'import' '{' personImports+=PersonImport (',' personImports+=PersonImport)* '}' 'from' file=STRING; PersonImport: person=[Person:ID] ('as' name=ID)?; Person: published?='export'? 'person' name=ID; //NEW: export keyword type Greetable = PersonImport | Person Greeting: 'Hello' person=[Greetable:ID] '!'; hidden terminal WS: /\s+/; terminal ID: /[_a-zA-Z][\w_]*/; terminal STRING: /\u0026quot;(\\.|[^\u0026quot;\\])*\u0026quot;|'(\\.|[^'\\])*'/; hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; After changing the grammar you need to regenerate the abstract syntax tree (AST) and the language infrastructure. You can do that by running the following command:\nnpm run langium:generate Step 2: Exporting persons to the global scope The index manager shall get all persons that are marked with the export keyword. In Langium this is done by overriding the ScopeComputation.computeExports(…) function. Here is the implementation:\nexport class HelloWorldScopeComputation extends DefaultScopeComputation { override async computeExports(document: LangiumDocument\u0026lt;AstNode\u0026gt;): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const model = document.parseResult.value as Model; return model.persons .filter(p =\u0026gt; p.published) .map(p =\u0026gt; this.descriptions.createDescription(p, p.name)); } } After that, you need to register the HelloWorldScopeComputation in the HelloWorldModule:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //... references: { ScopeComputation: (services) =\u0026gt; new HelloWorldScopeComputation(services) } }; Having done this will make all persons that are marked with the export keyword available to the other files through the index manager.\nStep 3: Importing from specific files The final step is to adjust the cross-reference resolution by overriding the DefaultScopeProvider.getScope(…) function:\nexport class HelloWorldScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { switch(context.container.$type as keyof HelloWorldAstType) { case 'PersonImport': if(context.property === 'person') { return this.getExportedPersonsFromGlobalScope(context); } break; case 'Greeting': if(context.property === 'person') { return this.getImportedPersonsFromCurrentFile(context); } break; } return EMPTY_SCOPE; } //... } Do not forget to add the new service to the HelloWorldModule:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //... 
references: { ScopeComputation: (services) =\u0026gt; new HelloWorldScopeComputation(services), ScopeProvider: (services) =\u0026gt; new HelloWorldScopeProvider(services) //NEW! } }; Did you notice the two missing functions? Here is what they have to do.\nThe first function (getExportedPersonsFromGlobalScope(context)) will take a look at the global scope and return all exported persons, limited to the files referenced by the file imports. Note that we are outputting all persons that are marked with the export keyword. The actual name resolution is done internally later by the linker.\nprivate getExportedPersonsFromGlobalScope(context: ReferenceInfo): Scope { //get document for current reference const document = AstUtils.getDocument(context.container); //get model of document const model = document.parseResult.value as Model; //get URI of current document const currentUri = document.uri; //get folder of current document const currentDir = dirname(currentUri.path); const uris = new Set\u0026lt;string\u0026gt;(); //for all file imports of the current file for (const fileImport of model.fileImports) { //resolve the file name relative to the current file const filePath = join(currentDir, fileImport.file); //turn the path back into a URI const uri = currentUri.with({ path: filePath }); //add the URI to the URI list uris.add(uri.toString()); } //get all possible persons from these files const astNodeDescriptions = this.indexManager.allElements(Person, uris).toArray(); //convert them to descriptions inside of a scope return this.createScope(astNodeDescriptions); } The second function (getImportedPersonsFromCurrentFile(context)) will take a look at the current file and return all persons that are imported from other files.\nprivate getImportedPersonsFromCurrentFile(context: ReferenceInfo) { //get current document of reference const document = AstUtils.getDocument(context.container); //get current model const model = document.parseResult.value as Model; //go through all imports const descriptions = model.fileImports.flatMap(fi =\u0026gt; fi.personImports.map(pi =\u0026gt; { //if the import has an alias name, return the import if (pi.name) { return this.descriptions.createDescription(pi, pi.name); } //if the import references a person, return that person if (pi.person.ref) { return this.descriptions.createDescription(pi.person.ref, pi.person.ref.name); } //otherwise return nothing return undefined; }).filter(d =\u0026gt; d != undefined)).map(d =\u0026gt; d!); return this.createScope(descriptions); } Result Now, let\u0026rsquo;s test the editor by running npm run build and starting the extension. Try using these two files. The first file contains the Simpsons family.\nexport person Homer export person Marge person Bart person Lisa export person Maggy The second file tries to import and greet them.\nimport { Marge, Homer, Lisa, //reference error, because not exported Maggy as Baby } from \u0026quot;persons.hello\u0026quot; Hello Lisa! //reference error, because no valid import Hello Maggy! //reference error, because name was overwritten with 'Baby' Hello Homer! Hello Marge! Hello Baby! 
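If you scaffolded your project with language tests, you can also verify this behavior programmatically. Below is a hedged sketch using the parseHelper utility from langium/test; the import paths, the createHelloWorldServices function name, and the documentUri option are assumptions based on the generated project layout:\nimport { expect, test } from 'vitest'; import { EmptyFileSystem } from 'langium'; import { parseHelper } from 'langium/test'; import { createHelloWorldServices } from '../src/language/hello-world-module.js'; import { Model } from '../src/language/generated/ast.js'; test('imported persons can be greeted', async () =\u0026gt; { const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); /* both documents live in the same virtual folder, so the relative import resolves */ await parse('export person Homer', { documentUri: 'file:///test/persons.hello' }); const doc = await parse('import { Homer } from \u0026quot;persons.hello\u0026quot; Hello Homer!', { documentUri: 'file:///test/greetings.hello' }); expect(doc.parseResult.value.greetings[0].person.ref?.name).toBe('Homer'); }); 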
Full Implementation import { AstNode, AstNodeDescription, AstUtils, DefaultScopeComputation, DefaultScopeProvider, EMPTY_SCOPE, LangiumDocument, ReferenceInfo, Scope } from \u0026quot;langium\u0026quot;; import { HelloWorldAstType, Model, Person } from \u0026quot;./generated/ast.js\u0026quot;; import { dirname, join } from \u0026quot;node:path\u0026quot;; export class HelloWorldScopeComputation extends DefaultScopeComputation { override async computeExports(document: LangiumDocument\u0026lt;AstNode\u0026gt;): Promise\u0026lt;AstNodeDescription[]\u0026gt; { const model = document.parseResult.value as Model; return model.persons .filter(p =\u0026gt; p.published) .map(p =\u0026gt; this.descriptions.createDescription(p, p.name)); } } export class HelloWorldScopeProvider extends DefaultScopeProvider { override getScope(context: ReferenceInfo): Scope { switch(context.container.$type as keyof HelloWorldAstType) { case 'PersonImport': if(context.property === 'person') { return this.getExportedPersonsFromGlobalScope(context); } break; case 'Greeting': if(context.property === 'person') { return this.getImportedPersonsFromCurrentFile(context); } break; } return EMPTY_SCOPE; } protected getExportedPersonsFromGlobalScope(context: ReferenceInfo): Scope { //get document for current reference const document = AstUtils.getDocument(context.container); //get model of document const model = document.parseResult.value as Model; //get URI of current document const currentUri = document.uri; //get folder of current document const currentDir = dirname(currentUri.path); const uris = new Set\u0026lt;string\u0026gt;(); //for all file imports of the current file for (const fileImport of model.fileImports) { //resolve the file name relative to the current file const filePath = join(currentDir, fileImport.file); //turn the path back into a URI const uri = currentUri.with({ path: filePath }); //add the URI to the URI list uris.add(uri.toString()); } //get all possible persons from these files const astNodeDescriptions = this.indexManager.allElements(Person, uris).toArray(); //convert them to descriptions inside of a scope return this.createScope(astNodeDescriptions); } private getImportedPersonsFromCurrentFile(context: ReferenceInfo) { //get current document of reference const document = AstUtils.getDocument(context.container); //get current model const model = document.parseResult.value as Model; //go through all imports const descriptions = model.fileImports.flatMap(fi =\u0026gt; fi.personImports.map(pi =\u0026gt; { //if the import has an alias name, return the import if (pi.name) { return this.descriptions.createDescription(pi, pi.name); } //if the import references a person, return that person if (pi.person.ref) { return this.descriptions.createDescription(pi.person.ref, pi.person.ref.name); } //otherwise return nothing return undefined; }).filter(d =\u0026gt; d != undefined)).map(d =\u0026gt; d!); return this.createScope(descriptions); } } "},{"id":24,"href":"/docs/recipes/formatting/","title":"Formatting","parent":"Recipes","content":"Langium\u0026rsquo;s formatting API allows you to easily create formatters for your language. 
We start building a custom formatter for our language by creating a new class that inherits from AbstractFormatter.\nimport { AbstractFormatter, AstNode, Formatting } from 'langium'; export class CustomFormatter extends AbstractFormatter { protected format(node: AstNode): void { // This method is called for every AstNode in a document } } ... // Bind the class in your module export const CustomModule: Module\u0026lt;CustomServices, PartialLangiumServices\u0026gt; = { lsp: { Formatter: () =\u0026gt; new CustomFormatter() } }; The entry point for the formatter is the abstract format(AstNode) method. The AbstractFormatter calls this method for every node of our model. To perform custom formatting for every type of node, we will use pattern matching. In the following example, we will take a closer look at a formatter for the domain-model language. In particular, we will see how we can format the root of our model (Domainmodel) and each nested element (Entity and PackageDeclaration).\nTo format each node, we use the getNodeFormatter method of the AbstractFormatter. The resulting generic NodeFormatter\u0026lt;T extends AstNode\u0026gt; provides us with methods to select specific parts of a parsed AstNode such as properties or keywords.\nOnce we have selected the nodes of our document that we are interested in formatting, we can start applying a specific formatting. Each formatting option allows you to prepend/append whitespace to each node. The Formatting namespace provides a few predefined formatting options which we can use for this:\n newLine Adds one newline character (while preserving indentation). newLines Adds a specified amount of newline characters. indent Adds one level of indentation. Automatically also adds a newline character. noIndent Removes all indentation. oneSpace Adds one whitespace character. spaces Adds a specified amount of whitespace characters. noSpace Removes all spaces. fit Tries to fit the existing text into one of the specified formattings. We first start off by formatting the Domainmodel element of our DSL. It is the root node of every document and just contains a list of other elements. These elements need to be realigned to the root of the document in case they are indented. We will use the Formatting.noIndent option for that:\nif (ast.isDomainmodel(node)) { // Create a new node formatter const formatter = this.getNodeFormatter(node); // Select a formatting region which contains all children const nodes = formatter.nodes(...node.elements); // Prepend all these nodes with no indent nodes.prepend(Formatting.noIndent()); } Our other elements, namely Entity and PackageDeclaration, can be arbitrarily deeply nested, so using noIndent is out of the question for them. Instead we will use indent on everything between the { and } tokens. The formatter internally keeps track of the current indentation level:\nif (ast.isEntity(node) || ast.isPackageDeclaration(node)) { const formatter = this.getNodeFormatter(node); const bracesOpen = formatter.keyword('{'); const bracesClose = formatter.keyword('}'); // Add a level of indentation to each element // between the opening and closing braces. 
// This even includes comment nodes formatter.interior(bracesOpen, bracesClose).prepend(Formatting.indent()); // Also move the newline to a closing brace bracesClose.prepend(Formatting.newLine()); // Surround the name property of the element // with one space on each side formatter.property(\u0026quot;name\u0026quot;).surround(Formatting.oneSpace()); } Note that most predefined Formatting methods accept additional arguments which make the resulting formatting more lenient. For example, the prepend(newLine({ allowMore: true })) formatting will not apply formatting in case the node is already preceded by one or more newlines. It will still correctly indent the node in case the indentation is not as expected.\n Full Code Sample import { AbstractFormatter, AstNode, Formatting } from 'langium'; import * as ast from './generated/ast'; export class DomainModelFormatter extends AbstractFormatter { protected format(node: AstNode): void { if (ast.isEntity(node) || ast.isPackageDeclaration(node)) { const formatter = this.getNodeFormatter(node); const bracesOpen = formatter.keyword('{'); const bracesClose = formatter.keyword('}'); formatter.interior(bracesOpen, bracesClose).prepend(Formatting.indent()); bracesClose.prepend(Formatting.newLine()); formatter.property('name').surround(Formatting.oneSpace()); } else if (ast.isDomainmodel(node)) { const formatter = this.getNodeFormatter(node); const nodes = formatter.nodes(...node.elements); nodes.prepend(Formatting.noIndent()); } } } "},{"id":25,"href":"/docs/recipes/lexing/indentation-sensitive-languages/","title":"Indentation-sensitive languages","parent":"Lexing","content":"Some programming languages (such as Python, Haskell, and YAML) use indentation to denote nesting, as opposed to special non-whitespace tokens (such as { and } in C++/JavaScript). This can be difficult to express in the EBNF notation used for defining a language grammar in Langium, which is context-free. To achieve that, you can make use of synthetic tokens in the grammar which you would then redefine using Chevrotain in a custom token builder.\nStarting with Langium v3.2, such a token builder (and an accompanying lexer) are provided for easy plugging into your language. They work by modifying the underlying Chevrotain token generated for your indentation terminal tokens to use a custom matcher function instead, one that has access to more context than simple regular expressions, allowing it to store state and detect changes in indentation levels. This is why you should provide it with the names of the tokens you used to denote indentation: so it can override the correct tokens for your grammar.\nConfiguring the token builder and lexer To be able to use the indentation tokens in your grammar, you first have to import and register the IndentationAwareTokenBuilder and IndentationAwareLexer services in your module as such:\nimport { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium'; // ... export const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { // ... parser: { TokenBuilder: () =\u0026gt; new IndentationAwareTokenBuilder(), Lexer: (services) =\u0026gt; new IndentationAwareLexer(services), }, }; // ... The IndentationAwareTokenBuilder constructor optionally accepts an object defining the names of the tokens you used to denote indentation and whitespace in your .langium grammar file, as well as a list of delimiter tokens inside of which indentation should be ignored. 
It defaults to:\n{ indentTokenName: 'INDENT', dedentTokenName: 'DEDENT', whitespaceTokenName: 'WS', ignoreIndentationDelimiters: [], } Ignoring indentation between specific tokens Sometimes, it is necessary to ignore any indentation token inside some expressions, such as with tuples and lists in Python. For example, in the following statement:\nx = [ 1, 2 ] any indentation between [ and ] should be ignored.\nTo achieve similar behavior with the IndentationAwareTokenBuilder, the ignoreIndentationDelimiters option can be used. It accepts a list of pairs of token names (terminal or keyword) and turns off indentation token detection between each pair.\nFor example, if you construct the IndentationAwareTokenBuilder with the following options:\nnew IndentationAwareTokenBuilder({ ignoreIndentationDelimiters: [ ['[', ']'], ['(', ')'], ], }) then no indentation tokens will be emitted between either of those pairs of tokens.\nConfiguration options type safety The IndentationAwareTokenBuilder supports generic type parameters to improve type-safety and IntelliSense of its options. This helps detect when a token name has been mistyped or changed in the grammar. The first generic parameter corresponds to the names of terminal tokens, while the second one corresponds to the names of keyword tokens. Both parameters are optional; the corresponding name types can be imported from ./generated/ast.js and used as such:\nimport { MyLanguageTerminalNames, MyLanguageKeywordNames } from './generated/ast.js'; import { IndentationAwareTokenBuilder, IndentationAwareLexer } from 'langium'; // ... export const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { parser: { TokenBuilder: () =\u0026gt; new IndentationAwareTokenBuilder\u0026lt;MyLanguageTerminalNames, MyLanguageKeywordNames\u0026gt;({ ignoreIndentationDelimiters: [ ['L_BRAC', 'R_BARC'], // \u0026lt;-- This typo will now cause a TypeScript error ] }), Lexer: (services) =\u0026gt; new IndentationAwareLexer(services), }, }; Writing the grammar In your langium file, you have to define terminals with the same names you passed to IndentationAwareTokenBuilder (or the defaults shown above if you did not override them). For example, let\u0026rsquo;s define the grammar for a simple version of Python with support for only if and return statements, and only booleans as expressions:\ngrammar PythonIf entry Statement: If | Return; If: 'if' condition=BOOLEAN ':' INDENT thenBlock+=Statement+ DEDENT ('else' ':' INDENT elseBlock+=Statement+ DEDENT)?; Return: 'return' value=BOOLEAN; terminal BOOLEAN returns boolean: /true|false/; terminal INDENT: 'synthetic:indent'; terminal DEDENT: 'synthetic:dedent'; hidden terminal WS: /[\t ]+/; hidden terminal NL: /[\r\n]+/; The important terminals here are INDENT, DEDENT, and WS. INDENT and DEDENT are used to delimit a nested block, similar to { and } (respectively) in C-like languages. Note that INDENT indicates an increase in indentation, not just the existence of leading whitespace, which is why in the example above we used it only at the beginning of the block, not before every Statement. Additionally, splitting WS from a single \s+ into [\t ]+ and [\r\n]+ is necessary because a simple \s+ would match the newline character, as well as any possible indentation after it. 
To ensure correct behavior, the token builder modifies the pattern of the whitespaceTokenName token to be [\t ]+, so a separate hidden token for new lines needs to be explicitly defined.\nThe content you choose for these three terminals doesn\u0026rsquo;t matter since it will be overridden by the IndentationAwareTokenBuilder anyway. However, you might still want to choose tokens that don\u0026rsquo;t overlap with other terminals for easier use in the playground.\nWith the default configuration and the grammar above, for the following code sample:\nif true: return false else: if true: return true the lexer will output the following sequence of tokens: if, BOOLEAN, INDENT, return, BOOLEAN, DEDENT, else, INDENT, if, BOOLEAN, INDENT, return, BOOLEAN, DEDENT, DEDENT.\n"},{"id":26,"href":"/docs/recipes/keywords-as-identifiers/","title":"Keywords as Identifiers","parent":"Recipes","content":"As you write your grammar, you will add keywords such as var, get or function to improve the readability of your language and to add structure. These keywords get special keyword highlighting whenever they are used, by default even at locations your grammar does not intend, and they are handled separately from other terminals such as names, identifiers, numbers and so on. You will quickly notice that a function such as function get() will lead to parser errors by default, as get is identified as a keyword and not as an identifier. This guide is all about how to explicitly enable these keywords (highlighted in blue) to be supported as identifiers (highlighted in white) as well.\nLet\u0026rsquo;s look at the \u0026ldquo;hello-world\u0026rdquo; example in the playground or as a new local project created with yo langium (for details on how to set up your first Langium project, read getting started):\nHere, it is not possible to introduce a person whose name is \u0026ldquo;Hello\u0026rdquo;, since Hello is a dedicated keyword of the language. Additionally, we cannot greet a person called \u0026ldquo;Hello\u0026rdquo; either. The same applies to the keyword \u0026ldquo;person\u0026rdquo;, but let\u0026rsquo;s focus on enabling \u0026ldquo;Hello\u0026rdquo; as a name for persons.\nTo enable keywords as identifiers, you need to apply the following three steps:\n Step 1: Modify the grammar to explicitly parse keywords as property values Step 2: Change the semantic type of the resulting token Step 3: Ensure that your editor styles the chosen semantic token type Step 1: Modify the grammar to explicitly parse keywords as property values The first step is to modify the grammar to explicitly parse keywords as property values. At the moment, the parser rule for introducing persons looks like this:\nPerson: 'person' name=ID; terminal ID: /[_a-zA-Z][\w_]*/; Note that the terminal rule for ID already covers the string \u0026ldquo;Hello\u0026rdquo;. However, since the parser rule for greeting persons uses \u0026ldquo;Hello\u0026rdquo; as a keyword, the keyword takes precedence:\nGreeting: 'Hello' person=[Person:ID] '!'; Roughly summarized, the background for this behavior is that Langium\u0026rsquo;s internally used LL(k) parser implementation named Chevrotain first does lexing/tokenizing, i.e. splitting text into single tokens, such as keywords, identifiers and delimiters. The actual parsing, i.e. the application of the parser rules, is performed afterwards on these tokens. Chevrotain uses regular expressions (regex) for splitting text into tokens. 
Since keywords are implemented as regex as well and take precedence, all occurrences of \u0026ldquo;Hello\u0026rdquo; are treated as keywords for the parser rule named Greeting, even a \u0026ldquo;Hello\u0026rdquo; intended to be a name, which finally causes the two syntax errors.\nIn order to explicitly enable parsing \u0026ldquo;Hello\u0026rdquo; as a name as well, modify the parser rule for persons in this way:\nPerson: 'person' name=(ID | 'Hello'); terminal ID: /[_a-zA-Z][\w_]*/; // the terminal rule for ID is unchanged! Now Langium knows that the keyword \u0026ldquo;Hello\u0026rdquo; may also occur as a value for the name property of the parser rule for persons. That\u0026rsquo;s it! (Don\u0026rsquo;t forget to run npm run langium:generate after updating the grammar.)\nSince the name property is used for cross-references by the parser rule for greetings, \u0026ldquo;Hello\u0026rdquo; needs to be supported here as well. For that, we recommend introducing a data type rule like \u0026ldquo;PersonID\u0026rdquo; in the example, since it makes it easier to support more keywords in the future:\nPerson: 'person' name=PersonID; Greeting: 'Hello' person=[Person:PersonID] '!'; PersonID returns string: ID | 'Hello'; Now, your editor accepts \u0026ldquo;Hello\u0026rdquo; as a value for persons' names. Nevertheless, the name \u0026ldquo;Hello\u0026rdquo; is still highlighted in blue and looks like the keyword \u0026ldquo;Hello\u0026rdquo;. This leads us to the second step.\nStep 2: Change the semantic type of the resulting token The second step is to change the semantic type of the resulting token in order to adjust the highlighting in the editor: While parsing text with Langium is done in a language server, the highlighting is done in editors (the language clients). Editors like VS Code usually use syntax highlighting based on the tokenized text. This highlighting can be complemented by semantic highlighting with additional semantic types for the tokens from the language server.\nIn the case of Langium and VS Code, VS Code by default uses TextMate grammars, which can be seen as collections of regex (and which are generated by npm run langium:generate), to split the text into tokens and to assign a (syntactic) type to these tokens. The color for highlighting the token is chosen depending on the assigned type. In the example, a regex for the \u0026ldquo;Hello\u0026rdquo; keyword matches all strings \u0026ldquo;Hello\u0026rdquo; in text, resulting in the blue color even for \u0026ldquo;Hello\u0026rdquo; used as a name.\nSince Langium applies the parser rules to the token stream, Langium is able to distinguish \u0026ldquo;Hello\u0026rdquo; tokens used as a keyword for greetings and \u0026ldquo;Hello\u0026rdquo; tokens used as a name for persons and therefore is able to assign different semantic types to \u0026ldquo;Hello\u0026rdquo; tokens for persons and for greetings. According to the Language Server Protocol (LSP), these semantic token types are sent to editors like VS Code, which complement the syntactic types of tokens with these semantic types. The default highlighting of tokens according to the syntactic type is then altered according to the semantic token type and the color theme selected by the user (editor preferences).\nIn Langium, the SemanticTokenProvider service is responsible for assigning language-dependent semantic types to tokens. 
Therefore, we customize the default semantic token provider like this:\nimport { AbstractSemanticTokenProvider, AstNode, SemanticTokenAcceptor } from 'langium'; import { isPerson } from './generated/ast.js'; import { SemanticTokenTypes } from 'vscode-languageserver'; export class HelloWorldSemanticTokenProvider extends AbstractSemanticTokenProvider { protected override highlightElement(node: AstNode, acceptor: SemanticTokenAcceptor): void { if (isPerson(node)) { acceptor({ node, property: 'name', type: SemanticTokenTypes.class }); } } } For all persons (the isPerson(...) check), we explicitly specify the semantic type for the token of their 'name' property. Here, we use SemanticTokenTypes.class as semantic type. For your case, select a predefined type which fits your domain best. Since the name is used as a cross-reference by greetings, a similar check and assignment of a semantic token type needs to be done for the person property of Greeting as well.\nAfter creating the semantic token provider, you need to register the HelloWorldSemanticTokenProvider in hello-world-module.ts in the following way:\nexport const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { // ... lsp: { SemanticTokenProvider: (services) =\u0026gt; new HelloWorldSemanticTokenProvider(services) } }; Now rebuild and restart your application and test the improvements of the second step:\nThe HelloWorldSemanticTokenProvider works, and you might see a different highlighting, or you might not see any difference at all, e.g. \u0026ldquo;Hello\u0026rdquo; might still be blue. This leads us to the third step.\nStep 3: Ensure that your editor styles the chosen semantic token type The third step is to ensure that your editor supports the assigned semantic tokens: Depending on your editor and the currently selected color theme, the semantic token type selected in HelloWorldSemanticTokenProvider might not be supported or might not get a different color in the color theme. The easiest way to detect such problems is to change the current color theme and to try some others. Note that VS Code allows you to switch off semantic highlighting for all themes with the setting editor.semanticHighlighting.enabled.\nAfter switching from \u0026ldquo;Dark (Visual Studio)\u0026rdquo; to \u0026ldquo;Dark Modern\u0026rdquo; in VS Code, the example looks as expected. You can switch the current color theme in VS Code with cmd + K cmd + T (or via the menu: Code -\u0026gt; Settings\u0026hellip; -\u0026gt; Theme -\u0026gt; Color Theme).\nNow \u0026ldquo;Hello\u0026rdquo; is highlighted in purple if used as a keyword, and written in green if used as a value for the name of a person. Another solution is to select a different semantic type for your token in step two.\nWhile step one is mandatory to enable keywords as values in general, step two improves the user experience of your language. While step one and step two can be handled in the LSP server once for your language, step three highly depends on your editor and its color themes (in the LSP clients), which makes step three quite complicated to handle.\nNow you have learned how to enable keywords as regular values for properties. Feel free to enable the keyword \u0026ldquo;person\u0026rdquo; as a name for persons in the example on your own.\nWord to the wise: Enabling certain strings to be used interchangeably as keywords and identifiers/values is possible, but has some costs. 
Whether accepting the costs is required and worth it always needs to be evaluated case by case. Additionally, using keywords as identifiers impacts the user experience; therefore, involve the users of your language!\nSome hints beyond this guide:\n In multi-grammar projects, only keywords of the included grammars are affected by this general problem, but not keywords of other languages or Langium grammar files. In order to get an overview of the keywords of your language, have a look into the generated TextMate grammar *.tmLanguage.json and search for the pattern named keyword.control.*, which contains a regex with the keywords. Read about the concept of semantic tokens in the Language Server Protocol (LSP) including predefined semantic types for tokens. Read about how VS Code realizes semantic highlighting using semantic tokens. Dive into tokenizing of Chevrotain with regex. "},{"id":27,"href":"/showcase/","title":"Langium Showcase","parent":"Langium","content":"Showcase Welcome to Langium's showcase! Here you can find examples of languages created using Langium, all running in the browser (no backend involved). "},{"id":28,"href":"/showcase/minilogo/","title":"MiniLogo","parent":"Langium Showcase","content":""},{"id":29,"href":"/docs/reference/","title":"Reference","parent":"Documentation","content":"This section contains the reference documentation for Langium.\nWhere to go from here? Glossary If you are looking for a specific term or concept, you can find it in the glossary.\nGrammar language If you are looking for a specific grammar language feature, you can find it in the grammar language.\nArchitecture If you are looking for a specific architecture feature, here are some nice readings:\n Configuration via services Document lifecycle Semantic model "},{"id":30,"href":"/docs/introduction/showcases/","title":"Showcases","parent":"What is Langium?","content":""},{"id":31,"href":"/docs/learn/workflow/write_grammar/","title":"3. Write the grammar","parent":"Langium's workflow","content":"Your Langium project is now set up and ready to be used. The next step is to define the grammar of your language. The grammar is the most important part of your language definition. It defines the syntax of your language and how the language elements are structured.\nThe grammar is defined in a .langium file. Make sure that you have installed the VS Code extension for Langium. This extension provides syntax highlighting and code completion for .langium files. Here\u0026rsquo;s the grammar from the Hello-World example that was generated by the Yeoman generator:\ngrammar HelloWorld hidden terminal WS: /\s+/; terminal ID: /[_a-zA-Z][\w]*/; entry Model: (persons+=Person | greetings+=Greeting)*; Person: 'person' name=ID; Greeting: 'Hello' person=[Person] '!'; Let\u0026rsquo;s go through this one by one:\ngrammar HelloWorld Before we tell Langium anything about our grammar contents, we first need to give it a name - in this case it\u0026rsquo;s HelloWorld. The langium-cli will pick this up to prefix any generated services with this name.\nhidden terminal WS: /\s+/; terminal ID: /[_a-zA-Z][\w]*/; Here we define our two needed terminals for this grammar: the whitespace WS and identifier ID terminals. Terminals parse a part of our document by matching it against their regular expression. The WS terminal parses any whitespace characters with the regex /\s+/. This allows us to consume whitespaces in our document. 
As the terminal is declared as hidden, the parser will parse any whitespace and discard the results. That way, we don\u0026rsquo;t have to care about how many whitespaces a user uses in their document. Secondly, we define our ID terminal. It parses any string that starts with an underscore or letter and continues with any number of characters that match the \w regex token. It will match Alice, _alice, or _al1c3 but not 4lice or #alice. Langium uses the JS regex dialect for terminal definitions.\nentry Model: (persons+=Person | greetings+=Greeting)*; The Model parser rule is the entry point to our grammar. Parsing always starts with the entry rule. Here we define a repeating group of alternatives: persons+=Person | greetings+=Greeting. This will always try to parse either a Person or a Greeting and add it to the respective list of persons or greetings in the Model object. Since the alternative is wrapped in a repeating group *, the parser will continue until all input has been consumed.\nPerson: 'person' name=ID; The Person rule starts off with the 'person' keyword. Keywords are like terminals, in the sense that they parse a part of the document. The set of keywords and terminals creates the tokens that your language is able to parse. You can imagine that the 'person' keyword here is like an indicator to tell the parser that an object of type Person should be parsed. After the keyword, we assign the Person a name by parsing an ID.\nGreeting: 'Hello' person=[Person] '!'; Like the previous rule, the Greeting starts with a keyword. With the person assignment we introduce the cross reference, indicated by the brackets []. A cross reference will allow your grammar to reference other elements that are contained in your file or workspace. By default, Langium will try to resolve this cross reference by parsing the terminal that is associated with its name property. In this case, we are looking for a Person whose name property matches the parsed ID.\nThat finishes the short introduction to Langium! Feel free to play around with the grammar and use npm run langium:generate to regenerate the generated TypeScript files. To go further, we suggest that you continue with our tutorials.\n"},{"id":32,"href":"/docs/recipes/multiple-languages/","title":"Multiple dependent languages","parent":"Recipes","content":"This guide is about integrating multiple dependent languages in one Langium project.\nOne common situation where it makes sense to create dependent languages is when you only want to read concepts in one language and predefine them in another file (possibly even a built-in one). Think of splitting SQL into a defining part (CREATE TABLE table (...)) and a reading part (SELECT * FROM table).\n Notice that for n independent languages, you can simply create n independent Langium projects.\n If you want to see a living example, I recommend visiting the requirements example of the main Langium repository.\nOur plan The entire change touches several files. Let\u0026rsquo;s summarize what needs to be done:\n the grammar (the *.langium file) needs to be split into the three parts that were discussed above the Langium configuration (the langium-config.json file in the Langium project root) needs to split the language configuration into three parts the module file of your language (XXX-module.ts) needs to create the new language services as well. Last, but not least, you have to clean up all dependent files. Here we can give general hints. 
if you have a VSCode extension, the package.json needs to be adapted the extension entry point file (src/extension/main.ts) needs to be changed slightly Our scenario To keep this guide easy, I will use the hello-world project of the learning section.\nLet’s imagine that we have three languages:\n the first language defines persons the second language greets persons of the first language the third language configures which person you are Just as a finger exercise, let\u0026rsquo;s require that you cannot greet yourself.\n flowchart Implementation --|requires| Definition Configuration --|requires| Definition Implementation --|requires| Configuration Let\u0026rsquo;s start Grammar The most relevant change might be in the grammar. Here is the original grammar from the hello-world example, which is generated by Langium\u0026rsquo;s Yeoman generator:\ngrammar MultipleLanguages entry Model: (persons+=Person | greetings+=Greeting)*; Person: 'person' name=ID; Greeting: 'Hello' person=[Person:ID] '!'; hidden terminal WS: /\s+/; terminal ID: /[_a-zA-Z][\w_]*/; terminal INT returns number: /[0-9]+/; terminal STRING: /\u0026quot;(\\.|[^\u0026quot;\\])*\u0026quot;|'(\\.|[^'\\])*'/; hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; Now, split it into three new files (let\u0026rsquo;s call the entry rules units and name the files multiple-languages-(configuration|definition|implementation).langium):\nOur definition grammar:\ngrammar MultiDefinition entry DefinitionUnit: (persons+=Person)*; Person: 'person' name=ID; hidden terminal WS: /\s+/; terminal ID: /[_a-zA-Z][\w_]*/; hidden terminal ML_COMMENT: /\/\*[\s\S]*?\*\//; hidden terminal SL_COMMENT: /\/\/[^\n\r]*/; Our configuration grammar (note the import):\ngrammar MultiConfiguration import \u0026quot;multiple-languages-definition\u0026quot;; entry ConfigurationUnit: 'I' 'am' who=[Person:ID] '.'; Our implementation grammar (note the import again):\ngrammar MultiImplementation import \u0026quot;multiple-languages-definition\u0026quot;; entry ImplementationUnit: (greetings+=Greeting)*; Greeting: 'Hello' person=[Person:ID] '!'; Langium configuration Splitting the grammar alone is not sufficient to generate anything using the CLI. You need to change the langium-config.json in the root folder as well. Let\u0026rsquo;s make it happen!\nThe initial version of this file was:\n{ \u0026quot;projectName\u0026quot;: \u0026quot;MultipleLanguages\u0026quot;, \u0026quot;languages\u0026quot;: [{ \u0026quot;id\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages.monarch.ts\u0026quot; } }], \u0026quot;out\u0026quot;: \u0026quot;src/language/generated\u0026quot; } The actual change is simple: Triple the object in the languages list and fill in reasonable values. 
Like here:\n{ \u0026quot;projectName\u0026quot;: \u0026quot;MultipleLanguages\u0026quot;, \u0026quot;languages\u0026quot;: [{ \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-configuration.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.me\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-configuration.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-configuration.monarch.ts\u0026quot; } }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-definition.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.who\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-definition.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-definition.monarch.ts\u0026quot; } }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;grammar\u0026quot;: \u0026quot;src/language/multiple-languages-implementation.langium\u0026quot;, \u0026quot;fileExtensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;textMate\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-implementation.tmLanguage.json\u0026quot; }, \u0026quot;monarch\u0026quot;: { \u0026quot;out\u0026quot;: \u0026quot;syntaxes/multiple-languages-implementation.monarch.ts\u0026quot; } }], \u0026quot;out\u0026quot;: \u0026quot;src/language/generated\u0026quot; } From now on you are able to run the Langium CLI using the NPM scripts (npm run langium:generate). It will generate one file for the abstract syntax tree (AST) containing all language concepts (it is also a good idea to keep the names of these concepts disjoint).\nFor the next step you need to run the Langium generator once:\nnpm run langium:generate Language module file The module file describes how your language services are built. After adding two more languages, some important classes get generated, which need to be registered properly.\n Open the module file (/src/language/multiple-languages-module.ts).\n You will notice a wrong import (which is expected: we renamed it in the previous steps and derived new classes by code generation).\n Import the new generated modules instead. Replace this line:\nimport { MultipleLanguagesGeneratedModule, MultipleLanguagesGeneratedSharedModule } from './generated/module.js'; with the following:\nimport { MultiConfigurationGeneratedModule, MultiDefinitionGeneratedModule, MultiImplementationGeneratedModule, MultipleLanguagesGeneratedSharedModule } from './generated/module.js'; In the function createMultipleLanguagesServices you will notice an error line now, because we deleted the old class name in the previous step. The code there basically needs to be tripled. But before we do this, we need to define the new output type of createMultipleLanguagesServices. 
In the end, this should lead to the following definition:\nexport function createMultipleLanguagesServices(context: DefaultSharedModuleContext): { shared: LangiumSharedServices, Configuration: MultipleLanguagesServices, Definition: MultipleLanguagesServices, Implementation: MultipleLanguagesServices } { const shared = inject( createDefaultSharedModule(context), MultipleLanguagesGeneratedSharedModule ); const Configuration = inject( createDefaultModule({ shared }), MultiConfigurationGeneratedModule, MultipleLanguagesModule ); const Definition = inject( createDefaultModule({ shared }), MultiDefinitionGeneratedModule, MultipleLanguagesModule ); const Implementation = inject( createDefaultModule({ shared }), MultiImplementationGeneratedModule, MultipleLanguagesModule ); shared.ServiceRegistry.register(Configuration); shared.ServiceRegistry.register(Definition); shared.ServiceRegistry.register(Implementation); registerValidationChecks(Configuration); registerValidationChecks(Definition); registerValidationChecks(Implementation); return { shared, Configuration, Definition, Implementation }; } After this step, Langium is set up correctly. But if you try to build now, the compiler will throw some errors, because the old concepts of the AST no longer exist.\n Be aware that we are using MultipleLanguagesModule in all three services, which are three independent services! If you want to avoid this (because of duplicated state etc.), you should put some work into creating instances for each service.\n Cleanup Let\u0026rsquo;s clean up the error lines. Here are some general hints:\n keep in mind that you are dealing with three file types now, namely *.me, *.who and *.hello you can distinguish them very easily by selecting the right sub service from the result object of createMultipleLanguagesServices, which is either Configuration, Definition, or Implementation, but not shared all these services have a sub service with the file extensions ([Configuration,Definition,...].LanguageMetaData.fileExtensions: string[]), so when you are obtaining any documents from the DocumentBuilder, you can be sure that they are parsed by the matching language service to distinguish them on your own, use the AST functions for determining the root type, for example for the Configuration language use isConfigurationUnit(document.parseResult.value) VSCode extension If you have a VSCode extension, you need to touch two files: package.json and src/extension/main.ts.\nFile package.json In this file we define what services this extension will contribute to VSCode.\nBefore the change, only one language and one grammar were defined:\n//... \u0026quot;contributes\u0026quot;: { \u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages\u0026quot;, \u0026quot;multiple-languages\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; } ], \u0026quot;grammars\u0026quot;: [ { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages.tmLanguage.json\u0026quot; } ] }, //... After the change, we tripled the information. Be aware that the language IDs must match the IDs from the Langium configuration. 
VSCode extension If you have a VSCode extension, you need to touch two files: package.json and src/extension/main.ts.\nFile package.json In this file we define what services this extension will contribute to VSCode.\nBefore the change only one language and grammar was defined:\n//... \u0026quot;contributes\u0026quot;: { \u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages\u0026quot;, \u0026quot;multiple-languages\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; } ], \u0026quot;grammars\u0026quot;: [ { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages.tmLanguage.json\u0026quot; } ] }, //... After the change, we tripled the information. Be aware that the language IDs must match the IDs from the Langium configuration. Also make sure that the paths to the syntax files and the language configuration are correct.\n For the language configuration for VSCode, we reused the old file three times. If you want to make a more precise configuration per language, you should also split this file. But for the moment, let\u0026rsquo;s use the same file for simplicity.\n //... \u0026quot;contributes\u0026quot;: { \u0026quot;languages\u0026quot;: [ { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Configuration\u0026quot;, \u0026quot;multiple-languages-configuration\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.me\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Definition\u0026quot;, \u0026quot;multiple-languages-definition\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.who\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; }, { \u0026quot;id\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;aliases\u0026quot;: [ \u0026quot;Multiple Languages Implementation\u0026quot;, \u0026quot;multiple-languages-implementation\u0026quot; ], \u0026quot;extensions\u0026quot;: [\u0026quot;.hello\u0026quot;], \u0026quot;configuration\u0026quot;: \u0026quot;./language-configuration.json\u0026quot; } ], \u0026quot;grammars\u0026quot;: [ { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-configuration\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-configuration\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-configuration.tmLanguage.json\u0026quot; }, { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-definition\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-definition\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-definition.tmLanguage.json\u0026quot; }, { \u0026quot;language\u0026quot;: \u0026quot;multiple-languages-implementation\u0026quot;, \u0026quot;scopeName\u0026quot;: \u0026quot;source.multiple-languages-implementation\u0026quot;, \u0026quot;path\u0026quot;: \u0026quot;./syntaxes/multiple-languages-implementation.tmLanguage.json\u0026quot; } ] }, File src/extension/main.ts And here is the extension file before the change:\n// Options to control the language client const clientOptions: LanguageClientOptions = { documentSelector: [{ scheme: 'file', language: 'multiple-languages' }] }; After the change, it should look like this (the language IDs must match those in the Langium configuration):\n// Options to control the language client const clientOptions: LanguageClientOptions = { documentSelector: [ { scheme: 'file', language: 'multiple-languages-configuration' }, { scheme: 'file', language: 'multiple-languages-definition' }, { scheme: 'file', language: 'multiple-languages-implementation' } ] }; Test the extension Now everything should be executable. Do not forget to build first!\nLet\u0026rsquo;s run the extension and create some files in our workspace:\nDefinition people.who person Markus person Michael person Frank Configuration thats.me I am Markus. Implementation greetings.hello Hello Markus! Hello Michael!
Checklist You should now be able\u0026hellip;:\n to see proper syntax highlighting to trigger auto completion for keywords to jump to the definition by Cmd/Ctrl-clicking on a person\u0026rsquo;s name Add a validator (task) As promised, let\u0026rsquo;s add a simple validation rule: you cannot greet yourself. For this, we enter our name in the thats.me file as we did in the previous step.\nTry to include the following code in your validator. This is meant as a task - try to find the missing pieces on your own :-).\ncheckNotGreetingYourself(greeting: Greeting, accept: ValidationAcceptor): void { const document = getDocument(greeting); const configFilePath = join(document.uri.fsPath, '..', 'thats.me'); const configDocument = this.documents.getOrCreateDocument(URI.file(configFilePath)); if (greeting.person.ref) { if (configDocument \u0026amp;\u0026amp; isConfigurationUnit(configDocument.parseResult.value)) { if(configDocument.parseResult.value.who.ref === greeting.person.ref) { accept('warning', 'You cannot greet yourself 🙄!', { node: greeting, property: 'person' }); } } } } After doing so, your name should display a warning, stating that you cannot greet yourself.\nTroubleshooting In this section we will list common mistakes.\n One prominent mistake is forgetting to rebuild the Langium and TypeScript files before running the extension.\n Since we are mostly copy-pasting configuration, be aware of what you are pasting and make sure that the code still makes sense after copying - a likely cause of errors is forgetting to adapt the pasted code.\n If you encounter any problems, we are happy to help in our discussions page or our issue tracker.\n"},{"id":33,"href":"/playground/","title":"Playground","parent":"Langium","content":"import { addMonacoStyles, setupPlayground, share, overlay, getPlaygroundState, MonacoEditorLanguageClientWrapper } from \"./libs/worker/common.js\"; import { buildWorkerDefinition } from \"../libs/monaco-editor-workers/index.js\"; addMonacoStyles('monaco-styles-helper'); buildWorkerDefinition( \"../libs/monaco-editor-workers/workers\", new URL(\"\", window.location.href).href, false ); // on doc load addEventListener('load', function() { // get a handle to our various interactive buttons const copiedHint = document.getElementById('copiedHint'); const shareButton = document.getElementById('shareButton'); const grammarRoot = document.getElementById('grammar-root'); const contentRoot = document.getElementById('content-root'); // register a listener for the share button shareButton.onclick = () => { // retrieve the current playground state (grammar + content/program) const playgroundState = getPlaygroundState(); share(playgroundState.grammar, playgroundState.content); // update the display to indicate that the text has been shared shareButton.src = '/assets/checkmark.svg'; copiedHint.style.display = 'block'; // reset again after a second... setTimeout(() => { shareButton.src = '/assets/share.svg'; copiedHint.style.display = 'none'; }, 1000); }; const treeButton = document.getElementById('treeButton'); const grid = document.getElementById('grid'); const key = 'display-ast'; if(localStorage.getItem(key) === 'yes') { grid.classList.toggle('without-tree'); } treeButton.onclick = () => { const shown = !grid.classList.toggle('without-tree'); localStorage.setItem(key, shown ?
'yes' : 'no'); const resizeEvent = new Event('resize'); window.dispatchEvent(resizeEvent); }; const url = new URL(window.location.toString()); const grammar = url.searchParams.get('grammar'); const content = url.searchParams.get('content'); setupPlayground( grammarRoot, contentRoot, grammar, content, overlay ); }); "},{"id":34,"href":"/docs/recipes/","title":"Recipes","parent":"Documentation","content":"Where to go from here? Take your time to study the recipes within the navigation on the left. They are designed to help you with common tasks and challenges you might face when working with Langium. If you have any questions or suggestions, feel free to create an issue or start a discussion on the GitHub repository.\n"},{"id":35,"href":"/docs/reference/semantic-model/","title":"Semantic Model Inference","parent":"Reference","content":"When AST nodes are created during the parsing of a document, they are given a type. The language grammar dictates the shape of those types and how they might be related to each other. All types form the semantic model of your language. There are two ways by which Langium derives semantic model types from the grammar: by inference and by declaration.\nInference is the default behavior in Langium. During the generation of the semantic model types, Langium infers the possible types directly from the grammar rules. While this is a powerful approach for simple languages and prototypes, it is not recommended for more mature languages since minimal changes in the grammar can easily lead to breaking changes.\nTo minimize the chance of breaking changes, Langium introduces declared types, where the semantic model types are explicitly defined by the user in the grammar via a TypeScript-like syntax.\nIn the following, we detail how grammar rules shape the semantic model via inference and declaration.\nInferred Types Inferred types result from letting Langium infer the types of the nodes from the grammar rules. Let\u0026rsquo;s have a look at how various rules shape these type definitions:\nParser Rules The simplest way to write a parser rule is as follows:\nX: name=ID; With this syntax, Langium will infer the type of the node to be generated when parsing the rule. By convention, the type of the node will be named after the name of the rule, resulting in this TypeScript interface in the semantic model:\ninterface X extends AstNode { name: string } It is also possible to control the naming of the interface by using the following syntax:\nX infers MyType: name=ID; resulting in the following interface in the semantic model:\ninterface MyType extends AstNode { name: string } Please note that an interface X is no longer present in the semantic model.\nIt is important to understand that the name of the parser rule and the name of the type it infers work on two separate abstraction levels. The name of the parser rule is used at the parsing level where types are ignored and only the parsing rule is considered, while the name of the type is used at the types level where both the type and the parser rule play a role. This means that the name of the type can be changed without affecting the parsing rules hierarchy, and that the name of the rule can be changed - if it explicitly infers or returns a given type - without affecting the semantic model.\nBy inferring types within the grammar, it is also possible to define several parser rules creating the same semantic model type.
For example, the following grammar has two rules X and Y inferring a single semantic model type MyType:\nX infers MyType: name=ID; Y infers MyType: name=ID count=INT; This results in the creation of a single interface in the semantic model \u0026lsquo;merging\u0026rsquo; the two parser rules, with non-common properties made optional:\ninterface MyType extends AstNode { count?: number name: string } Terminal Rules Terminal rules are linked to built-in types in the semantic model. They do not result in semantic model types on their own but determine the type of properties in semantic model types inferred from a parser rule:\nterminal INT returns number: /[0-9]+/; terminal ID returns string: /[a-zA-Z_][a-zA-Z0-9_]*/; X: name=ID count=INT; // generated interface interface X extends AstNode { name: string count: number } The property name is of type string because the terminal rule ID is linked to the built-in type string, and the property count is of type number because the terminal rule INT is linked to the built-in type number.\nData type rules Data type rules are similar to terminal rules in the sense that they determine the type of properties in semantic model types inferred from parser rules. However, they lead to the creation of type aliases for built-in types in the semantic model:\nQualifiedName returns string: ID '.' ID; X: name=QualifiedName; // generated types type QualifiedName = string; interface X extends AstNode { name: string } Assignments There are three available kinds of assignments in a parser rule:\n = for assigning a single value to a property, resulting in the property\u0026rsquo;s type being derived from the right-hand side of the assignment. += for assigning multiple values to a property, resulting in the property\u0026rsquo;s type being an array of the right-hand side of the assignment. ?= for assigning a boolean to a property, resulting in the property\u0026rsquo;s type being a boolean. X: name=ID numbers+=INT (numbers+=INT)* isValid?='valid'?; // generated interface interface X extends AstNode { name: string numbers: Array\u0026lt;number\u0026gt; isValid: boolean } The right-hand side of an assignment can be any of the following:\n A terminal rule or a data type rule, which results in the type of the property being a built-in type. A parser rule, which results in the type of the property being the type of the parser rule. A cross-reference, which results in the type of the property being a Reference to the type of the cross-reference. An alternative, which results in the type of the property being a type union of all the types in the alternative. X: 'x' name=ID; Y: crossValue=[X:ID] alt=(INT | X | [X:ID]); // generated types interface X extends AstNode { name: string } interface Y extends AstNode { crossValue: Reference\u0026lt;X\u0026gt; alt: number | X | Reference\u0026lt;X\u0026gt; } Unassigned Rule Calls A parser rule does not necessarily need to have assignments. It may also contain only unassigned rule calls. These kinds of rules can be used to change the type hierarchy.\nX: A | B; A: 'A' name=ID; B: 'B' name=ID count=INT; // generated types type X = A | B; interface A extends AstNode { name: string } interface B extends AstNode { name: string count: number } Simple Actions Actions can be used to change the type of a node inside of a parser rule to another semantic model type.
For example, they allow you to simplify parser rules which would otherwise have to be split into multiple rules.\nX: {infer A} 'A' name=ID | {infer B} 'B' name=ID count=INT; // is equivalent to: X: A | B; A: 'A' name=ID; B: 'B' name=ID count=INT; // generated types type X = A | B; interface A extends AstNode { name: string } interface B extends AstNode { name: string count: number } Assigned actions Actions can also be used to control the structure of the semantic model types. This is a more advanced topic, so we recommend getting familiar with the rest of the documentation before diving into this section.\nLet\u0026rsquo;s consider two different grammars derived from the Arithmetics example. These grammars are designed to parse a document containing a single definition composed of a name and an expression assignment, with an expression being any amount of additions or a numerical value.\nThe first one does not use assigned actions:\nDefinition: 'def' name=ID ':' expr=Expression; Expression: Addition; Addition infers Expression: left=Primary ('+' right=Expression)?; Primary infers Expression: '(' Expression ')' | {Literal} value=NUMBER; When parsing a document containing def x: (1 + 2) + 3, this is the shape of the semantic model node:\n graph TD; expr((expr)) --\u0026gt; left((left)) expr --\u0026gt; right((right)) left --\u0026gt; left_left((left)) left --\u0026gt; left_right((right)) right --\u0026gt; right_left((left)) left_left --\u0026gt; left_left_v{1} left_right --\u0026gt; left_right_v{2} right_left --\u0026gt; right_left_v{3} We can see that the nested right -\u0026gt; left nodes in the tree are unnecessary and we would like to remove one level of nesting from the tree.\nThis can be done by refactoring the grammar and adding an assigned action:\nDefinition: 'def' name=ID ':' expr=Addition ';'; Expression: Addition; Addition infers Expression: Primary ({infer Addition.left=current} '+' right=Primary)*; Primary infers Expression: '(' Expression ')' | {Literal} value=NUMBER; Parsing the same document now leads to this semantic model:\ngraph TD; expr((expr)) --\u0026gt; left((left)) expr --\u0026gt; right((right)) left --\u0026gt; left_left((left)) left --\u0026gt; left_right((right)) right --\u0026gt; right_v{3} left_left --\u0026gt; left_left_v{1} left_right --\u0026gt; left_right_v{2} While this is a fairly trivial example, adding more layers of expression types in your grammar massively degrades the quality of your syntax tree, as each layer will add another empty right property to the tree. Assigned actions alleviate this issue completely.\nDeclared Types Because type inference takes into account every entity of a parser rule, even the smallest changes can update your inferred types. This can lead to unwanted changes in your semantic model and incorrect behavior of services that depend on it. Declared types are a means to minimize the risk of introducing breaking changes when modifying the grammar.\nIn most cases, especially for early language designs, letting the type inference take care of generating your types will be your best choice. As your language starts to mature, it may then be of interest to fix parts of your semantic model using declared types.\nWith that aside, declared types can be especially helpful for more mature and complex languages, where a stable semantic model is key and breaking changes introduced by inferred types can break your language services.
Declared types allow the user to fix the type of their parser rules and rely on the power of validation errors to detect breaking changes.\nLet\u0026rsquo;s look at the example from the previous section:\nX infers MyType: name=ID; Y infers MyType: name=ID count=INT; // should be replaced by: interface MyType { name: string count?: number } X returns MyType: name=ID; Y returns MyType: name=ID count=INT; We now explicitly declare MyType directly in the grammar with the keyword interface. The parser rules X and Y creating nodes of type MyType need to explicitly declare the type of the node they create with the keyword returns.\nContrary to inferred types, all properties must be explicitly declared in order to be valid inside of a parser rule. The following syntax:\nZ returns MyType: name=ID age=INT; will show the validation error \u0026quot;A property 'age' is not expected\u0026quot;, because the declaration of MyType does not include the property age. In short, declared types add a layer of safety via validation to the grammar that prevents mismatches between the expected semantic model types and the shape of the parsed nodes.\nA declared type can also extend types, such as other declared types or types inferred from parser rules:\ninterface MyType { name: string } interface MyOtherType extends MyType { count: number } Y returns MyOtherType: name=ID count=INT; Explicitly declaring union types in the grammar is achieved with the keyword type:\ntype X = A | B; // generates: type X = A | B; Using returns always expects a reference to an already existing type. To create a new type for your rule, use the infers keyword or explicitly declare an interface.\nCross-references, Arrays, and Alternatives Declared types come with special syntax to declare cross-references, arrays, and alternatives:\ninterface A { reference: @B array: B[] alternative: B | C } interface B { name: string } interface C { name: string count: number } X returns A: reference=[B:ID] array+=Y (array+=Y)* alternative=(Y | Z); Y returns B: 'Y' name=ID; Z returns C: 'Z' name=ID count=INT; Actions Actions referring to a declared type have the following syntax:\ninterface A { name: string } interface B { name: string count: number } X: {A} 'A' name=ID | {B} 'B' name=ID count=INT; Note the absence of the keyword infer compared to actions which infer a type.\nReference Unions Trying to reference different types of elements can be an error-prone process. Take a look at the following rule which tries to reference either a Function or a Variable:\nMemberCall: (element=[Function:ID] | element=[Variable:ID]); As both alternatives are only an ID from a parser perspective, this grammar is not decidable and the Langium CLI will throw an error during generation. Luckily, we can improve on this by adding a layer of indirection using an additional parser rule:\nNamedElement: Function | Variable; MemberCall: element=[NamedElement:ID]; This allows us to reference either Function or Variable using the common rule NamedElement. However, we have now introduced a rule which is never actually parsed, but only exists so that the type system picks up on the correct target types of the reference.
Using declared types, we are able to refactor away this unused rule, making our grammar more resilient in the process:\n// Note the `type` prefix here type NamedElement = Function | Variable; MemberCall: element=[NamedElement:ID]; We can also use interfaces in place of union types with similar results:\ninterface NamedElement { name: string } // Infers an interface `Function` that extends `NamedElement` Function returns NamedElement: {infer Function} \u0026quot;function\u0026quot; name=ID ...; // This also picks up on the `Function` elements MemberCall: element=[NamedElement:ID]; "},{"id":36,"href":"/showcase/sql/","title":"SQL","parent":"Langium Showcase","content":""},{"id":37,"href":"/docs/introduction/playground/","title":"Try it out!","parent":"What is Langium?","content":""},{"id":38,"href":"/docs/learn/workflow/generate_ast/","title":"4. Generate the AST","parent":"Langium's workflow","content":"After defining the grammar, you can generate the abstract syntax tree (AST) of your language. The AST is a tree representation of the source code that can be used to analyze and transform the code. The AST definition is generated by the Langium CLI. Simply call the following command in your terminal:\nnpm run langium:generate This line will call langium generate on your Langium project. The Langium CLI will generate the files in the src/generated directory. It will create the following files (depending on your given Langium configuration):\n a grammar file: which contains your entire grammar definition in JSON format. a module file: which contains language-specific setup objects for the final module definition of your language. an ast file: which contains the definition of your AST. several syntax highlighting files: like for PrismJS, TextMate or Monarch. The syntax tree Your language is now ready to be parsed into an AST. One important concept in Langium is cross-references. With them you can reference other elements in your language. For example, you can reference a variable in a function call. The AST will contain a reference to the variable. This is useful for code analysis and transformation. Technologies like ANTLR or other parser-only generators do not support this feature. With them, you are forced to resolve these references in place every time you are confronted with them.\nAfter these generation steps, cross-references are not resolved yet. This is done in the next step.\nExample Imagine you are using the Hello-World example from the Yeoman generator. For an input file like this you will get the following syntax tree from Langium at runtime:\nperson John person Jane Hello John! Hello Jane! graph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] G1 --\u0026gt; EM1('!') PRef1 --\u0026gt; QM1{?} greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] G2 --\u0026gt; EM2('!') PRef2 --\u0026gt; QM2{?} Mind the gaps (question marks) for the cross-references inside the greetings. Filling them is the developer\u0026rsquo;s job. Fortunately, Langium provides a default implementation for cross-reference resolution. You can also implement your own resolution strategy.\nHow to test the parser? You can test the parser by comparing the generated AST with the expected AST.
Here is an example:\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! `); //assert const model = document.parseResult.value; expect(model.persons).toHaveLength(2); expect(model.persons[0].name).toBe(\u0026quot;John\u0026quot;); expect(model.persons[1].name).toBe(\u0026quot;Jane\u0026quot;); expect(model.greetings).toHaveLength(2); //be aware that the following checks will fail at this point, because the cross-references are not resolved yet expect(model.greetings[0].person.ref?.name).toBe(\u0026quot;John\u0026quot;); expect(model.greetings[1].person.ref?.name).toBe(\u0026quot;Jane\u0026quot;); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":39,"href":"/showcase/domainmodel/","title":"Domain Model","parent":"Langium Showcase","content":""},{"id":40,"href":"/docs/learn/workflow/resolve_cross_references/","title":"5. Resolve cross-references","parent":"Langium's workflow","content":"This step takes place after generating the AST. The AST definition was created and you are able to parse input files. But the AST is not complete yet. It contains cross-references that are not resolved. Cross-references are used to reference other elements in your language.\nProblem Let\u0026rsquo;s illustrate the problem using the Hello-World example from the Yeoman generator:\nperson John person Jane Hello John! Hello Jane! The following syntax tree is generated by the Langium parser at runtime. Mind the gaps with the question marks. These are the missing pieces you want to fill in during this step.\n graph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] PRef1 -- $refText --\u0026gt; RT1('John') G1 --\u0026gt; EM1('!') PRef1 --\u0026gt; QM1{?} greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] PRef2 -- $refText --\u0026gt; RT2('Jane') G2 --\u0026gt; EM2('!') PRef2 --\u0026gt; QM2{?} You can normally achieve the cross-reference resolution by implementing a so-called scope provider and a scope computation. When set up correctly, the given syntax tree will change to this:\ngraph TB Model--\u0026gt;persons Model--\u0026gt;greetings persons--\u0026gt;P1[Person] P1 --\u0026gt; H1('person') P1 --\u0026gt; N1[name] N1 --\u0026gt; NL1('John') persons--\u0026gt;P2[Person] P2 --\u0026gt; H2('person') P2 --\u0026gt; N2[name] N2 --\u0026gt; NL2('Jane') greetings--\u0026gt;G1[Greeting] G1 --\u0026gt; KW1('hello') G1 --\u0026gt; PRef1[Ref] PRef1 -- $refText --\u0026gt; RT1('John') G1 --\u0026gt; EM1('!') PRef1 -..- P1 greetings--\u0026gt;G2[Greeting] G2 --\u0026gt; KW2('hello') G2 --\u0026gt; PRef2[Ref] PRef2 -- $refText --\u0026gt; RT2('Jane') G2 --\u0026gt; EM2('!') PRef2 -..- P2 Resolution of cross-references As already hinted, you can implement a scope provider and a scope computation. Fortunately, Langium comes with default implementations for both. But as your language grows, you might eventually want to implement your own strategy because the default is not sufficient. The following sections sketch the roles of the involved interfaces.\nScope provider Terms The scope provider is responsible for providing a scope for a given cross-reference represented by the ReferenceInfo type.\nA scope is a collection of AST nodes that are represented by the AstNodeDescription type.\nThe description is like a (string) path through the AST of a document. It can also be seen as a tuple of document URI, JSON path, name and type of the AST node.\nA reference info contains the concrete AST reference (which points to nothing yet). The info also has the parent AST node (a so-called container) of the reference and the property name under which the reference is stored in its container. In the form of this tuple (container, property, reference) Langium visits all cross-references using the scope provider\u0026rsquo;s getScope method.\nexport interface ScopeProvider { getScope(context: ReferenceInfo): Scope; } export interface ReferenceInfo { reference: Reference container: AstNode property: string index?: number } export interface Scope { getElement(name: string): AstNodeDescription | undefined; getAllElements(): Stream\u0026lt;AstNodeDescription\u0026gt;; } Purpose So, what is the purpose of the scope provider? As mentioned above: it visits each cross-reference and tries to find all AST nodes across the entire workspace that are candidates for the cross-reference\u0026rsquo;s target. It is important to understand that we do not decide here which of these nodes is the perfect match! That decision is part of the so-called linker of the Langium architecture.\nWhether your cross-reference\u0026rsquo;s $refText contains the name Jane does not matter here. We need to provide all nodes that are possible at this position. So in the result, you would return Jane and John AST nodes - for both cross-references!\nThe background for this behavior is that this mechanism can be used for two things: the cross-reference resolution and the code completion. The code completion needs to know all possible candidates for a given cross-reference. The resolution of the cross-reference is done by the linker: Given a scope for a certain cross-reference, the linker decides which of the candidates is the right one - for example the first candidate with the same name.
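To make this division of labor concrete, here is a rough sketch of what happens for a single cross-reference. It is a simplification assuming a ReferenceInfo named refInfo and the ScopeProvider service at hand - not Langium\u0026rsquo;s exact implementation:
// the scope provider collects all candidates for this reference
const scope = scopeProvider.getScope(refInfo);
// the linker then picks the candidate whose name equals the reference text
const candidate = scope.getElement(refInfo.reference.$refText);
if (candidate) {
    // the reference is bound to this description
} else {
    // no candidate found: a linking error is attached to the document
}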
Scope computation The scope computation is responsible for defining, per document\u0026hellip;\n which AST nodes get exported to the global scope. These nodes will be collected by the so-called index manager. which AST nodes (as descriptions) are available in the local scope of a certain AST node. This is meant as a precomputed cache for the scope provider. The index manager keeps track of the global symbols of your language. It can be used by the scope provider to find the right candidates for a cross-reference.\nexport interface ScopeComputation { computeExports(document: LangiumDocument, cancelToken?: CancellationToken): Promise\u0026lt;AstNodeDescription[]\u0026gt;; computeLocalScopes(document: LangiumDocument, cancelToken?: CancellationToken): Promise\u0026lt;PrecomputedScopes\u0026gt;; } So, while the scope computation defines what symbols are globally exported (like using the export keyword in TypeScript), the scope provider is the place to implement the import of these symbols using the index manager and the semantics of your import logic.
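As an illustration of the export side, here is a minimal sketch of a custom scope computation for the Hello-World example that exports only Person nodes to the global scope. It assumes Langium\u0026rsquo;s DefaultScopeComputation base class and its protected descriptions helper; the default implementation already exports all named nodes, so this only shows the shape:
import { AstNodeDescription, DefaultScopeComputation, LangiumDocument } from 'langium';
import { isModel } from './generated/ast.js';
export class HelloWorldScopeComputation extends DefaultScopeComputation {
    override async computeExports(document: LangiumDocument): Promise\u0026lt;AstNodeDescription[]\u0026gt; {
        const root = document.parseResult.value;
        if (isModel(root)) {
            // export each person under its name, so that other documents can reference it
            return root.persons.map(p =\u0026gt; this.descriptions.createDescription(p, p.name, document));
        }
        return [];
    }
}
Analogous to the scope provider shown further below, such a class would be registered in your language module under references.ScopeComputation.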
Cross-reference resolution from a high-level perspective The AST gets generated by the parser for each document in the workspace. The scope computation is called for each document in the workspace. All exported AST nodes are collected by the index manager. The scope computation is called again for each document in the workspace. All local scopes get computed and attached to the document. The linker and the scope provider are called for each cross-reference in the workspace. The scope provider uses the index manager to find candidates for each cross-reference. The linker decides which candidate is the right one for each cross-reference. Example For the Hello-World example, you can implement a scope provider like this (keep in mind that this is an alternative solution to the default implementation of Langium, which already works for most cases):\nimport { ReferenceInfo, Scope, ScopeProvider, AstUtils, LangiumCoreServices, AstNodeDescriptionProvider, MapScope, EMPTY_SCOPE } from \u0026quot;langium\u0026quot;; import { isGreeting, isModel } from \u0026quot;./generated/ast.js\u0026quot;; export class HelloWorldScopeProvider implements ScopeProvider { private astNodeDescriptionProvider: AstNodeDescriptionProvider; constructor(services: LangiumCoreServices) { //get some helper services this.astNodeDescriptionProvider = services.workspace.AstNodeDescriptionProvider; } getScope(context: ReferenceInfo): Scope { //check which cross-reference you are handling right now if(isGreeting(context.container) \u0026amp;\u0026amp; context.property === 'person') { //Success! We are handling the cross-reference of a greeting to a person! //get the root node of the document const model = AstUtils.getContainerOfType(context.container, isModel)!; //select all persons from this document const persons = model.persons; //transform them into node descriptions const descriptions = persons.map(p =\u0026gt; this.astNodeDescriptionProvider.createDescription(p, p.name)); //create the scope return new MapScope(descriptions); } return EMPTY_SCOPE; } } Please make sure to override the default scope provider in your language module file like this:\n//... export const HelloWorldModule: Module\u0026lt;HelloWorldServices, PartialLangiumServices \u0026amp; HelloWorldAddedServices\u0026gt; = { //validation: ... references: { ScopeProvider: (services) =\u0026gt; new HelloWorldScopeProvider(services) } }; //... How to test the linking? You can test the linking by comparing the resolved references with the expected references. Here is the example from the last step.\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! `); //assert const model = document.parseResult.value; expect(model.persons).toHaveLength(2); expect(model.greetings).toHaveLength(2); expect(model.greetings[0].person.ref).toBe(model.persons[0]); expect(model.greetings[1].person.ref).toBe(model.persons[1]); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":41,"href":"/docs/learn/workflow/create_validations/","title":"6.
Create validations","parent":"Langium's workflow","content":"After resolving the cross-references, you can assume that the syntax tree is complete. Now you can start with the validation of the input files. The validation process is a crucial part of the language engineering workflow. The parser ensures the syntactic correctness of the input files, while the validation process ensures their semantic correctness.\nExample Let\u0026rsquo;s consider the Hello-World example from the Yeoman generator. One semantic rule of this language could be that each declared person must be greeted at most once. To be clear, the following input file is invalid - we are greeting John twice:\nperson John person Jane Hello John! Hello Jane! Hello John! //should throw: You can greet each person at most once! This is the 2nd greeting to John. Implementation To accomplish this, you need to implement a validator. The validator is a visitor that traverses a certain part of the syntax tree and checks for semantic errors. The following code snippet shows how you can implement a validator for the Hello-World example. Note that the Hello-World example already has a validator; you just need to add the following check.\nimport type { ValidationAcceptor, ValidationChecks } from 'langium'; import type { HelloWorldAstType, Model, Person } from './generated/ast.js'; import type { HelloWorldServices } from './hello-world-module.js'; export function registerValidationChecks(services: HelloWorldServices) { const registry = services.validation.ValidationRegistry; const validator = services.validation.HelloWorldValidator; const checks: ValidationChecks\u0026lt;HelloWorldAstType\u0026gt; = { //registers a validator for all Model AST nodes Model: validator.checkPersonAreGreetedAtMostOnce }; registry.register(checks, validator); } export class HelloWorldValidator { checkPersonAreGreetedAtMostOnce(model: Model, accept: ValidationAcceptor): void { //create a multi-counter variable using a map const counts = new Map\u0026lt;Person, number\u0026gt;(); //initialize the counter for each person to zero model.persons.forEach(p =\u0026gt; counts.set(p, 0)); //iterate over all greetings and count the number of greetings for each person model.greetings.forEach(g =\u0026gt; { const person = g.person.ref; //Attention! If the linker was unsuccessful, person is undefined if(person) { //set the new value of the counter const newValue = counts.get(person)!+1; counts.set(person, newValue); //if the counter is greater than 1, create a helpful error if(newValue \u0026gt; 1) { accept('error', `You can greet each person at most once! This is the ${newValue}${newValue==2?'nd':'th'} greeting to ${person.name}.`, { node: g }); } } }); } } How to test the validator? To test the validator, we can simply use the parseHelper again. The following code snippet shows how you can test the validator:\nimport { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { Model } from \u0026quot;../../src/language/generated/ast.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); //act const document = await parse(` person John person Jane Hello John! Hello Jane! Hello John! `, { validation: true }); //enable validation, otherwise the validator will not be called!
//assert expect(document.diagnostics).toHaveLength(1); expect(document.diagnostics![0].message).toBe('You can greet each person at most once! This is the 2nd greeting to John.'); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":42,"href":"/docs/learn/workflow/generate_everything/","title":"7. Generate artifacts","parent":"Langium's workflow","content":"The syntax was ensured. The semantics were checked. Your workspace is free of errors. Now the AST is a valid representation of your input file written in your language. It is time to generate some cool stuff!\nDepending on your domain and on your requirements there are different ways to generate artifacts from your AST.\nHow to write the generator? The simplest way is to generate text into a string. Let\u0026rsquo;s print out every greeting from the hello-world example.\nimport type { Model } from '../language/generated/ast.js'; export function generateJavaScript(model: Model): string { return `\u0026quot;use strict\u0026quot;; ${model.greetings .map(greeting =\u0026gt; `console.log('Hello, ${greeting.person.ref?.name}!');`) .join(\u0026quot;\\n\u0026quot;) }`; } How to test the generator? You can test the generator by comparing the generated text with the expected text. Here is an example.\nimport { EmptyFileSystem } from \u0026quot;langium\u0026quot;; import { parseHelper } from \u0026quot;langium/test\u0026quot;; import { createHelloWorldServices } from \u0026quot;./your-project/hello-world-module.js\u0026quot;; import { Model } from \u0026quot;./your-project/generated/ast.js\u0026quot;; import { generateJavaScript } from \u0026quot;./your-project/generator.js\u0026quot;; //arrange const services = createHelloWorldServices(EmptyFileSystem); const parse = parseHelper\u0026lt;Model\u0026gt;(services.HelloWorld); const document = await parse(` person Langium Hello Langium! `, {validation: true}); expect(document.parseResult.lexerErrors).toHaveLength(0); expect(document.parseResult.parserErrors).toHaveLength(0); expect(document.diagnostics ?? []).toHaveLength(0); //act const javaScript = generateJavaScript(document.parseResult.value); //assert expect(javaScript).toBe(`\u0026quot;use strict\u0026quot;; console.log('Hello, Langium!');`); The expect function can be any assertion library you like. The Hello world example uses Vitest.\n"},{"id":43,"href":"/docs/recipes/code-bundling/","title":"Code Bundling","parent":"Recipes","content":"When you first create a Langium project using the Yeoman generator, it will only contain a plain TypeScript configuration, without any additional build processes. However, if you want to make your language available for consumption in a non-development context, you\u0026rsquo;ll want to create a bundle. It is not absolutely necessary in a Node.js context, since you can always resolve local node_modules, but it\u0026rsquo;s still recommended for VS Code extensions. It improves performance and decreases file size by minifying your code and only including what you actually need.\nWe generally recommend using esbuild to bundle Langium-based language servers and extensions.
To install it, simply run:\nnpm i --save-dev esbuild You can see a minimal configuration file below that bundles both your language server and your extension.\n//@ts-check import * as esbuild from 'esbuild'; const watch = process.argv.includes('--watch'); const minify = process.argv.includes('--minify'); const ctx = await esbuild.context({ entryPoints: ['src/extension.ts', 'src/language/main.ts'], outdir: 'out', bundle: true, target: \u0026quot;es6\u0026quot;, loader: { '.ts': 'ts' }, external: ['vscode'], // the vscode-module is created on-the-fly and must be excluded. platform: 'node', // VSCode extensions run in a node process sourcemap: !minify, minify }); if (watch) { await ctx.watch(); } else { await ctx.rebuild(); ctx.dispose(); } Store it in a module JavaScript file (.mjs) and create a corresponding script in your package.json file:\n\u0026quot;scripts\u0026quot;: { \u0026quot;build\u0026quot;: \u0026quot;node ./esbuild.mjs\u0026quot; } If you want to use a Langium language server in the browser, you can get away with an even smaller setup with the following script:\n\u0026quot;scripts\u0026quot;: { \u0026quot;build:worker\u0026quot;: \u0026quot;esbuild ./src/main.ts --bundle --format=iife --outfile=./public/languageServerWorker.js\u0026quot; } If you\u0026rsquo;re more inclined to use webpack, a configuration for an extension bundler can be seen below:\nconst path = require('path'); const commonConfig = { target: 'node', mode: 'none', devtool: 'nosources-source-map', externals: { vscode: 'commonjs vscode' // the vscode-module is created on-the-fly and must be excluded }, resolve: { extensions: ['.ts', '.js'] }, module: { rules: [ { test: /\\.js$/, enforce: 'pre', loader: 'source-map-loader', exclude: /vscode/ }, { test: /\\.ts$/, exclude: /node_modules/, use: [ { loader: 'ts-loader' } ] } ] } } const lspConfig = { ...commonConfig, entry: './src/language/main.ts', // the entry point of the language server output: { path: path.resolve(__dirname, 'out', 'language'), filename: 'main.js', libraryTarget: 'commonjs2', devtoolModuleFilenameTemplate: '../../[resource-path]', clean: true } }; const vscodeConfig = { ...commonConfig, entry: './src/extension.ts', // the entry point of this extension output: { path: path.resolve(__dirname, 'out'), filename: 'extension.js', libraryTarget: 'commonjs2', devtoolModuleFilenameTemplate: '../[resource-path]' } }; module.exports = [lspConfig, vscodeConfig]; "},{"id":44,"href":"/","title":"Langium","parent":"","content":" Built to bring language engineering to the next level _ Langium is an open source language engineering tool with first-class support for the Language Server Protocol, written in TypeScript and running in Node.js. This future-proof technology stack enables domain-specific languages\nin VS Code, Eclipse Theia, web applications, and more. Try it! Learn Why Langium? _ TypeScript integration Langium generates a typed abstract syntax tree (AST) definition that perfectly fits your grammar and provides utility functions to help you navigate and process the AST. _ Quality based on experience Langium was developed on the basis of years of practical use of Xtext, which is an integral part of numerous projects and products worldwide. We apply this experience to push language engineering to a new level. _ Low barrier to entry The main goal of Langium is to lower the barrier of creating a DSL or low-code platform. We achieve this by providing a special DSL that describes the syntax and structure of your language: the grammar language. 
_ Your language, everywhere Built exclusively on web technologies, Langium is not only available for Node.js based environments but works just as well in your browser. When packaged as a language server, you can connect it to most modern IDEs. _ Lean by default, customizable by design Exploiting the power of the Language Server Protocol, Langium provides useful default implementations for most features. If you are in need of something special, you can override the defaults with your custom implementation. _ Versatile use You can easily package a Langium-based DSL as a command line interface (CLI) to create a rich set of interconnected tools: validator, interpreter, code generators, service adapters, etc. Features _ Simple and direct integration .... with the VS Code extension API _ Well-known technology stack .... implemented in TypeScript, runs in Node.js _ Proven quality on a next level .... with a grammar declaration language similar to Xtext _ Declarative approach .... derives a parser and abstract syntax tree from a grammar declaration _ High performance ... by using Chevrotain\u0026mdash;the blazing fast parser library\u0026mdash;under the hood _ Scale it .... with high out-of-the-box functionality and high extensibility Langium vs. Xtext Despite its age, Xtext is still an excellent basis for building languages and related tools with a Java technology stack. In recent years, however, the VS Code extension API has become increasingly relevant, not only for VS Code itself, but also for other tools that support this format, such as Eclipse Theia. This is why Langium has been created. It enables language engineering in TypeScript, the same technology used for VS Code extensions. The differences at a glance: _ Langium is clear Building a tool that uses an Xtext-based language server with VS Code or Theia means creating a hybrid technology stack where some parts are implemented in Java and others in TypeScript. Developing and maintaining such a mixed code base is more challenging for the engineers involved, and long-term maintenance is more difficult compared to Langium's coherent technology stack. _ Langium is simple Xtext is heavily based on the Eclipse Modeling Framework (EMF). This can be an advantage if you want to integrate with other Eclipse modeling tools (e.g. Sirius), but it can also be a burden due to its complexity. Langium uses the simplest possible solution to describe an AST (i.e. the parsed contents of a text document): TypeScript interfaces. By relying on the built-in language constructs, we avoid the additional abstraction layers and steep learning curve of a modeling framework. In short: Langium wants to keep the concepts that have made Xtext successful, but lift them onto another platform. "},{"id":45,"href":"/docs/","title":"Documentation","parent":"Langium","content":""},{"id":46,"href":"/docs/learn/workflow/","title":"Langium's workflow","parent":"Learn Langium","content":"Langium\u0026rsquo;s workflow can be expressed as a flow chart, which boils down to the following steps. Be aware that the possibilities go beyond this simple workflow. For more advanced topics, you can find answers in the recipes.\n flowchart TD A([\"1. Install Yeoman\"]); B([\"2. Scaffold a Langium project\"]); C([\"3. Write the grammar\"]); D([\"4. Generate the AST\"]); E([\"5. Resolve cross-references\"]); F([\"6. Create validations\"]); G([\"7.
Generate artifacts\"]); H([\"Find advanced topics\"]); A --\u0026gt; B --\u0026gt; C --\u0026gt; D --\u0026gt; E --\u0026gt; F --\u0026gt; G ~~~ H; G -- for each additional\\ngrammar change --\u0026gt; C; click A \"/docs/learn/workflow/install\" click B \"/docs/learn/workflow/scaffold\" click C \"/docs/learn/workflow/write_grammar\" click D \"/docs/learn/workflow/generate_ast\" click E \"/docs/learn/workflow/resolve_cross_references\" click F \"/docs/learn/workflow/create_validations\" click G \"/docs/learn/workflow/generate_everything\" click H \"/docs/recipes\" Explanation This is the workflow we recommend for developing a language with Langium. It is a step-by-step guide that will help you to get started with Langium and to understand the basics of language development.\nThis simple introduction can be seen as three main parts:\n setting up your project environment (1.+2.): this is only done once specifying the language features (3.-7.): you go through this cycle for each grammar change everything advanced (8.): The limit of the common workflow is reached here. For specific questions you can find answers in the recipes. While the first part is straightforward, the last part is about advanced topics that differ from project to project. The middle part will be explained briefly in the following section.\nInitial setup 1. Install Yeoman This step ensures that you start a Langium project with the Yeoman generator. Yeoman is a scaffolding tool that helps you to start a new project with a predefined structure.\n2. Scaffold a Langium project After installing Yeoman, you can scaffold a new Langium project.\nCore workflow 3. Write the grammar The first step in the core workflow starts with the grammar. You will have some language feature in mind that you want to implement. The grammar is used to nail down the syntax of your features. You can use our Langium VS Code extension to get syntax highlighting and code completion for .langium files. If your grammar is free of errors, you can generate the files for the abstract syntax tree (AST).\n4. Generate the AST The AST is the backbone of your language. It is used to represent the structure of your language elements. The AST is generated from the grammar. One important part of the AST is the cross-references. They are used to resolve references between language elements. If you have cross-references in your language, you need to resolve them after this step. The actual generation is done by a call to the Langium CLI.\n5. Resolve cross-references The cross-references are used to resolve references between language elements (between different subtrees of one file or even elements of other files(!)). This step is quite important, because it is the basis for the next steps. You can also see it like this: Step 4 will generate an AST with gaps; this fifth step will fill these gaps.\n6. Create validations From here on we have a fully resolved AST. Now every input file that matches the syntax will be accepted. But we want to have more control over the input. We want to check if the input is semantically correct. This is done by creating validations. They are used to check the input against a set of rules. If the input does not match the rules, an error is reported.\n7. Generate artifacts Now you have a fully working language. You can generate whatever you want from the input. This can be code, documentation, or anything else. You can use the AST to traverse the input and generate the output.\nFind advanced topics Everything that is out of the scope of the common workflow is covered in the recipes.
Here you can find answers to specific questions or problems that you might encounter during the development of your language.\n"},{"id":47,"href":"/docs/learn/","title":"Learn Langium","parent":"Documentation","content":""},{"id":48,"href":"/showcase/openapi/","title":"OpenAPI SL","parent":"Langium Showcase","content":""},{"id":49,"href":"/tags/","title":"Tags","parent":"Langium","content":""},{"id":50,"href":"/docs/learn/minilogo/writing_a_grammar/","title":"Writing a Grammar","parent":"Minilogo tutorial","content":" Planning Sketching the Grammar Adding Commands Adding Expressions Adding Terminals In this tutorial we will be talking about writing a grammar for your language in Langium. As a motivating example, we\u0026rsquo;ll be describing how to write a grammar for the MiniLogo language. If you\u0026rsquo;re not familiar with MiniLogo, it\u0026rsquo;s a smaller implementation of the Logo programming language. Logo itself is a lot like Turtle from Python. Ultimately, we\u0026rsquo;ll be using MiniLogo to express drawing instructions that can be used to draw on a canvas.\nWe\u0026rsquo;ve already written an implementation of MiniLogo on GitHub using Langium. This tutorial follows along with this project, walking through the grammar implementation step by step. Later tutorials will also follow along with MiniLogo to create an easy-to-follow series.\nPlanning Before we get started writing the grammar, we\u0026rsquo;ll want to first identify a couple of important aspects of our language. Namely, these are:\n The Semantic Domain The Concrete Syntax The Semantic Domain describes the types of values that will be produced by evaluating our language. In the case of MiniLogo our semantic domain is going to have a single part, an updated drawing state that contains information on:\n position whether we\u0026rsquo;re drawing or not color of the drawing stroke We\u0026rsquo;ll also be producing values and updating an environment, which is important to keep in mind.\nBasically, a MiniLogo program can be considered equivalent to a series of transformations on some drawing context. This goal for MiniLogo will guide our design throughout these tutorials.
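To make the semantic domain tangible, here is one possible TypeScript shape for that drawing state. The names are illustrative and not part of the MiniLogo implementation:
// one possible shape of MiniLogo's drawing state (illustrative only)
interface DrawingState {
    // current pen position on the canvas
    x: number;
    y: number;
    // whether moving the pen draws a stroke
    penDown: boolean;
    // current stroke color, e.g. 'blue' or '#66CCFF'
    color: string;
}
// every MiniLogo command can then be seen as a transformation of this state
type Command = (state: DrawingState) =\u0026gt; DrawingState;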
In addition, we\u0026rsquo;ll want to get an idea of what our concrete syntax will be. This step can be done on paper if you like, but the overall goal is to get a feel for how you want the language to look. Your choice of concrete syntax will also drive your grammar\u0026rsquo;s design. If your design is chosen well, it can simplify the way your grammar is constructed. If your syntax is complex, the grammar may be complex as well. Not only this, but it\u0026rsquo;s also important to try and strike a balance between syntax that is special to your language, and syntax that is at least somewhat shared with other languages. The more unfamiliar the language appears, the more likely your users will struggle trying to pick it up.\nIn our case, we\u0026rsquo;re going to use a C-like concrete syntax. This will make it easy for most users to understand the structure of our programs. This is also chosen because it allows us to use curly braces to delimit blocks of code, which is quite easy to implement in Langium. You could also go for a Python-style language, where whitespace has significance in determining which block some code belongs to. Unfortunately, this is not as easy to do out of the box with Langium, due to it ignoring whitespace by default, but it can be configured to work for such languages.\nSketching the Grammar Now that we have an idea of our semantics and our concrete syntax, we can start writing out a grammar. Conveniently, MiniLogo already has a grammar and concrete syntax described, and that in turn is based on the Logo programming language. MiniLogo itself was designed by Eric Walkingshaw at Oregon State University, and was used to teach students. It\u0026rsquo;s not something that we\u0026rsquo;ve created, but rather something that we found to be an ideal demonstration of Langium\u0026rsquo;s capabilities, while also remaining friendly for newcomers.\nAs an aside, our version of MiniLogo will be an approximation of Dr. Walkingshaw\u0026rsquo;s version. We won\u0026rsquo;t adhere to it completely, and we won\u0026rsquo;t be incorporating some elements, such as variable declarations.\nTo get started sketching the grammar we\u0026rsquo;ll be using the Hello World example from the Yeoman generator. You can read about how to set this up in the learning section of our docs. We\u0026rsquo;ll be working with a fresh project from the generator using only the defaults, and building up from that. We\u0026rsquo;ll begin by modifying the default grammar file, and updating it to work for MiniLogo. You can find this file under src/language/hello-world.langium in your new project. If you used a name other than the default, the file will still be there, but using your custom name instead.\nWe\u0026rsquo;ll be overriding the existing Langium grammar file completely, so delete the old contents before we begin.\nThe first line that we\u0026rsquo;ll then add is the declaration of our grammar.\ngrammar MiniLogo This simply describes the name of the grammar that follows, and is required.\nNext, we\u0026rsquo;ll need to describe an entry rule. This will be a parser rule that must be matched first when recognizing a MiniLogo program. This rule is particularly special, because it will become the root of the resulting abstract syntax tree, which captures the essential structure of our program. For MiniLogo, our entry rule will be Model. You could also make it Program, but whatever you choose it should capture the same notion. Regardless of your choice, this rule should match any number of Statements and/or Definitions to follow the MiniLogo specification.\nentry Model: (stmts+=Stmt | defs+=Def)*; Each instance of a statement will be stored under the stmts property as an element of an Array. The same will be done for Definitions using defs as well. Note the trailing * after the grouping, which means technically a program containing nothing is also a valid MiniLogo program.\nTo iterate on this a little bit further we\u0026rsquo;ll need to describe what a Statement (Stmt) and a Definition (Def) are in the context of MiniLogo.\nFirst, let\u0026rsquo;s talk about Definitions. A definition corresponds to:\n a name a list of parameters a block of statements And we want definitions to look like so in our concrete syntax:\ndef myDef() { ... } ... def anotherDef(x,y,z) { ... } We can recognize this concrete syntax, and capture the relevant information for our AST, with the following rule:\nDef: 'def' name=ID '(' (params+=Param (',' params+=Param)*)?
To iterate on this a little bit further we\u0026rsquo;ll need to describe what a Statement (Stmt) and a Definition (Def) are in the context of MiniLogo.\nFirst, let\u0026rsquo;s talk about Definitions. A definition corresponds to:\n a name a list of parameters a block of statements And we want definitions to look like so in our concrete syntax:\ndef myDef() { ... } ... def anotherDef(x,y,z) { ... } We can recognize this concrete syntax, and capture the relevant information for our AST, with the following rule:\nDef: 'def' name=ID '(' (params+=Param (',' params+=Param)*)? ')' Block; As an additional note, much like in regular expressions, we use cardinality modifiers in our grammar to indicate that definitions can take any number of comma-separated parameters.\nYou may be wondering what Block is. Block corresponds to a rule fragment, which is akin to a reusable rule body. It\u0026rsquo;s not a rule itself, but a reusable piece that can be included in other rules. It\u0026rsquo;s particularly handy when you find yourself writing the same pattern repeatedly, and want to factor it out.\nfragment Block: '{' body+=Stmt* '}'; Then we have Statements, which consist of Commands or Macros.\nStmt: Cmd | Macro; A Command describes an action that transforms the drawing state (which connects to our semantic domain from before). The commands in MiniLogo can be expressed like so:\nCmd: Pen | Move | Color | For; Where each command is also a separate rule:\n Pen: Corresponds to a command that turns on/off drawing Move: Updates the position of the pen (relatively) Color: Sets the stroke color of what is drawn For: A standard for loop control flow These commands describe the essential drawing instructions that we will be representing. We\u0026rsquo;ll go over those in a moment.\nA statement can also be a Macro. A Macro has two distinct parts:\n a reference to a Definition (more on this shortly, think of it like a \u0026lsquo;function\u0026rsquo; for now) a list of arguments to apply this definition to In our concrete syntax, we want macros to look like this:\nmyMacro() ... anotherMacro(1, 2, 3 * 3) We can encode this in MiniLogo like so:\nMacro: def=[Def:ID] '(' (args+=Expr (',' args+=Expr)*)? ')'; In this case def will be a Cross Reference to an existing Definition. This special syntax says that def will be resolved at runtime to a Definition object, identified by an ID terminal token. Although we haven\u0026rsquo;t introduced this terminal yet, it\u0026rsquo;s a simple rule that captures identifier strings as tokens. It\u0026rsquo;s also important to note that cross references implicitly use the name property to hook up the reference to the target object.\nWe also want to add the notion of a Parameter, which is quite simple to write in:\nParam: name=ID; As you may have guessed, by using the name property for a parameter, we\u0026rsquo;re allowing Langium to automatically set up cross references for parameters as well.\nAdding Commands For the commands, we\u0026rsquo;ll go through each one, and show examples of the concrete syntax we\u0026rsquo;re trying to capture:\nPen needs to have two modes, up and down. So it should capture syntax like this:\npen(up) ... pen(down) We can express this with the following parser rule.\nPen: 'pen' '(' mode=('up' | 'down') ')'; Move commands will take a pair of expressions, corresponding to the x and y components, and can look like so:\nmove(1,5) ... move(x * 10, y * 10) We haven\u0026rsquo;t defined it yet, but we can use an Expr rule to represent where our expressions will go, and capture this command like this:\nMove: 'move' '(' ex=Expr ',' ey=Expr ')'; We\u0026rsquo;ll define expressions shortly.\nSimple for loops can be defined too, and should look like this:\nfor x = 0 to 10 { ... } Again, we don\u0026rsquo;t have Expr defined yet, but we can still use it here. Also, since we have a block of statements, we can reuse the Block fragment that was defined earlier.\nFor: 'for' var=Param '=' e1=Expr 'to' e2=Expr Block; 
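A quick aside on the semantic model: parser rules that are plain alternatives, such as Stmt and Cmd, don\u0026rsquo;t produce AST node types of their own. Instead, Langium generates union types for them, roughly sketched as:\ntype Stmt = Cmd | Macro; type Cmd = Pen | Move | Color | For; Likewise, the assignment mode=('up' | 'down') in the Pen rule should give the generated Pen interface a mode property typed as the string union 'up' | 'down', which makes handling commands later both convenient and type-safe. 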
Color commands are the last to add, and they\u0026rsquo;ll change the stroke color in a few ways. The first is by setting the RGB components as integers directly:\ncolor(128,64,255) The second is by passing in the name of a stroke color:\ncolor(blue) The last is by passing a hexadecimal value:\ncolor(#66CCFF) ... color(#6cf) The corresponding rule for this syntax is a special case where we have three different overloaded forms of the same command. To capture all of these forms, we can use two different sets of properties:\n r,g,b values for each color a single color value that can be either an ID or HEX We can encode this like so:\nColor: 'color' '(' ((r=Expr ',' g=Expr ',' b=Expr) | color=ID | color=HEX) ')'; What\u0026rsquo;s interesting here is that the color and r,g,b properties are all optional, since in any given instance only one set or the other will be defined. Between these two property sets, we have enough information to quickly determine what kind of color command we have, and to handle it correctly later on.\nAdding Expressions Now we\u0026rsquo;re at the core of our language, Expressions. In MiniLogo we want to be able to express not only literal values, but also references and arithmetic operations such as addition, subtraction, multiplication, and division. When implementing expressions, we need to keep in mind that Langium is based on Chevrotain, which produces top-down parsers. This means we have to watch out for cases that lead to left-recursion, so we need to be careful not to define a rule with itself on the left-hand side. For example, something like Expr: e1=Expr ... would not work, because the parser would recurse forever trying to parse another expression.\nHowever, we can work around this. We can introduce expressions and avoid left-recursion by writing them out in layers, following the order of operations. We\u0026rsquo;ll start with Add (which also includes subtraction):\nExpr: Add; Then we write a rule to handle the addition (and subtraction) case.\nAdd infers Expr: Mult ({infer BinExpr.e1=current} op=('+'|'-') e2=Mult)*; To explain a bit, the Add rule:\n produces an Expr instance (that\u0026rsquo;s what the infers is doing here) starts by recognizing a Mult instance then, if there\u0026rsquo;s a binary operator to parse: rewrites this parsed object into a BinExpr that will extend Expr (that\u0026rsquo;s what the second {infer ...} is doing) also captures the first Mult under the e1 property (that\u0026rsquo;s what the current keyword refers to) captures the operator +/- captures the following Mult instance (the right-hand side of our binary expression) otherwise simply returns the result of Mult (the case where we don\u0026rsquo;t have a binary expression) We can then repeat this pattern with the Mult rule:\nMult infers Expr: PrimExpr ({infer BinExpr.e1=current} op=('*'|'/') e2=PrimExpr)*; Lastly, we can introduce Primary expressions, or PrimExpr. This rule will match all the primitive cases, such as literals, references, groupings, and negation.\nPrimExpr: Lit | Ref | Group | NegExpr; // literal int Lit: val=INT; // cross-reference to a parameter Ref: val=[Param:ID]; // grouped expression with parentheses Group: '(' ge=Expr ')'; // negated expression NegExpr: '-' ne=Expr; By writing our parser rules first for Addition \u0026amp; Subtraction, and only then for Multiplication and Division, we construct an abstract syntax tree that correctly preserves the order of operations. 
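As a quick sanity check of the precedence this encodes, consider the input 2 + 3 * 4. The parser enters Add, recognizes the 2 as a Mult (ultimately a Lit), sees the +, and rewrites the result into a BinExpr with 2 under e1; the e2 side is then parsed as a complete Mult, which consumes 3 * 4 as its own BinExpr. The resulting tree is effectively BinExpr(op: '+', e1: Lit(2), e2: BinExpr(op: '*', e1: Lit(3), e2: Lit(4))), so multiplication binds more tightly than addition, exactly as we\u0026rsquo;d expect. 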
As a note, we could also write these rules without using actions to rewrite our parse tree. When we\u0026rsquo;re talking about actions, we\u0026rsquo;re talking about those cases of {infer ...}. However, then we\u0026rsquo;d get nodes like Add and Mult, instead of Expr and BinExpr. This is a tradeoff that is a bit tough to grasp at first in the grammar, but translates to a more sensible AST to work on later. This is especially helpful when we get to generation.\nAdding Terminals Now that we\u0026rsquo;re almost done with our grammar, we need to add in the terminal rules. Conveniently, the body of a terminal rule can be defined as a JavaScript regular expression, sharing the same syntax. This makes it very clear what our terminals should recognize.\n// recognize a hexadecimal sequence, used to recognize colors for the 'Color' command terminal HEX returns string: /#(\\d|[a-fA-F])+/; // recognize an identifier terminal ID returns string: /[_a-zA-Z][\\w_]*/; // recognize an Integer (but represented via a 'number' type) terminal INT returns number: /-?[0-9]+/; Then, lastly, we want to add hidden terminals. These describe tokens that we want to recognize and then discard while parsing input. Since we\u0026rsquo;re adding whitespace \u0026amp; comments as hidden terminals, it\u0026rsquo;s the same as saying we do not care about these tokens while parsing, but we do recognize that they are tokens; they just don\u0026rsquo;t play a role in capturing the structure of our language.\nhidden terminal WS: /\\s+/; hidden terminal ML_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//; hidden terminal SL_COMMENT: /\\/\\/[^\\n\\r]*/; And that\u0026rsquo;s it, we\u0026rsquo;re all set writing up the grammar for MiniLogo. To verify that our grammar is free of problems, we can run the following command in the project root:\nnpm run langium:generate The generation should finish successfully, indicating that our grammar doesn\u0026rsquo;t have any errors in it. In some cases, you may get warnings \u0026ndash; such as from unreachable rules in your grammar \u0026ndash; but these won\u0026rsquo;t prevent the generation from completing successfully. Also, when we\u0026rsquo;re referring to the generation, we\u0026rsquo;re talking about the construction of the following from your grammar:\n a semantic model (that ASTs can be mapped onto) a parser that recognizes our language With that, we have the beginnings of our very own language! Hopefully this gives a good idea of how to express a grammar in Langium, particularly with consideration to your concrete syntax \u0026amp; semantic domain. You\u0026rsquo;ve also seen how constructs that would naturally be left-recursive, like expressions, can be expressed in an alternative fashion. 
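If you\u0026rsquo;d like to smoke-test the freshly generated parser by hand, a small script like the following can help. This is a rough sketch that assumes you kept the generator defaults, so the generated services factory is named createHelloWorldServices and exposes a HelloWorld service bundle; adjust the names to match your project:\n// smoke-test the generated parser against a small MiniLogo program import { EmptyFileSystem } from 'langium'; import { createHelloWorldServices } from './language/hello-world-module.js'; const services = createHelloWorldServices(EmptyFileSystem).HelloWorld; const result = services.parser.LangiumParser.parse('def test(x) { move(x, x) } test(2 + 3 * 4)'); // both arrays should be empty if the grammar accepts the input console.log(result.lexerErrors, result.parserErrors); If both error arrays come back empty for inputs like these, the grammar is recognizing the language we set out to capture. 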
Overall, our grammar should now be ready for the next step of validation in the following tutorial.\n"}] \ No newline at end of file diff --git a/pr-previews/pr-246/sitemap.xml b/pr-previews/pr-246/sitemap.xml index 738c8a9f..327b71c4 100644 --- a/pr-previews/pr-246/sitemap.xml +++ b/pr-previews/pr-246/sitemap.xml @@ -1 +1 @@ -/docs/introduction/2024-08-22T17:26:20+03:00/docs/learn/minilogo/validation/2024-08-22T17:26:20+03:00/docs/learn/minilogo/customizing_cli/2024-08-22T17:26:20+03:00/docs/learn/minilogo/generation/2024-08-22T17:26:20+03:00/docs/learn/minilogo/building_an_extension/2024-08-22T17:26:20+03:00/docs/learn/minilogo/langium_and_monaco/2024-08-22T17:26:20+03:00/docs/learn/minilogo/generation_in_the_web/2024-08-22T17:26:20+03:00/docs/reference/glossary/2024-08-22T17:26:20+03:00/docs/recipes/lexing/2024-08-22T17:26:20+03:00/docs/recipes/lexing/case-insensitive-languages/2024-08-22T17:26:20+03:00/docs/reference/grammar-language/2024-08-22T17:26:20+03:00/docs/recipes/scoping/qualified-name/2024-08-22T17:26:20+03:00/docs/recipes/scoping/2024-08-22T17:26:20+03:00/showcase/statemachine/2024-08-22T17:26:20+03:00/docs/learn/workflow/install/2024-08-22T17:26:20+03:00/docs/recipes/builtin-library/2024-08-22T17:26:20+03:00/docs/recipes/scoping/class-member/2024-08-22T17:26:20+03:00/docs/reference/configuration-services/2024-08-22T17:26:20+03:00/docs/features/2024-08-22T17:26:20+03:00/docs/learn/minilogo/2024-08-22T17:26:20+03:00/docs/learn/workflow/scaffold/2024-08-22T17:26:20+03:00/showcase/arithmetics/2024-08-22T17:26:20+03:00/docs/reference/document-lifecycle/2024-08-22T17:26:20+03:00/docs/recipes/scoping/file-based/2024-08-22T17:26:20+03:00/docs/recipes/formatting/2024-08-22T17:26:20+03:00/docs/recipes/lexing/indentation-sensitive-languages/2024-08-22T17:26:20+03:00/docs/recipes/keywords-as-identifiers/2024-08-22T17:26:20+03:00/showcase/2024-08-22T17:26:20+03:00/showcase/minilogo/2024-08-22T17:26:20+03:00/docs/reference/2024-08-22T17:26:20+03:00/docs/introduction/showcases/2024-08-22T17:26:20+03:00/docs/learn/workflow/write_grammar/2024-08-22T17:26:20+03:00/docs/recipes/multiple-languages/2024-08-22T17:26:20+03:00/playground/2024-08-22T17:26:20+03:00/docs/recipes/2024-08-22T17:26:20+03:00/docs/reference/semantic-model/2024-08-22T17:26:20+03:00/showcase/sql/2024-08-22T17:26:20+03:00/docs/introduction/playground/2024-08-22T17:26:20+03:00/docs/learn/workflow/generate_ast/2024-08-22T17:26:20+03:00/showcase/domainmodel/2024-08-22T17:26:20+03:00/docs/learn/workflow/resolve_cross_references/2024-08-22T17:26:20+03:00/docs/learn/workflow/create_validations/2024-08-22T17:26:20+03:00/docs/learn/workflow/generate_everything/2024-08-22T17:26:20+03:00/docs/recipes/code-bundling/2024-08-22T17:26:20+03:00/2024-08-22T17:26:20+03:00/docs/2024-08-22T17:26:20+03:00/docs/learn/workflow/2024-08-22T17:26:20+03:00/docs/learn/2024-08-22T17:26:20+03:00/showcase/openapi/2024-08-22T17:26:20+03:00/tags//docs/learn/minilogo/writing_a_grammar/2024-08-22T17:26:20+03:00 \ No newline at end of file 
+/docs/introduction/2024-08-25T20:11:03+00:00/docs/learn/minilogo/validation/2024-08-25T20:11:03+00:00/docs/learn/minilogo/customizing_cli/2024-08-25T20:11:03+00:00/docs/learn/minilogo/generation/2024-08-25T20:11:03+00:00/docs/learn/minilogo/building_an_extension/2024-08-25T20:11:03+00:00/docs/learn/minilogo/langium_and_monaco/2024-08-25T20:11:03+00:00/docs/learn/minilogo/generation_in_the_web/2024-08-25T20:11:03+00:00/docs/reference/glossary/2024-08-25T20:11:03+00:00/docs/recipes/lexing/2024-08-25T20:11:03+00:00/docs/recipes/lexing/case-insensitive-languages/2024-08-25T20:11:03+00:00/docs/reference/grammar-language/2024-08-25T20:11:03+00:00/docs/recipes/scoping/qualified-name/2024-08-25T20:11:03+00:00/docs/recipes/scoping/2024-08-25T20:11:03+00:00/showcase/statemachine/2024-08-25T20:11:03+00:00/docs/learn/workflow/install/2024-08-25T20:11:03+00:00/docs/recipes/builtin-library/2024-08-25T20:11:03+00:00/docs/recipes/scoping/class-member/2024-08-25T20:11:03+00:00/docs/reference/configuration-services/2024-08-25T20:11:03+00:00/docs/features/2024-08-25T20:11:03+00:00/docs/learn/minilogo/2024-08-25T20:11:03+00:00/docs/learn/workflow/scaffold/2024-08-25T20:11:03+00:00/showcase/arithmetics/2024-08-25T20:11:03+00:00/docs/reference/document-lifecycle/2024-08-25T20:11:03+00:00/docs/recipes/scoping/file-based/2024-08-25T20:11:03+00:00/docs/recipes/formatting/2024-08-25T20:11:03+00:00/docs/recipes/lexing/indentation-sensitive-languages/2024-08-25T20:11:03+00:00/docs/recipes/keywords-as-identifiers/2024-08-25T20:11:03+00:00/showcase/2024-08-25T20:11:03+00:00/showcase/minilogo/2024-08-25T20:11:03+00:00/docs/reference/2024-08-25T20:11:03+00:00/docs/introduction/showcases/2024-08-25T20:11:03+00:00/docs/learn/workflow/write_grammar/2024-08-25T20:11:03+00:00/docs/recipes/multiple-languages/2024-08-25T20:11:03+00:00/playground/2024-08-25T20:11:03+00:00/docs/recipes/2024-08-25T20:11:03+00:00/docs/reference/semantic-model/2024-08-25T20:11:03+00:00/showcase/sql/2024-08-25T20:11:03+00:00/docs/introduction/playground/2024-08-25T20:11:03+00:00/docs/learn/workflow/generate_ast/2024-08-25T20:11:03+00:00/showcase/domainmodel/2024-08-25T20:11:03+00:00/docs/learn/workflow/resolve_cross_references/2024-08-25T20:11:03+00:00/docs/learn/workflow/create_validations/2024-08-25T20:11:03+00:00/docs/learn/workflow/generate_everything/2024-08-25T20:11:03+00:00/docs/recipes/code-bundling/2024-08-25T20:11:03+00:00/2024-08-25T20:11:03+00:00/docs/2024-08-25T20:11:03+00:00/docs/learn/workflow/2024-08-25T20:11:03+00:00/docs/learn/2024-08-25T20:11:03+00:00/showcase/openapi/2024-08-25T20:11:03+00:00/tags//docs/learn/minilogo/writing_a_grammar/2024-08-25T20:11:03+00:00 \ No newline at end of file