From 78067be21bd6d7605eb3621a64e9a82dc9dd7cb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6nig?= Date: Thu, 9 Jan 2025 14:18:29 +0100 Subject: [PATCH] added language models --- .../language-models/albertina-pt-br-base.json | 16 +++++++++++++++ .../albertina-pt-br-no-brwac.json | 16 +++++++++++++++ .../language-models/albertina-pt-br.json | 16 +++++++++++++++ .../language-models/albertina-pt-pt-base.json | 16 +++++++++++++++ .../language-models/albertina-pt-pt.json | 16 +++++++++++++++ .../language-models/bertimbau-base.json | 16 +++++++++++++++ .../language-models/bertimbau-large.json | 16 +++++++++++++++ .../language-models/ccgigafida-arpa.json | 16 +++++++++++++++ .../language-models/cered-base.json | 16 +++++++++++++++ .../language-models/clarin-si-embed.json | 20 +++++++++++++++++++ .../classla-stanford-lemma-slv.json | 16 +++++++++++++++ .../classla-stanford-ner-bul.json | 16 +++++++++++++++ .../classla-stanford-ner-hrv.json | 16 +++++++++++++++ .../classla-stanford-ner-non-std-hrv.json | 16 +++++++++++++++ .../classla-stanford-ner-non-std-slv.json | 16 +++++++++++++++ .../classla-stanford-ner-non-std-srp.json | 16 +++++++++++++++ .../classla-stanford-ner-slv.json | 16 +++++++++++++++ .../classla-stanford-ner-srp.json | 16 +++++++++++++++ .../language-models/classla-stanza-bul.json | 16 +++++++++++++++ .../language-models/classla-stanza-hrv.json | 16 +++++++++++++++ .../classla-stanza-jos-dep-slv.json | 16 +++++++++++++++ .../classla-stanza-lemma-bul.json | 16 +++++++++++++++ .../classla-stanza-lemma-hrv.json | 16 +++++++++++++++ .../classla-stanza-lemma-mkd.json | 16 +++++++++++++++ .../classla-stanza-lemma-non-std-hrv.json | 16 +++++++++++++++ .../classla-stanza-lemma-non-std-slv.json | 16 +++++++++++++++ .../classla-stanza-lemma-non-std-srp.json | 16 +++++++++++++++ .../classla-stanza-lemma-srp.json | 16 +++++++++++++++ .../language-models/classla-stanza-mkd.json | 16 +++++++++++++++ .../classla-stanza-non-std-hrv.json | 16 +++++++++++++++ .../classla-stanza-non-std-slv.json | 16 +++++++++++++++ .../classla-stanza-non-std-srp.json | 16 +++++++++++++++ .../classla-stanza-sem-roles-slv.json | 16 +++++++++++++++ .../language-models/classla-stanza-slv.json | 16 +++++++++++++++ .../language-models/classla-stanza-srp.json | 16 +++++++++++++++ .../classla-stanza-ud-dep-bul.json | 16 +++++++++++++++ .../classla-stanza-ud-dep-hrv.json | 16 +++++++++++++++ .../classla-stanza-ud-dep-slv.json | 16 +++++++++++++++ .../classla-stanza-ud-dep-srp.json | 16 +++++++++++++++ .../language-models/cnec-nametag.json | 16 +++++++++++++++ .../language-models/commonsense-reason.json | 16 +++++++++++++++ .../language-models/conll-2017-shared.json | 16 +++++++++++++++ .../language-models/conll-2018-shared.json | 16 +++++++++++++++ .../language-models/conll-nametag.json | 16 +++++++++++++++ .../language-models/crosloengual-bert.json | 16 +++++++++++++++ .../language-models/cubbitt-en-cs.json | 16 +++++++++++++++ .../language-models/cubbitt-en-fr.json | 16 +++++++++++++++ .../language-models/cubbitt-en-pl.json | 16 +++++++++++++++ .../language-models/czech-neural-monkeys.json | 16 +++++++++++++++ .../language-models/dep-parsing-pol.json | 16 +++++++++++++++ .../language-models/dep-parsing-stanza.json | 16 +++++++++++++++ .../language-models/elmo-embeddings.json | 16 +++++++++++++++ .../language-models/embeddings-eng-wiki.json | 16 +++++++++++++++ .../language-models/eng-mod-morphodita.json | 16 +++++++++++++++ .../language-models/face-domain-specific.json | 16 +++++++++++++++ 
.../language-models/finbert.json | 16 +++++++++++++++ .../language-models/frenk-mmc-rtv.json | 16 +++++++++++++++ .../language-models/g2p-icelandic.json | 16 +++++++++++++++ .../language-models/gervasio-pt-br-base.json | 16 +++++++++++++++ .../language-models/gervasio-pt-pt-base.json | 16 +++++++++++++++ .../language-models/greynir-mbart.json | 16 +++++++++++++++ .../language-models/greynir-t2t.json | 16 +++++++++++++++ .../language-models/korektor-czech.json | 16 +++++++++++++++ .../language-models/lemma-stanza.json | 16 +++++++++++++++ .../language-models/liner-events.json | 16 +++++++++++++++ .../language-models/liner-ner-nkjp.json | 16 +++++++++++++++ .../language-models/liner-ner.json | 16 +++++++++++++++ .../language-models/liner-timex.json | 16 +++++++++++++++ lexical-resources/language-models/liner.json | 16 +++++++++++++++ .../language-models/litlatbert.json | 16 +++++++++++++++ lexical-resources/language-models/lvbert.json | 16 +++++++++++++++ .../language-models/lx-dsemvectors.json | 16 +++++++++++++++ .../language-models/mcsq-tm-en-de.json | 16 +++++++++++++++ .../language-models/mcsq-tm-en-ru.json | 16 +++++++++++++++ .../language-models/morflex-cz-161115.json | 16 +++++++++++++++ .../language-models/nametag2.json | 16 +++++++++++++++ .../language-models/por-roberta.json | 16 +++++++++++++++ .../language-models/pos-lemma-ces.json | 16 +++++++++++++++ .../language-models/pos-tag-flair.json | 16 +++++++++++++++ .../language-models/pos-tag-marmot.json | 16 +++++++++++++++ .../language-models/pos-tag-stanza.json | 16 +++++++++++++++ .../language-models/pytorch-sloner.json | 16 +++++++++++++++ .../language-models/pytorch-slv.json | 16 +++++++++++++++ lexical-resources/language-models/ruv-di.json | 16 +++++++++++++++ .../language-models/sentiment-czech.json | 16 +++++++++++++++ .../language-models/slavic-forest.json | 16 +++++++++++++++ .../language-models/slk-morphodita.json | 16 +++++++++++++++ .../language-models/sloberta.json | 15 ++++++++++++++ .../language-models/trans-models-en-de.json | 16 +++++++++++++++ .../language-models/trans-models-en-ru.json | 16 +++++++++++++++ .../language-models/ud-parsito-models.json | 16 +++++++++++++++ .../language-models/udify-pre.json | 16 +++++++++++++++ .../language-models/udpipe-models.json | 16 +++++++++++++++ .../language-models/wmt-ca-oc-multi.json | 16 +++++++++++++++ .../language-models/wmt-ca-oc.json | 16 +++++++++++++++ .../language-models/wmt-ca-ro-it.json | 16 +++++++++++++++ .../language-models/wmt-tuning-cs-en.json | 16 +++++++++++++++ 97 files changed, 1555 insertions(+) create mode 100644 lexical-resources/language-models/albertina-pt-br-base.json create mode 100644 lexical-resources/language-models/albertina-pt-br-no-brwac.json create mode 100644 lexical-resources/language-models/albertina-pt-br.json create mode 100644 lexical-resources/language-models/albertina-pt-pt-base.json create mode 100644 lexical-resources/language-models/albertina-pt-pt.json create mode 100644 lexical-resources/language-models/bertimbau-base.json create mode 100644 lexical-resources/language-models/bertimbau-large.json create mode 100644 lexical-resources/language-models/ccgigafida-arpa.json create mode 100644 lexical-resources/language-models/cered-base.json create mode 100644 lexical-resources/language-models/clarin-si-embed.json create mode 100644 lexical-resources/language-models/classla-stanford-lemma-slv.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-bul.json create mode 100644 
lexical-resources/language-models/classla-stanford-ner-hrv.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-non-std-hrv.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-non-std-slv.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-non-std-srp.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-slv.json create mode 100644 lexical-resources/language-models/classla-stanford-ner-srp.json create mode 100644 lexical-resources/language-models/classla-stanza-bul.json create mode 100644 lexical-resources/language-models/classla-stanza-hrv.json create mode 100644 lexical-resources/language-models/classla-stanza-jos-dep-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-bul.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-hrv.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-mkd.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-non-std-hrv.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-non-std-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-non-std-srp.json create mode 100644 lexical-resources/language-models/classla-stanza-lemma-srp.json create mode 100644 lexical-resources/language-models/classla-stanza-mkd.json create mode 100644 lexical-resources/language-models/classla-stanza-non-std-hrv.json create mode 100644 lexical-resources/language-models/classla-stanza-non-std-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-non-std-srp.json create mode 100644 lexical-resources/language-models/classla-stanza-sem-roles-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-srp.json create mode 100644 lexical-resources/language-models/classla-stanza-ud-dep-bul.json create mode 100644 lexical-resources/language-models/classla-stanza-ud-dep-hrv.json create mode 100644 lexical-resources/language-models/classla-stanza-ud-dep-slv.json create mode 100644 lexical-resources/language-models/classla-stanza-ud-dep-srp.json create mode 100644 lexical-resources/language-models/cnec-nametag.json create mode 100644 lexical-resources/language-models/commonsense-reason.json create mode 100644 lexical-resources/language-models/conll-2017-shared.json create mode 100644 lexical-resources/language-models/conll-2018-shared.json create mode 100644 lexical-resources/language-models/conll-nametag.json create mode 100644 lexical-resources/language-models/crosloengual-bert.json create mode 100644 lexical-resources/language-models/cubbitt-en-cs.json create mode 100644 lexical-resources/language-models/cubbitt-en-fr.json create mode 100644 lexical-resources/language-models/cubbitt-en-pl.json create mode 100644 lexical-resources/language-models/czech-neural-monkeys.json create mode 100644 lexical-resources/language-models/dep-parsing-pol.json create mode 100644 lexical-resources/language-models/dep-parsing-stanza.json create mode 100644 lexical-resources/language-models/elmo-embeddings.json create mode 100644 lexical-resources/language-models/embeddings-eng-wiki.json create mode 100644 lexical-resources/language-models/eng-mod-morphodita.json create mode 100644 lexical-resources/language-models/face-domain-specific.json create mode 100644 lexical-resources/language-models/finbert.json create mode 100644 
lexical-resources/language-models/frenk-mmc-rtv.json create mode 100644 lexical-resources/language-models/g2p-icelandic.json create mode 100644 lexical-resources/language-models/gervasio-pt-br-base.json create mode 100644 lexical-resources/language-models/gervasio-pt-pt-base.json create mode 100644 lexical-resources/language-models/greynir-mbart.json create mode 100644 lexical-resources/language-models/greynir-t2t.json create mode 100644 lexical-resources/language-models/korektor-czech.json create mode 100644 lexical-resources/language-models/lemma-stanza.json create mode 100644 lexical-resources/language-models/liner-events.json create mode 100644 lexical-resources/language-models/liner-ner-nkjp.json create mode 100644 lexical-resources/language-models/liner-ner.json create mode 100644 lexical-resources/language-models/liner-timex.json create mode 100644 lexical-resources/language-models/liner.json create mode 100644 lexical-resources/language-models/litlatbert.json create mode 100644 lexical-resources/language-models/lvbert.json create mode 100644 lexical-resources/language-models/lx-dsemvectors.json create mode 100644 lexical-resources/language-models/mcsq-tm-en-de.json create mode 100644 lexical-resources/language-models/mcsq-tm-en-ru.json create mode 100644 lexical-resources/language-models/morflex-cz-161115.json create mode 100644 lexical-resources/language-models/nametag2.json create mode 100644 lexical-resources/language-models/por-roberta.json create mode 100644 lexical-resources/language-models/pos-lemma-ces.json create mode 100644 lexical-resources/language-models/pos-tag-flair.json create mode 100644 lexical-resources/language-models/pos-tag-marmot.json create mode 100644 lexical-resources/language-models/pos-tag-stanza.json create mode 100644 lexical-resources/language-models/pytorch-sloner.json create mode 100644 lexical-resources/language-models/pytorch-slv.json create mode 100644 lexical-resources/language-models/ruv-di.json create mode 100644 lexical-resources/language-models/sentiment-czech.json create mode 100644 lexical-resources/language-models/slavic-forest.json create mode 100644 lexical-resources/language-models/slk-morphodita.json create mode 100644 lexical-resources/language-models/sloberta.json create mode 100644 lexical-resources/language-models/trans-models-en-de.json create mode 100644 lexical-resources/language-models/trans-models-en-ru.json create mode 100644 lexical-resources/language-models/ud-parsito-models.json create mode 100644 lexical-resources/language-models/udify-pre.json create mode 100644 lexical-resources/language-models/udpipe-models.json create mode 100644 lexical-resources/language-models/wmt-ca-oc-multi.json create mode 100644 lexical-resources/language-models/wmt-ca-oc.json create mode 100644 lexical-resources/language-models/wmt-ca-ro-it.json create mode 100644 lexical-resources/language-models/wmt-tuning-cs-en.json diff --git a/lexical-resources/language-models/albertina-pt-br-base.json b/lexical-resources/language-models/albertina-pt-br-base.json new file mode 100644 index 0000000..0775e74 --- /dev/null +++ b/lexical-resources/language-models/albertina-pt-br-base.json @@ -0,0 +1,16 @@ +{ + "Name": "Albertina PT-BR base", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF45-5", + "Family": "Language Models", + "Description": "This model is for Portuguese spoken in Brazil. It is based on the Transformer neural architecture and is developed over the DeBERTa model. 
", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/albertina-ptbr-base" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/albertina-pt-br-no-brwac.json b/lexical-resources/language-models/albertina-pt-br-no-brwac.json new file mode 100644 index 0000000..b414e7b --- /dev/null +++ b/lexical-resources/language-models/albertina-pt-br-no-brwac.json @@ -0,0 +1,16 @@ +{ + "Name": "Albertina PT-BR No-brWaC", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF46-4 ", + "Family": "Language Models", + "Description": "This is a model for Portuguese spoken in Brazil trained on adta sets othan than brWaC. It is I developed over the DeBERTa model.\nThe model is available for download from Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/albertina-ptbr-nobrwac" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/albertina-pt-br.json b/lexical-resources/language-models/albertina-pt-br.json new file mode 100644 index 0000000..9ef65ca --- /dev/null +++ b/lexical-resources/language-models/albertina-pt-br.json @@ -0,0 +1,16 @@ +{ + "Name": "Albertina PT-BR", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF43-7 ", + "Family": "Language Models", + "Description": "This model is an encoder of the BERT family and is based on the neural architecture Transformer and developed over the DeBERTa model. This model is for American Portuguese spoken in Brazil, is trained on the brWaC dataset, and is a larger version of the Albertina PT-BR base model.\nThis model is available for download through Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/albertina-ptbr" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/albertina-pt-pt-base.json b/lexical-resources/language-models/albertina-pt-pt-base.json new file mode 100644 index 0000000..ee104a3 --- /dev/null +++ b/lexical-resources/language-models/albertina-pt-pt-base.json @@ -0,0 +1,16 @@ +{ + "Name": "Albertina PT-PT base", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF44-6", + "Family": "Language Models", + "Description": "This model is for European. It is based on the Transformer neural architecture and is developed over the DeBERTa model.\nThis model is available for download through Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/albertina-ptpt-base" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/albertina-pt-pt.json b/lexical-resources/language-models/albertina-pt-pt.json new file mode 100644 index 0000000..dced56a --- /dev/null +++ b/lexical-resources/language-models/albertina-pt-pt.json @@ -0,0 +1,16 @@ +{ + "Name": "Albertina PT-PT", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF42-8", + "Family": "Language Models", + "Description": "This model is an encoder of the BERT family and is based on the neural architecture Transformer and developed over the DeBERTa model. 
This model is for European Portuguese and is a larger version of the Albertina PT-PT base model.\nThis model is available for download through Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/albertina-ptpt" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/bertimbau-base.json b/lexical-resources/language-models/bertimbau-base.json new file mode 100644 index 0000000..f63fe38 --- /dev/null +++ b/lexical-resources/language-models/bertimbau-base.json @@ -0,0 +1,16 @@ +{ + "Name": "BERTimbau - Portuguese BERT-Base language model", + "URL": "https://hdl.handle.net/21.11129/0000-000E-6726-4", + "Family": "Language Models", + "Description": "This is a BERT model, trained on BrWaC (Brazilian Web as Corpus), a large Portuguese corpus, for 1,000,000 steps, using whole-word masking.\nThe model is available for download from the PORTULAN repository.", + "Language": ["por"], + "Licence": "Under negotiation", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://github.com/neuralmind-ai/portuguese-bert/" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/bertimbau-large.json b/lexical-resources/language-models/bertimbau-large.json new file mode 100644 index 0000000..ebed339 --- /dev/null +++ b/lexical-resources/language-models/bertimbau-large.json @@ -0,0 +1,16 @@ +{ + "Name": "BERTimbau - Portuguese BERT-Large language model", + "URL": "https://hdl.handle.net/21.11129/0000-000E-6725-5", + "Family": "Language Models", + "Description": "This is a BERT model, trained on BrWaC (Brazilian Web as Corpus), a large Portuguese corpus, for 1,000,000 steps, using whole-word masking.\nThe model is available for download from the PORTULAN repository.", + "Language": ["por"], + "Licence": "Under negotiation", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://github.com/neuralmind-ai/portuguese-bert/" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/ccgigafida-arpa.json b/lexical-resources/language-models/ccgigafida-arpa.json new file mode 100644 index 0000000..cf7b9db --- /dev/null +++ b/lexical-resources/language-models/ccgigafida-arpa.json @@ -0,0 +1,16 @@ +{ + "Name": "ccGigafida ARPA language model 1.0", + "URL": "http://hdl.handle.net/11356/1119", + "Family": "Language Models", + "Description": "This model was created from the ccGigafida written corpus of Slovenian using the KenLM algorithm in the Moses machine translation framework. 
It is a general language model of contemporary standard Slovenian that can be used in statistical machine translation systems.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "http://hdl.handle.net/11356/1119" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/cered-base.json b/lexical-resources/language-models/cered-base.json new file mode 100644 index 0000000..e2af5b0 --- /dev/null +++ b/lexical-resources/language-models/cered-base.json @@ -0,0 +1,16 @@ +{ + "Name": "CERED baseline models", + "URL": "http://hdl.handle.net/11234/1-3266", + "Family": "Language Models", + "Description": "These models are trained on CERED, a dataset created by distant supervision on Czech Wikipedia and Wikidata, and recognize a subset of Wikidata relations.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3266" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/clarin-si-embed.json b/lexical-resources/language-models/clarin-si-embed.json new file mode 100644 index 0000000..93d4d0c --- /dev/null +++ b/lexical-resources/language-models/clarin-si-embed.json @@ -0,0 +1,20 @@ +{ + "Name": "Word embeddings CLARIN.SI-embed", + "URL": "http://hdl.handle.net/11356/1796", + "Family": "Language Models", + "Description": "This is a set of word embeddings for five languages: Bulgarian, Croatian, Macedonian, Serbian, and Slovenian.\nThe models are available for download from the CLARIN.SI repository.", + "Language": ["bul", "hrv", "mkd", "srp", "slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + "Download (Bulgarian)": "http://hdl.handle.net/11356/1796", + "Download (Croatian)": "http://hdl.handle.net/11356/1790", + "Download (Macedonian)": "http://hdl.handle.net/11356/1788", + "Download (Serbian)": "http://hdl.handle.net/11356/1789", + "Download (Slovenian)": "http://hdl.handle.net/11356/1791" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/classla-stanford-lemma-slv.json b/lexical-resources/language-models/classla-stanford-lemma-slv.json new file mode 100644 index 0000000..ae93e26 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-lemma-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for lemmatisation of standard Slovenian 2.0", + "URL": "http://hdl.handle.net/11356/1768", + "Family": "Language Models", + "Description": "The model for lemmatisation of standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. 
The estimated F1 of the lemma annotations is ~99.7.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1768" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-bul.json b/lexical-resources/language-models/classla-stanford-ner-bul.json new file mode 100644 index 0000000..6cb5461 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-bul.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of standard Bulgarian 1.0", + "URL": "http://hdl.handle.net/11356/1329", + "Family": "Language Models", + "Description": "This model for named entity recognition of standard Bulgarian was built with the CLASSLA-StanfordNLP tool by training on the BulTreeBank training corpus and using the CoNLL2017 word embeddings.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["bul"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1329" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-hrv.json b/lexical-resources/language-models/classla-stanford-ner-hrv.json new file mode 100644 index 0000000..4c2445f --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of standard Croatian 1.0", + "URL": "http://hdl.handle.net/11356/1322", + "Family": "Language Models", + "Description": "This model for named entity recognition of standard Croatian was built with the CLASSLA-StanfordNLP tool by training on the hr500k training corpus and using the CLARIN.SI-embed.hr word embeddings.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1322" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-non-std-hrv.json b/lexical-resources/language-models/classla-stanford-ner-non-std-hrv.json new file mode 100644 index 0000000..73d1655 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-non-std-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of non-standard Croatian 1.0", + "URL": "http://hdl.handle.net/11356/1340", + "Family": "Language Models", + "Description": "This model for named entity recognition of non-standard Croatian was built with the CLASSLA-StanfordNLP tool by training on the hr500k training corpus, the ReLDI-NormTagNER-hr corpus and the ReLDI-NormTagNER-sr corpus, using the CLARIN.SI-embed.hr word embeddings. 
The training corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Croatian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1340" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-non-std-slv.json b/lexical-resources/language-models/classla-stanford-ner-non-std-slv.json new file mode 100644 index 0000000..80c5fe7 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-non-std-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of non-standard Slovenian 1.0", + "URL": "http://hdl.handle.net/11356/1339", + "Family": "Language Models", + "Description": "This model for named entity recognition of non-standard Slovenian was built with the CLASSLA-StanfordNLP tool by training on the ssj500k training corpus and the Janes-Tag training corpus, using the CLARIN.SI-embed.sl word embeddings. The training corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Slovenian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1339" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-non-std-srp.json b/lexical-resources/language-models/classla-stanford-ner-non-std-srp.json new file mode 100644 index 0000000..f4fb7ad --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-non-std-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of non-standard Serbian 1.0", + "URL": "http://hdl.handle.net/11356/1341", + "Family": "Language Models", + "Description": "This model for named entity recognition of non-standard Serbian was built with the CLASSLA-StanfordNLP tool by training on the SETimes.SR training corpus, the hr500k training corpus, the ReLDI-NormTagNER-sr corpus, and the ReLDI-NormTagNER-hr corpus, using the CLARIN.SI-embed.sr word embeddings. 
The training corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Serbian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1341" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-slv.json b/lexical-resources/language-models/classla-stanford-ner-slv.json new file mode 100644 index 0000000..7b3bed5 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of standard Slovenian 1.0", + "URL": "http://hdl.handle.net/11356/1321", + "Family": "Language Models", + "Description": "This model for named entity recognition of standard Slovenian was built with the CLASSLA-StanfordNLP tool by training on the ssj500k training corpus and using the CLARIN.SI-embed.sl word embeddings.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1321" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanford-ner-srp.json b/lexical-resources/language-models/classla-stanford-ner-srp.json new file mode 100644 index 0000000..e180715 --- /dev/null +++ b/lexical-resources/language-models/classla-stanford-ner-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-StanfordNLP model for named entity recognition of standard Serbian 1.0", + "URL": "http://hdl.handle.net/11356/1323", + "Family": "Language Models", + "Description": "This model for named entity recognition of standard Serbian was built with the CLASSLA-StanfordNLP tool by training on the SETimes.SR training corpus and using the CLARIN.SI-embed.sr word embeddings.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["srp"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1323" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-bul.json b/lexical-resources/language-models/classla-stanza-bul.json new file mode 100644 index 0000000..c3c5cf1 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-bul.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of standard Bulgarian 2.1", + "URL": "http://hdl.handle.net/11356/1849", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of standard Bulgarian was built with the CLASSLA-Stanza tool by training on the BulTreeBank training corpus and using the CLARIN.SI-embed.bg word embeddings. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. 
The estimated F1 of the XPOS annotations is ~96.83.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["bul"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1849" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-hrv.json b/lexical-resources/language-models/classla-stanza-hrv.json new file mode 100644 index 0000000..c3fb1aa --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of standard Croatian 2.1", + "URL": "http://hdl.handle.net/11356/1832", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of standard Croatian was built with the CLASSLA-Stanza tool by training on the hr500k training corpus and using the CLARIN.SI-embed.hr word embeddings. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. The estimated F1 of the XPOS annotations is ~94.87.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1832" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-jos-dep-slv.json b/lexical-resources/language-models/classla-stanza-jos-dep-slv.json new file mode 100644 index 0000000..ab78216 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-jos-dep-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for JOS dependency parsing of standard Slovenian 2.0", + "URL": "http://hdl.handle.net/11356/1764", + "Family": "Language Models", + "Description": "The model for JOS dependency parsing of standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. The estimated LAS of the parser is ~93.89.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11356/1764" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-bul.json b/lexical-resources/language-models/classla-stanza-lemma-bul.json new file mode 100644 index 0000000..3161d0a --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-bul.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of standard Bulgarian 2.1", + "URL": "http://hdl.handle.net/11356/1850", + "Family": "Language Models", + "Description": "The model for lemmatisation of standard Bulgarian was built with the CLASSLA-Stanza tool by training on the BulTreeBank training corpus and using the Bulgarian inflectional lexicon (Popov, Simov, and Vidinska 1998). 
The estimated F1 of the lemma annotations is ~98.93.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["bul"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1850" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-hrv.json b/lexical-resources/language-models/classla-stanza-lemma-hrv.json new file mode 100644 index 0000000..0d2e4ec --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of standard Croatian 2.1", + "URL": "http://hdl.handle.net/11356/1829", + "Family": "Language Models", + "Description": "The model for lemmatisation of standard Croatian was built with the CLASSLA-Stanza tool by training on the hr500k training corpus and using the hrLex inflectional lexicon. The estimated F1 of the lemma annotations is ~98.02.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1829" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-mkd.json b/lexical-resources/language-models/classla-stanza-lemma-mkd.json new file mode 100644 index 0000000..69de92e --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-mkd.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of standard Macedonian 2.1", + "URL": "http://hdl.handle.net/11356/1848", + "Family": "Language Models", + "Description": "The model for lemmatisation of standard Macedonian was built with the CLASSLA-Stanza tool by training on the 1984 training corpus expanded with the Macedonian SETimes corpus (to be published). The estimated F1 of the lemma annotations is ~98.81.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["mkd"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1848" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-non-std-hrv.json b/lexical-resources/language-models/classla-stanza-lemma-non-std-hrv.json new file mode 100644 index 0000000..43af036 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-non-std-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of non-standard Croatian 2.1", + "URL": "http://hdl.handle.net/11356/1827", + "Family": "Language Models", + "Description": "The model for lemmatisation of non-standard Croatian was built with the CLASSLA-Stanza tool by training on the hr500k training corpus and the ReLDI-NormTagNER-hr corpus, using the hrLex inflectional lexicon. These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. 
The estimated F1 of the lemma annotations is ~94.23.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Croatian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1827" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-non-std-slv.json b/lexical-resources/language-models/classla-stanza-lemma-non-std-slv.json new file mode 100644 index 0000000..8ff9c78 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-non-std-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of non-standard Slovenian 2.1", + "URL": "http://hdl.handle.net/11356/1784", + "Family": "Language Models", + "Description": "The model for lemmatisation of non-standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and on the Janes-Tag corpus using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. The estimated F1 of the lemma annotations is ~91.45.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Slovenian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1784" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-non-std-srp.json b/lexical-resources/language-models/classla-stanza-lemma-non-std-srp.json new file mode 100644 index 0000000..2d57356 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-non-std-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of non-standard Serbian 2.1", + "URL": "http://hdl.handle.net/11356/1828", + "Family": "Language Models", + "Description": "The model for lemmatisation of non-standard Serbian was built with the CLASSLA-Stanza tool by training on the SETimes.SR training corpus combined with the Serbian non-standard training corpus ReLDI-NormTagNER-sr and using the srLex inflectional lexicon. These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. 
The estimated F1 of the lemma annotations is ~94.92.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Serbian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1828" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-lemma-srp.json b/lexical-resources/language-models/classla-stanza-lemma-srp.json new file mode 100644 index 0000000..dc68abe --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-lemma-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for lemmatisation of standard Serbian 2.1", + "URL": "http://hdl.handle.net/11356/1830", + "Family": "Language Models", + "Description": "The model for lemmatisation of standard Serbian was built with the CLASSLA-Stanza tool by training on the SETimes.SR training corpus combined with the Serbian non-standard training corpus ReLDI-NormTagNER-sr and using the srLex inflectional lexicon. The estimated F1 of the lemma annotations is ~98.02.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["srp"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "http://hdl.handle.net/11356/1830" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-mkd.json b/lexical-resources/language-models/classla-stanza-mkd.json new file mode 100644 index 0000000..8cc18f2 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-mkd.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of standard Macedonian 2.1", + "URL": "http://hdl.handle.net/11356/1847", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of standard Macedonian was built with the CLASSLA-Stanza tool by training on the 1984 training corpus expanded with the Macedonian SETimes corpus (to be published) and using the Macedonian CLARIN.SI word embeddings. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. The estimated F1 of the XPOS annotations is ~97.14.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["mkd"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1847" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-non-std-hrv.json b/lexical-resources/language-models/classla-stanza-non-std-hrv.json new file mode 100644 index 0000000..7cb4410 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-non-std-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of non-standard Croatian 2.1", + "URL": "http://hdl.handle.net/11356/1826", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of non-standard Croatian was built with the CLASSLA-Stanza tool by training on the hr500k training corpus and the ReLDI-NormTagNER-hr corpus, using the CLARIN.SI-embed.hr word embeddings. 
These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. The estimated F1 of the XPOS annotations is ~92.49.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Croatian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1826" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-non-std-slv.json b/lexical-resources/language-models/classla-stanza-non-std-slv.json new file mode 100644 index 0000000..18fbbd2 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-non-std-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of non-standard Slovenian 2.1", + "URL": "http://hdl.handle.net/11356/1786", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of non-standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and on the Janes-Tag corpus using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. The estimated F1 of the XPOS annotations is ~92.17.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Slovenian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1786" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-non-std-srp.json b/lexical-resources/language-models/classla-stanza-non-std-srp.json new file mode 100644 index 0000000..f14f66d --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-non-std-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of non-standard Serbian 2.1", + "URL": "http://hdl.handle.net/11356/1825", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of non-standard Serbian was built with the CLASSLA-Stanza tool by training on the SETimes.SR training corpus combined with the Serbian non-standard training corpus ReLDI-NormTagNER-sr and the hr500k training corpus and using the CLARIN.SI-embed.sr word embeddings. These corpora were additionally augmented for handling missing diacritics by repeating parts of the corpora with diacritics removed. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. 
The estimated F1 of the XPOS annotations is ~92.64.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["Serbian (non-standard)"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1825" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-sem-roles-slv.json b/lexical-resources/language-models/classla-stanza-sem-roles-slv.json new file mode 100644 index 0000000..6681740 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-sem-roles-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for semantic role labeling of standard Slovenian 2.0", + "URL": "http://hdl.handle.net/11356/1770", + "Family": "Language Models", + "Description": "The model for semantic role labeling of standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and using the CLARIN.SI-embed.sl word embeddings extended with the MaCoCu-sl Slovene web corpus. The estimated F1 of the semantic role annotations is ~76.24.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["semantic role labeling"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11356/1770" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-slv.json b/lexical-resources/language-models/classla-stanza-slv.json new file mode 100644 index 0000000..08c318c --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of standard Slovenian 2.0", + "URL": "http://hdl.handle.net/11356/1767", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. The estimated F1 of the XPOS annotations is ~98.27.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1767" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-srp.json b/lexical-resources/language-models/classla-stanza-srp.json new file mode 100644 index 0000000..f976e49 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for morphosyntactic annotation of standard Serbian 2.1", + "URL": "http://hdl.handle.net/11356/1831", + "Family": "Language Models", + "Description": "The model for morphosyntactic annotation of standard Serbian was built with the CLASSLA-Stanza tool by training on the SETimes.SR training corpus combined with the Croatian hr500k training dataset to ensure sufficient representation of certain labels, and using the CLARIN.SI-embed.sr word embeddings. The model produces simultaneously UPOS, FEATS and XPOS (MULTEXT-East) labels. 
The estimated F1 of the XPOS annotations is ~96.19.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["srp"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11356/1831" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-ud-dep-bul.json b/lexical-resources/language-models/classla-stanza-ud-dep-bul.json new file mode 100644 index 0000000..3c9c3a7 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-ud-dep-bul.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for UD dependency parsing of standard Bulgarian 2.1", + "URL": "http://hdl.handle.net/11356/1851", + "Family": "Language Models", + "Description": "The model for UD dependency parsing of standard Bulgarian was built with the CLASSLA-Stanza tool by training on the UD-parsed portion of the BulTreeBank training corpus and using the CLARIN.SI-embed.bg word embeddings. The estimated LAS of the parser is ~91.18.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["bul"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11356/1851" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-ud-dep-hrv.json b/lexical-resources/language-models/classla-stanza-ud-dep-hrv.json new file mode 100644 index 0000000..50cb01c --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-ud-dep-hrv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for UD dependency parsing of standard Croatian 2.1", + "URL": "http://hdl.handle.net/11356/1836", + "Family": "Language Models", + "Description": "The model for UD dependency parsing of standard Croatian was built with the CLASSLA-Stanza tool by training on the UD-parsed portion of the hr500k training corpus and using the CLARIN.SI-embed.hr word embeddings. The estimated LAS of the parser is ~87.46.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11356/1836" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-ud-dep-slv.json b/lexical-resources/language-models/classla-stanza-ud-dep-slv.json new file mode 100644 index 0000000..18aa345 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-ud-dep-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for UD dependency parsing of standard Slovenian 2.0", + "URL": "http://hdl.handle.net/11356/1769", + "Family": "Language Models", + "Description": "The model for UD dependency parsing of standard Slovenian was built with the CLASSLA-Stanza tool by training on the SUK training corpus and using the CLARIN.SI-embed.sl word embeddings expanded with the MaCoCu-sl Slovene web corpus. 
The estimated LAS of the parser is ~91.11.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11356/1769" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/classla-stanza-ud-dep-srp.json b/lexical-resources/language-models/classla-stanza-ud-dep-srp.json new file mode 100644 index 0000000..94e43f4 --- /dev/null +++ b/lexical-resources/language-models/classla-stanza-ud-dep-srp.json @@ -0,0 +1,16 @@ +{ + "Name": "The CLASSLA-Stanza model for UD dependency parsing of standard Serbian 2.1", + "URL": "http://hdl.handle.net/11356/1835", + "Family": "Language Models", + "Description": "The model for UD dependency parsing of standard Serbian was built with the CLASSLA-Stanza tool by training on the SETimes.SR training corpus and using the CLARIN.SI-embed.sr word embeddings. The estimated LAS of the parser is ~89.83.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["srp"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11356/1835" + }, + "Publication": "Ljubešić and Dobrovoljc (2019)" +} diff --git a/lexical-resources/language-models/cnec-nametag.json b/lexical-resources/language-models/cnec-nametag.json new file mode 100644 index 0000000..571dca7 --- /dev/null +++ b/lexical-resources/language-models/cnec-nametag.json @@ -0,0 +1,16 @@ +{ + "Name": "Czech Models (CNEC) for NameTag", + "URL": "http://hdl.handle.net/11858/00-097C-0000-0023-7D42-8", + "Family": "Language Models", + "Description": "These are models for the named entity recognizer NameTag.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 3.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11858/00-097C-0000-0023-7D42-8" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/commonsense-reason.json b/lexical-resources/language-models/commonsense-reason.json new file mode 100644 index 0000000..8504205 --- /dev/null +++ b/lexical-resources/language-models/commonsense-reason.json @@ -0,0 +1,16 @@ +{ + "Name": "Model weights for a study of commonsense reasoning", + "URL": "https://hdl.handle.net/21.11129/0000-000F-4869-B", + "Family": "Language Models", + "Description": "This resource contains model weights for five Transformer-based models: RoBERTa, GPT-2, T5, BART and COMET. These models were implemented using Hugging Face, and fine-tuned on the following four commonsense reasoning tasks: Argument Reasoning Comprehension Task (ARCT), AI2 Reasoning Challenge (ARC), Physical Interaction Question Answering (PIQA) and CommonsenseQA (CSQA).\nThe models are available for download from the PORTULAN repository.", + "Language": ["eng"], + "Licence": "MIT", + "Size": [], + "Annotation": ["commonsense reasoning"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000F-4869-B" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/conll-2017-shared.json 
b/lexical-resources/language-models/conll-2017-shared.json new file mode 100644 index 0000000..b834207 --- /dev/null +++ b/lexical-resources/language-models/conll-2017-shared.json @@ -0,0 +1,16 @@ +{ + "Name": "CoNLL 2017 Shared Task - UDPipe Baseline Models and Supplementary Materials", + "URL": "http://hdl.handle.net/11234/1-1990", + "Family": "Language Models", + "Description": "These are models for the dependency parser UDPipe, developed as part of the CoNLL 2017 Shared Task in UD Parsing.\nThe models are available for download from the LINDAT repository.", + "Language": ["Multiple languages"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-1990" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/conll-2018-shared.json b/lexical-resources/language-models/conll-2018-shared.json new file mode 100644 index 0000000..a6e3749 --- /dev/null +++ b/lexical-resources/language-models/conll-2018-shared.json @@ -0,0 +1,16 @@ +{ + "Name": "CoNLL 2018 Shared Task - UDPipe Baseline Models and Supplementary Materials", + "URL": "http://hdl.handle.net/11234/1-2859", + "Family": "Language Models", + "Description": "This is a baseline model for UDPipe (version 1.2 and up), created for the CoNLL 2018 Shared Task in UD Parsing. The models were trained using a custom data split for treebanks where no development data is provided.\nThe model is available for download from the LINDAT repository.", + "Language": ["Multiple languages"], + "Licence": "License Universal Dependencies v2.2", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-2859" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/conll-nametag.json b/lexical-resources/language-models/conll-nametag.json new file mode 100644 index 0000000..2255042 --- /dev/null +++ b/lexical-resources/language-models/conll-nametag.json @@ -0,0 +1,16 @@ +{ + "Name": "English Model (CoNLL-2003) for NameTag", + "URL": "http://hdl.handle.net/11234/1-3118", + "Family": "Language Models", + "Description": "This is an English model for NameTag, a named entity recognition tool. The model is trained on CoNLL-2003 training data and recognizes PER, ORG, LOC and MISC named entities. It achieves an F-measure 84.73 on the CoNLL-2003 test data.\nThe model is available for download from the LINDAT repository.", + "Language": ["eng"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3118" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/crosloengual-bert.json b/lexical-resources/language-models/crosloengual-bert.json new file mode 100644 index 0000000..1e10455 --- /dev/null +++ b/lexical-resources/language-models/crosloengual-bert.json @@ -0,0 +1,16 @@ +{ + "Name": "CroSloEngual BERT 1.1", + "URL": "http://hdl.handle.net/11356/1330", + "Family": "Language Models", + "Description": "Trilingual BERT (Bidirectional Encoder Representations from Transformers) model, trained on Croatian, Slovenian, and English data. 
It is a state-of-the-art model that represents words/tokens as contextually dependent word embeddings and is used for various NLP classification tasks by fine-tuning the model end-to-end. CroSloEngual BERT is distributed as neural network weights and configuration files in PyTorch format (i.e. to be used with the PyTorch library).\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv", "eng", "slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + "Download": "http://hdl.handle.net/11356/1330" + }, + "Publication": "Ulčar and Robnik-Šikonja (2020)" +} diff --git a/lexical-resources/language-models/cubbitt-en-cs.json b/lexical-resources/language-models/cubbitt-en-cs.json new file mode 100644 index 0000000..d962ea0 --- /dev/null +++ b/lexical-resources/language-models/cubbitt-en-cs.json @@ -0,0 +1,16 @@ +{ + "Name": "CUBBITT Translation Models (en-cs) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-3733", + "Family": "Language Models", + "Description": "These English-Czech translation models are used by the LINDAT translation service.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces", "eng"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3733" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/cubbitt-en-fr.json b/lexical-resources/language-models/cubbitt-en-fr.json new file mode 100644 index 0000000..0f24dda --- /dev/null +++ b/lexical-resources/language-models/cubbitt-en-fr.json @@ -0,0 +1,16 @@ +{ + "Name": "CUBBITT Translation Models (en-fr) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-3743", + "Family": "Language Models", + "Description": "These are CUBBITT English-French translation models available in the LINDAT translation service.\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "fra"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3743" + }, + "Publication": "Popel et al. (2020)" +} diff --git a/lexical-resources/language-models/cubbitt-en-pl.json b/lexical-resources/language-models/cubbitt-en-pl.json new file mode 100644 index 0000000..32283f1 --- /dev/null +++ b/lexical-resources/language-models/cubbitt-en-pl.json @@ -0,0 +1,16 @@ +{ + "Name": "CUBBITT Translation Models (en-pl) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-3742", + "Family": "Language Models", + "Description": "These are CUBBITT English-Polish translation models available in the LINDAT translation service.\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "pol"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3742" + }, + "Publication": "Popel et al. 
(2020)" +} diff --git a/lexical-resources/language-models/czech-neural-monkeys.json b/lexical-resources/language-models/czech-neural-monkeys.json new file mode 100644 index 0000000..37f42f9 --- /dev/null +++ b/lexical-resources/language-models/czech-neural-monkeys.json @@ -0,0 +1,16 @@ +{ + "Name": "Czech image captioning, machine translation, sentiment analysis and summarization (Neural Monkey models)", + "URL": "http://hdl.handle.net/11234/1-3145", + "Family": "Language Models", + "Description": "These models are for the Neural Monkey toolkit for Czech and English, solving four NLP tasks: machine translation, image captioning, sentiment analysis, and summarization. The models are trained on standard datasets and achieve state-of-the-art or near state-of-the-art performance in the tasks. The same models can also be invoked via an online demo.\nThis entry also includes models for automatic news summarization for Czech and English. The Czech models were trained using the SumeCzech dataset, while the English models were trained using the CNN-Daily Mail corpus, using the standard recurrent sequence-to-sequence architecture.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces", "eng"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3145" + }, + "Publication": "Libovicky et al. (2018)" +} diff --git a/lexical-resources/language-models/dep-parsing-pol.json b/lexical-resources/language-models/dep-parsing-pol.json new file mode 100644 index 0000000..df8e56d --- /dev/null +++ b/lexical-resources/language-models/dep-parsing-pol.json @@ -0,0 +1,16 @@ +{ + "Name": "Dependency parsing models for Polish", + "URL": "http://hdl.handle.net/11321/552", + "Family": "Language Models", + "Description": "These models are trained on the 3.5 version of the Polish Dependency Treebank with the publicly available parsing systems: MaltParser, MateParser, and UDPipe.\nThe models are available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://zil.ipipan.waw.pl/PDB/PDBparser" + }, + "Publication": "Wroblewska and Rybak (2019)" +} diff --git a/lexical-resources/language-models/dep-parsing-stanza.json b/lexical-resources/language-models/dep-parsing-stanza.json new file mode 100644 index 0000000..85e93ba --- /dev/null +++ b/lexical-resources/language-models/dep-parsing-stanza.json @@ -0,0 +1,16 @@ +{ + "Name": "Dependency parsing model: Stanza", + "URL": "https://spraakbanken.gu.se/index.php/en/resources/stanzasynt", + "Family": "Language Models", + "Description": "", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "https://spraakbanken.gu.se/index.php/en/resources/stanzasynt" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/elmo-embeddings.json b/lexical-resources/language-models/elmo-embeddings.json new file mode 100644 index 0000000..32e269d --- /dev/null +++ b/lexical-resources/language-models/elmo-embeddings.json @@ -0,0 +1,16 @@ +{ + "Name": "ELMo embeddings models for seven languages", + "URL": "http://hdl.handle.net/11356/1277", + "Family": "Language 
Models", + "Description": "This model is used to produce contextual word embeddings. It is trained on large monolingual corpora for 7 languages. Each language's model was trained for approximately 10 epochs. Corpora sizes used in training range from over 270 M tokens in Latvian to almost 2 B tokens in Croatian. About 1 million most common tokens were provided as vocabulary during the training for each language model. The model can also infer OOV words, since the neural network input is on the character level.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["hrv", "est", "fin", "lav", "lit", "slv", "swe"], + "Licence": "Apache License 2.0", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + "Download": "http://hdl.handle.net/11356/1277" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/embeddings-eng-wiki.json b/lexical-resources/language-models/embeddings-eng-wiki.json new file mode 100644 index 0000000..93e58e0 --- /dev/null +++ b/lexical-resources/language-models/embeddings-eng-wiki.json @@ -0,0 +1,16 @@ +{ + "Name": "Word Embeddings trained on English Wikipedia", + "URL": "https://spraakbanken.gu.se/en/resources/wikipedia-embeddings", + "Family": "Language Models", + "Description": "This is a set of contextual word embeddings.\nThe models are available for download from the Swedish Language Bank.", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + "Download": "https://spraakbanken.gu.se/en/resources/wikipedia-embeddings" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/eng-mod-morphodita.json b/lexical-resources/language-models/eng-mod-morphodita.json new file mode 100644 index 0000000..1d76494 --- /dev/null +++ b/lexical-resources/language-models/eng-mod-morphodita.json @@ -0,0 +1,16 @@ +{ + "Name": "English Models (Morphium + WSJ) for MorphoDiTa", + "URL": "http://hdl.handle.net/11858/00-097C-0000-0023-68D9-0", + "Family": "Language Models", + "Description": "These models are for MorphoDiTa, which performs morphological analysis, morphological generation and part-of-speech tagging (see also the PoS-taggers and lemmatizers Resource Family).\nThe morphological dictionary is created from Morphium and SCOWL (Spell Checker Oriented Word Lists), the PoS tagger is trained on the Wall Street Journal.", + "Language": ["eng"], + "Licence": "CC BY-NC-SA 3.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11858/00-097C-0000-0023-68D9-0" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/face-domain-specific.json b/lexical-resources/language-models/face-domain-specific.json new file mode 100644 index 0000000..3c38e20 --- /dev/null +++ b/lexical-resources/language-models/face-domain-specific.json @@ -0,0 +1,16 @@ +{ + "Name": "Face-domain-specific automatic speech recognition models", + "URL": "http://hdl.handle.net/11356/1749", + "Family": "Language Models", + "Description": "This model contains all the files required to implement face-domain-specific automatic speech recognition (ASR) applications using the Kaldi ASR toolkit, including the acoustic model, language model, and other relevant files. 
It also includes all the scripts and configuration files needed to use these models for implementing face-domain-specific automatic speech recognition.\nThe acoustic model was trained using the relevant Kaldi ASR tools and the Artur speech corpus (audio,transcriptions). The language model was trained using the domain-specific text data involving face descriptions obtained by translating the Face2Text English dataset into the Slovenian language. These models, combined with other necessary files like the HCLG.fst and decoding scripts, enable the implementation of face-domain-specific ASR applications.\nThis resource is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "Apache License 2.0", + "Size": [], + "Annotation": ["face-domain-specific automatic speech recognition"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11356/1749" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/finbert.json b/lexical-resources/language-models/finbert.json new file mode 100644 index 0000000..a278eef --- /dev/null +++ b/lexical-resources/language-models/finbert.json @@ -0,0 +1,16 @@ +{ + "Name": "FinBERT", + "URL": "http://urn.fi/urn:nbn:fi:lb-202004212", + "Family": "Language Models", + "Description": "This BERT model can be fine-tuned to achieve state-of-the-art results for various Finnish natural language processing tasks.\nThe model is available for download from the Language Bank of Finland.", + "Language": ["fin"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://urn.fi/urn:nbn:fi:lb-202004212" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/frenk-mmc-rtv.json b/lexical-resources/language-models/frenk-mmc-rtv.json new file mode 100644 index 0000000..02cd118 --- /dev/null +++ b/lexical-resources/language-models/frenk-mmc-rtv.json @@ -0,0 +1,16 @@ +{ + "Name": "Dataset and baseline model of moderated content FRENK-MMC-RTV 1.0", + "URL": "http://hdl.handle.net/11356/1201", + "Family": "Language Models", + "Description": "FRENK-MMC-RTV is a dataset of moderated newspaper comments from the website rtvslo.si with metadata on the time of publishing, user identifier, thread identifier and whether the comment was deleted by the moderators or not. The full text of each comment is encrypted via a character-replacement method so that the comments are not readable by humans. Basic punctuation is not encrypted in order to enable tokenization. The main use of this dataset are experiments on automating comment moderation. For real-world usage, a fastText classification model trained on non-encrypted data is made available as well.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "http://hdl.handle.net/11356/1201" + }, + "Publication": "Ljubešić et al. 
(2018)" +} diff --git a/lexical-resources/language-models/g2p-icelandic.json b/lexical-resources/language-models/g2p-icelandic.json new file mode 100644 index 0000000..cdda283 --- /dev/null +++ b/lexical-resources/language-models/g2p-icelandic.json @@ -0,0 +1,16 @@ +{ + "Name": "Models for automatic g2p for Icelandic (20.10)", + "URL": "http://hdl.handle.net/20.500.12537/84", + "Family": "Language Models", + "Description": "These are grapheme-to-phoneme models for Icelandic, trained on an encoder-decoder LSTM neural network. The models are delivered with scripts for automatic transcription of Icelandic in the standard pronunciation variation, in the northern variation, north-east variation, and the south variation. To run the scripts the user needs to install Fairseq.", + "Language": ["isl"], + "Licence": "Apache License 2.0", + "Size": [], + "Annotation": ["phonemic transcription"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/20.500.12537/84" + }, + "Publication": "Gorman et al. (2020)" +} diff --git a/lexical-resources/language-models/gervasio-pt-br-base.json b/lexical-resources/language-models/gervasio-pt-br-base.json new file mode 100644 index 0000000..80ccccc --- /dev/null +++ b/lexical-resources/language-models/gervasio-pt-br-base.json @@ -0,0 +1,16 @@ +{ + "Name": "Gervásio PT-BR base", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF48-2 ", + "Family": "Language Models", + "Description": "This model, which is for Portuguese spoken in Brazil, is a decoder of the GPT family that is based on the neural architecture Transformer and developed over the Pythia model.\nThe model is available for download from Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000F-FF48-2 " + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/gervasio-pt-pt-base.json b/lexical-resources/language-models/gervasio-pt-pt-base.json new file mode 100644 index 0000000..1cced0c --- /dev/null +++ b/lexical-resources/language-models/gervasio-pt-pt-base.json @@ -0,0 +1,16 @@ +{ + "Name": "Gervásio PT-PT base", + "URL": "https://hdl.handle.net/21.11129/0000-000F-FF47-3", + "Family": "Language Models", + "Description": "This model, which is for European Portuguese, is a decoder of the GPT family that is based on the neural architecture Transformer and developed over the Pythia model.\nThe model is available for download from Hugging Face.", + "Language": ["por"], + "Licence": "MIT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://huggingface.co/PORTULAN/gervasio-ptpt-base" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/greynir-mbart.json b/lexical-resources/language-models/greynir-mbart.json new file mode 100644 index 0000000..f44ef43 --- /dev/null +++ b/lexical-resources/language-models/greynir-mbart.json @@ -0,0 +1,16 @@ +{ + "Name": "GreynirTranslate - mBART25 NMT (with layer drop) models for Translations between Icelandic and English (1.0)", + "URL": "http://hdl.handle.net/20.500.12537/128", + "Family": "Language Models", + "Description": "These are a variant of GreynirTranslate - mBART25 NMT models for Translations between Icelandic and English (1.0), trained with a 40% layer drop. 
They are suitable for inference using every other layer for optimized inference speed with lower translation performance.\nThese models are available for download from the repository of CLARIN-IS.", + "Language": ["isl", "eng"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/20.500.12537/128" + }, + "Publication": "Simonarson et al. (2021)" +} diff --git a/lexical-resources/language-models/greynir-t2t.json b/lexical-resources/language-models/greynir-t2t.json new file mode 100644 index 0000000..c5572f5 --- /dev/null +++ b/lexical-resources/language-models/greynir-t2t.json @@ -0,0 +1,16 @@ +{ + "Name": "GreynirT2T Serving - En--Is NMT Inference and Pre-trained Models (1.0)", + "URL": "http://hdl.handle.net/20.500.12537/72", + "Family": "Language Models", + "Description": "This CLARIN-IS repository entry includes code and models required to run the GreynirT2T Transformer NMT system for translation between English and Icelandic.\nThe models along with the code are available for download from the CLARIN-IS repository.", + "Language": ["eng", "isl"], + "Licence": "The MIT License", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/20.500.12537/72" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/korektor-czech.json b/lexical-resources/language-models/korektor-czech.json new file mode 100644 index 0000000..f51e7b8 --- /dev/null +++ b/lexical-resources/language-models/korektor-czech.json @@ -0,0 +1,16 @@ +{ + "Name": "Czech Models for Korektor 2", + "URL": "http://hdl.handle.net/11234/1-1460", + "Family": "Language Models", + "Description": "These models are for the statistical spellchecker Korektor 2. 
The models can either perform spellchecking and grammar-checking, or only generate diacritical marks.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 3.0", + "Size": [], + "Annotation": ["normalization"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11234/1-1460" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/lemma-stanza.json b/lexical-resources/language-models/lemma-stanza.json new file mode 100644 index 0000000..c03eea2 --- /dev/null +++ b/lexical-resources/language-models/lemma-stanza.json @@ -0,0 +1,16 @@ +{ + "Name": "Lemmatization model: Stanza", + "URL": "https://spraakbanken.gu.se/index.php/en/resources/stanzalem", + "Family": "Language Models", + "Description": "This model enables lemmatisation of Swedish text following the SUC3 standard.\nThe models are available for download from the Swedish Language Bank.", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Lemmatisation", + "Access": { + "Download": "https://spraakbanken.gu.se/index.php/en/resources/stanzalem" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/liner-events.json b/lexical-resources/language-models/liner-events.json new file mode 100644 index 0000000..d15ff6c --- /dev/null +++ b/lexical-resources/language-models/liner-events.json @@ -0,0 +1,16 @@ +{ + "Name": "Liner2.5 model Events", + "URL": "http://hdl.handle.net/11321/301", + "Family": "Language Models", + "Description": "This is a model for the Liner2.5 tool for the recognition of event mentions.\nThe model is available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["event mentions"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11321/301" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/liner-ner-nkjp.json b/lexical-resources/language-models/liner-ner-nkjp.json new file mode 100644 index 0000000..5b388ac --- /dev/null +++ b/lexical-resources/language-models/liner-ner-nkjp.json @@ -0,0 +1,16 @@ +{ + "Name": "Liner2.6 model NER NKJP", + "URL": "http://hdl.handle.net/11321/598", + "Family": "Language Models", + "Description": "This is a Liner2 model for the recognition of named entities. The model was trained on the NKJP corpus and evaluated in the PolEval 2018 Task 2.\nThe model is available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "GNU GPL3", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11321/598" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/liner-ner.json b/lexical-resources/language-models/liner-ner.json new file mode 100644 index 0000000..b768c17 --- /dev/null +++ b/lexical-resources/language-models/liner-ner.json @@ -0,0 +1,16 @@ +{ + "Name": "Liner2.5 model NER", + "URL": "http://hdl.handle.net/11321/263", + "Family": "Language Models", + "Description": "This is a model for the Liner 2.5 tool. 
\nThe model is available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "GNU LGPL 3.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11321/263" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/liner-timex.json b/lexical-resources/language-models/liner-timex.json new file mode 100644 index 0000000..d63cbf3 --- /dev/null +++ b/lexical-resources/language-models/liner-timex.json @@ -0,0 +1,16 @@ +{ + "Name": "Liner2.5 model Timex", + "URL": "http://hdl.handle.net/11321/302", + "Family": "Language Models", + "Description": "This is a model for the Liner2.5 tool for the recognition and normalization of temporal expressions.\nThe model is available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["temporal expressions"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11321/302" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/liner.json b/lexical-resources/language-models/liner.json new file mode 100644 index 0000000..76d6946 --- /dev/null +++ b/lexical-resources/language-models/liner.json @@ -0,0 +1,16 @@ +{ + "Name": "Liner2.5 model Minos", + "URL": "http://hdl.handle.net/11321/292", + "Family": "Language Models", + "Description": "This is a model for the Liner2.5 tool for the recognition of verbs without explicit subjects.\nThe model is available for download from the CLARIN-PL repository.", + "Language": ["pol"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11321/292" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/litlatbert.json b/lexical-resources/language-models/litlatbert.json new file mode 100644 index 0000000..f894798 --- /dev/null +++ b/lexical-resources/language-models/litlatbert.json @@ -0,0 +1,16 @@ +{ + "Name": "LitLat BERT", + "URL": "http://hdl.handle.net/20.500.11821/42", + "Family": "Language Models", + "Description": "", + "Language": ["lit", "lav", "eng"], + "Licence": "PUB CLARIN-LT", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "http://hdl.handle.net/20.500.11821/42" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/lvbert.json b/lexical-resources/language-models/lvbert.json new file mode 100644 index 0000000..a7d09ef --- /dev/null +++ b/lexical-resources/language-models/lvbert.json @@ -0,0 +1,16 @@ +{ + "Name": "LVBERT - Latvian BERT", + "URL": "http://hdl.handle.net/20.500.12574/43", + "Family": "Language Models", + "Description": "This model is trained on the original implementation of BERT on the TensorFlow machine-learning platform with the whole-word masking and the next sentence prediction objectives. 
This uses the BERT configuration with 12 layers, 768 hidden units, 12 heads, 128 sequence length, 128 mini-batch size and a 32,000 token vocabulary.\nThe model is available for download from the CLARIN-LV repository.", + "Language": ["lav"], + "Licence": "GNU GPL3", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "http://hdl.handle.net/20.500.12574/43" + }, + "Publication": "Znotinš and Barzdinš (2020)" +} diff --git a/lexical-resources/language-models/lx-dsemvectors.json b/lexical-resources/language-models/lx-dsemvectors.json new file mode 100644 index 0000000..76b96e8 --- /dev/null +++ b/lexical-resources/language-models/lx-dsemvectors.json @@ -0,0 +1,16 @@ +{ + "Name": "LX-DSemVectors", + "URL": "https://hdl.handle.net/21.11129/0000-000B-D38A-B", + "Family": "Language Models", + "Description": "This model represents tokens as contextual word embeddings for Portuguese. It was trained on a corpus of 2 billion tokens and achieved state-of-the-art results on multiple lexical semantic tasks.\nThe model is available for download from the PORTULAN repository.", + "Language": ["por"], + "Licence": "CC-BY", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000B-D38A-B" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/mcsq-tm-en-de.json b/lexical-resources/language-models/mcsq-tm-en-de.json new file mode 100644 index 0000000..e7457c2 --- /dev/null +++ b/lexical-resources/language-models/mcsq-tm-en-de.json @@ -0,0 +1,16 @@ +{ + "Name": "MCSQ Translation Models (en-de) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-4680", + "Family": "Language Models", + "Description": "These are English-German translation models available in the LINDAT translation service. The models are trained using the MCSQ social surveys dataset (available here).\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "deu"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-4680" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/mcsq-tm-en-ru.json b/lexical-resources/language-models/mcsq-tm-en-ru.json new file mode 100644 index 0000000..4a5e41e --- /dev/null +++ b/lexical-resources/language-models/mcsq-tm-en-ru.json @@ -0,0 +1,16 @@ +{ + "Name": "MCSQ Translation Models (en-ru) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-4681", + "Family": "Language Models", + "Description": "These are English-Russian translation models available in the LINDAT translation service. 
The models are trained using the MCSQ social surveys dataset (available here ).\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "rus"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-4681" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/morflex-cz-161115.json b/lexical-resources/language-models/morflex-cz-161115.json new file mode 100644 index 0000000..da7c653 --- /dev/null +++ b/lexical-resources/language-models/morflex-cz-161115.json @@ -0,0 +1,16 @@ +{ + "Name": "Czech Models (MorfFlex CZ 161115 + PDT 3.0) for MorphoDiTa 161115", + "URL": "http://hdl.handle.net/11234/1-1836", + "Family": "Language Models", + "Description": "These models were developed for MorphoDiTa, which performs morphological analysis, morphological generation and part-of-speech tagging (see also the PoS-taggers and lemmatizers Resource Family). The morphological dictionary is created from the 161115 version of the MorfFlex CZ lexicon and the 1.2 version of the DeriNet lexical network. The PoS tagger is trained on Prague Dependency Treebank 3.0.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11234/1-1836" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/nametag2.json b/lexical-resources/language-models/nametag2.json new file mode 100644 index 0000000..0d937bc --- /dev/null +++ b/lexical-resources/language-models/nametag2.json @@ -0,0 +1,16 @@ +{ + "Name": "NameTag 2 Models", + "URL": "http://hdl.handle.net/11234/1-3773", + "Family": "Language Models", + "Description": "These models are for NameTag 2, a named entity recognition tool (see also the Named Entity Recognizers Resource Family). The documentation is available separately on the project webpage.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces", "nld", "eng", "deu", "spa"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3773" + }, + "Publication": "Straková et al. (2019)" +} diff --git a/lexical-resources/language-models/por-roberta.json b/lexical-resources/language-models/por-roberta.json new file mode 100644 index 0000000..e9fab43 --- /dev/null +++ b/lexical-resources/language-models/por-roberta.json @@ -0,0 +1,16 @@ +{ + "Name": "Portuguese RoBERTa language model", + "URL": "https://hdl.handle.net/21.11129/0000-000E-631E-2", + "Family": "Language Models", + "Description": "This is a pre-trained roBERTa model in Portuguese, with 6 layers and 12 attention-heads, totaling 68M parameters. 
Pre-training was done on 10 million Portuguese sentences and 10 million English sentences from the OSCAR corpus.\nThe model is available for download from the PORTULAN repository.", + "Language": ["por"], + "Licence": "CC-BY", + "Size": [], + "Annotation": ["Baseline"], + "Infrastructure": "CLARIN", + "Group": "Baseline", + "Access": { + "Download": "https://hdl.handle.net/21.11129/0000-000E-631E-2" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/pos-lemma-ces.json b/lexical-resources/language-models/pos-lemma-ces.json new file mode 100644 index 0000000..50e254b --- /dev/null +++ b/lexical-resources/language-models/pos-lemma-ces.json @@ -0,0 +1,16 @@ +{ + "Name": "POS Tagging and Lemmatization (Czech model)", + "URL": "http://hdl.handle.net/11234/1-4613", + "Family": "Language Models", + "Description": "This model is trained using RobeCzech, which is the Czech version of BERT. The model is trained on the Prague Dependency Treebank 3.5.\nThe model is available for download from the LINDAT repository.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax and lemmatisation"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11234/1-4613" + }, + "Publication": "Vysušilová (2021)" +} diff --git a/lexical-resources/language-models/pos-tag-flair.json b/lexical-resources/language-models/pos-tag-flair.json new file mode 100644 index 0000000..84f58ef --- /dev/null +++ b/lexical-resources/language-models/pos-tag-flair.json @@ -0,0 +1,16 @@ +{ + "Name": "POS-tagging model: Flair", + "URL": "https://spraakbanken.gu.se/index.php/en/resources/flair", + "Family": "Language Models", + "Description": "This is a set of 2 models. flair_eval is trained on SUC3 with Talbanken_SBX_dev as dev set. The advantage of this model is that it can be evaluated using Talbanken_SBX_test or SIC2. flair_full is trained on SUC3, Talbanken_SBX_test, and SIC2, with Talbanken_SBX_dev as dev set.\nThe models are available for download from the Swedish Language Bank.", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "https://spraakbanken.gu.se/index.php/en/resources/flair" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/pos-tag-marmot.json b/lexical-resources/language-models/pos-tag-marmot.json new file mode 100644 index 0000000..a337915 --- /dev/null +++ b/lexical-resources/language-models/pos-tag-marmot.json @@ -0,0 +1,16 @@ +{ + "Name": "POS-tagging model: Marmot", + "URL": "https://spraakbanken.gu.se/index.php/en/resources/marmot", + "Family": "Language Models", + "Description": "This is a set of 2 models. marmot_eval is trained on SUC3 and the Talbanken_SBX_dev treebank, using Saldo as dictionary. 
marmot_full is trained on SUC3, the Talbanken_SBX_dev treebank, and SIC2 (with Saldo as dictionary).\nThe models are available for download from the Swedish Language Bank.", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "https://spraakbanken.gu.se/index.php/en/resources/marmot" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/pos-tag-stanza.json b/lexical-resources/language-models/pos-tag-stanza.json new file mode 100644 index 0000000..681186b --- /dev/null +++ b/lexical-resources/language-models/pos-tag-stanza.json @@ -0,0 +1,16 @@ +{ + "Name": "POS-tagging model: Stanza", + "URL": "https://spraakbanken.gu.se/index.php/en/resources/stanzamorph", + "Family": "Language Models", + "Description": "This is a set of 2 models. stanza_eval is trained on SUC3 and the Talbanken_SBX_dev treebank. stanza_full is trained on the SUC3, Talbanken_SBX_test, and SIC2 sets, with Talbanken_SBX_dev as dev set.\nThe models are available for download from the Swedish Language Bank.", + "Language": ["swe"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "https://spraakbanken.gu.se/index.php/en/resources/stanzamorph" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/pytorch-sloner.json b/lexical-resources/language-models/pytorch-sloner.json new file mode 100644 index 0000000..b26cf69 --- /dev/null +++ b/lexical-resources/language-models/pytorch-sloner.json @@ -0,0 +1,16 @@ +{ + "Name": "PyTorch model for Slovenian Named Entity Recognition SloNER 1.0", + "URL": "http://hdl.handle.net/11356/1758", + "Family": "Language Models", + "Description": "This is a model for Slovenian Named Entity Recognition. It is a PyTorch neural network model, intended for use with the HuggingFace transformers library.\nThe model is based on the Slovenian RoBERTa contextual embeddings model SloBERTa 2.0. The model was trained on the SUK 1.0 training corpus. The source code of the model is available in a GitHub repository.\nThe model is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["named entity recognition"], + "Infrastructure": "CLARIN", + "Group": "Named Entity Recognition", + "Access": { + "Download": "http://hdl.handle.net/11356/1758" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/pytorch-slv.json b/lexical-resources/language-models/pytorch-slv.json new file mode 100644 index 0000000..0d6862b --- /dev/null +++ b/lexical-resources/language-models/pytorch-slv.json @@ -0,0 +1,16 @@ +{ + "Name": "PyTorch model for Slovenian Coreference Resolution", + "URL": "http://hdl.handle.net/11356/1773", + "Family": "Language Models", + "Description": "This is a Slovenian model for coreference resolution: a neural network based on a customized transformer architecture, usable with this code. The model is based on the Slovenian CroSloEngual BERT 1.1 model. 
It was trained on the SUK 1.0 training corpus, specifically the SentiCoref subcorpus.\nThis resource is available for download from the CLARIN.SI repository.", + "Language": ["slv"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["coreference resolution"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11356/1773" + }, + "Publication": "Klemen & Žitnik (2022)" +} diff --git a/lexical-resources/language-models/ruv-di.json b/lexical-resources/language-models/ruv-di.json new file mode 100644 index 0000000..039ab31 --- /dev/null +++ b/lexical-resources/language-models/ruv-di.json @@ -0,0 +1,16 @@ +{ + "Name": "RÚV-DI Speaker Diarization v5 models (21.05)", + "URL": "http://hdl.handle.net/20.500.12537/109", + "Family": "Language Models", + "Description": "These models are trained on the Althingi Parliamentary Speech corpus hosted by CLARIN-IS. The models use MFCCs, x-vectors, PLDA and AHC.\nThe models are available for download from the CLARIN-IS repository.", + "Language": ["isl"], + "Licence": "CC BY 4.0", + "Size": [], + "Annotation": ["diarization"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/20.500.12537/109" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/sentiment-czech.json b/lexical-resources/language-models/sentiment-czech.json new file mode 100644 index 0000000..466099f --- /dev/null +++ b/lexical-resources/language-models/sentiment-czech.json @@ -0,0 +1,16 @@ +{ + "Name": "Sentiment Analysis (Czech Model)", + "URL": "http://hdl.handle.net/11234/1-4601", + "Family": "Language Models", + "Description": "These models are trained on data from the following sources: Mall (product reviews), CSFD (movie reviews), and Facebook, as well as on joint data from all three datasets above, using RobeCzech, which is the Czech version of BERT.", + "Language": ["ces"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["sentiment analysis"], + "Infrastructure": "CLARIN", + "Group": "Other", + "Access": { + "Download": "http://hdl.handle.net/11234/1-4601" + }, + "Publication": "Vysušilová (2021)" +} diff --git a/lexical-resources/language-models/slavic-forest.json b/lexical-resources/language-models/slavic-forest.json new file mode 100644 index 0000000..b119d27 --- /dev/null +++ b/lexical-resources/language-models/slavic-forest.json @@ -0,0 +1,16 @@ +{ + "Name": "Slavic Forest, Norwegian Wood (models)", + "URL": "http://hdl.handle.net/11234/1-1971", + "Family": "Language Models", + "Description": "These are models for the dependency parser UDPipe used to produce the authors' final submission to the Vardial 2017 CLP shared task. The scripts and commands used to create the models are part of a separate LINDAT repository entry. The models were trained with UDPipe version 3e65d69 from 3 January 2017; their functionality with newer or older versions of UDPipe is not guaranteed.\nThe models are available for download from the LINDAT repository.", + "Language": ["hrv", "nor", "slk"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-1971" + }, + "Publication": "Rosa et al. 
(2017)" +} diff --git a/lexical-resources/language-models/slk-morphodita.json b/lexical-resources/language-models/slk-morphodita.json new file mode 100644 index 0000000..2805d79 --- /dev/null +++ b/lexical-resources/language-models/slk-morphodita.json @@ -0,0 +1,16 @@ +{ + "Name": "Slovak MorphoDiTa Models 170914", + "URL": "http://hdl.handle.net/11234/1-3278", + "Family": "Language Models", + "Description": "These are Slovak models for MorphoDiTa, a tool which provides morphological analysis, morphological generation and part-of-speech tagging. The morphological dictionary is created from MorfFlex (SK 170914) and the PoS tagger is trained on automatic translations in Prague Dependency Treebank 3.0.\nThe models are available for download from the LINDAT repository.", + "Language": ["slk"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["morphosyntax"], + "Infrastructure": "CLARIN", + "Group": "Morphosyntax", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3278" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/sloberta.json b/lexical-resources/language-models/sloberta.json new file mode 100644 index 0000000..9986aac --- /dev/null +++ b/lexical-resources/language-models/sloberta.json @@ -0,0 +1,15 @@ +{ + "Name": "Slovenian RoBERTa contextual embeddings model: SloBERTa 2.0", + "URL": "http://hdl.handle.net/11356/1397", + "Family": "Language Models", + "Description": "The monolingual Slovene RoBERTa (A Robustly Optimized Bidirectional Encoder Representations from Transformers) model is a state-of-the-art model representing words/tokens as contextually dependent word embeddings, used for various NLP tasks. Word embeddings can be extracted for every word occurrence and then used in training a model for an end task, but typically the whole RoBERTa model is fine-tuned end-to-end.", + "Language": ["slv"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["word embeddings"], + "Infrastructure": "CLARIN", + "Group": "Contextual Word Embeddings", + "Access": { + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/trans-models-en-de.json b/lexical-resources/language-models/trans-models-en-de.json new file mode 100644 index 0000000..c1e8992 --- /dev/null +++ b/lexical-resources/language-models/trans-models-en-de.json @@ -0,0 +1,16 @@ +{ + "Name": "Translation Models (English-German)", + "URL": "http://hdl.handle.net/11234/1-3732", + "Family": "Language Models", + "Description": "These English-German translation models are used by the Lindat translation service.\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "deu"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3732" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/trans-models-en-ru.json b/lexical-resources/language-models/trans-models-en-ru.json new file mode 100644 index 0000000..a15b46c --- /dev/null +++ b/lexical-resources/language-models/trans-models-en-ru.json @@ -0,0 +1,16 @@ +{ + "Name": "Translation Models (en-ru) (v1.0)", + "URL": "http://hdl.handle.net/11234/1-3744", + "Family": "Language Models", + "Description": "These are CUBBITT English-Russiantranslation models available in the LINDAT translation service.\nThe models are available for download from the LINDAT repository.", + "Language": ["eng", "rus"], + "Licence": "CC 
BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3744" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/ud-parsito-models.json b/lexical-resources/language-models/ud-parsito-models.json new file mode 100644 index 0000000..904e790 --- /dev/null +++ b/lexical-resources/language-models/ud-parsito-models.json @@ -0,0 +1,16 @@ +{ + "Name": "Universal Dependencies 1.2 Models for Parsito", + "URL": "http://hdl.handle.net/11234/1-1573", + "Family": "Language Models", + "Description": "These are models for the dependency parser Parsito. They are trained on Universal Dependencies 1.2 Treebanks.", + "Language": ["eng"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-1573" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/udify-pre.json b/lexical-resources/language-models/udify-pre.json new file mode 100644 index 0000000..eb75389 --- /dev/null +++ b/lexical-resources/language-models/udify-pre.json @@ -0,0 +1,16 @@ +{ + "Name": "UDify Pretrained Model", + "URL": "http://hdl.handle.net/11234/1-3042", + "Family": "Language Models", + "Description": "UDify is a single model that parses Universal Dependencies (UPOS, UFeats, Lemmas, Deps) jointly, accepting any of 75 supported languages as input (trained on UD v2.3 with 124 treebanks). ", + "Language": ["afr", "akk", "amh", "grc", "ara", "hye", "bam", "eus", "bel", "bre", "bul", "cat", "zho", "Church Slavonic", "cop", "hrv", "ces", "dan", "nld", "eng", "myv", "est", "fao", "fin", "fra", "glg", "deu", "got", "heb", "hin", "hun", "ind", "gle", "ita", "jpn", "kaz", "kpv", "kor", "lat", "lav", "lit", "mlt", "mar", "ell", "pcm", "kmr", "sme", "nor", "fro", "fas", "pol", "por", "ron", "Buryat", "rus", "san", "srp", "slk", "slv", "spa", "swe", "swl", "tgl", "tam", "tel", "tha", "tur", "uig", "ukr", "hsb", "urd", "vie", "wbp", "yor", "yue"], + "Licence": "CC BY-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3042" + }, + "Publication": "Kondratyuk and Straka (2019)" +} diff --git a/lexical-resources/language-models/udpipe-models.json b/lexical-resources/language-models/udpipe-models.json new file mode 100644 index 0000000..2162849 --- /dev/null +++ b/lexical-resources/language-models/udpipe-models.json @@ -0,0 +1,16 @@ +{ + "Name": "Universal Dependencies 2.5 Models for UDPipe", + "URL": "http://hdl.handle.net/11234/1-3131", + "Family": "Language Models", + "Description": "These models are for the Universal Dependencies 2.5 treebanks (94 treebanks of 61 languages). 
In addition to dependency parsing, the models also provide tokenisation, part-of-speech tagging and lemmatisation.\nThe models are available for download from the LINDAT repository.", + "Language": ["afr", "grc", "ara", "hye", "eus", "bel", "bul", "cat", "zho", "Church Slavonic", "cop", "hrv", "ces", "dan", "nld", "eng", "est", "fin", "fra", "glg", "wof", "deu", "got", "heb", "hin", "hun", "ind", "gle", "ita", "jpn", "kaz", "kor", "lat", "lav", "lzh", "lit", "mlt", "mar", "ell", "sme", "nob", "nno", "fro", "orv", "fas", "pol", "por", "ron", "rus", "san", "gla", "srp", "slk", "slv", "spa", "swe", "tam", "tel", "tur", "uig", "ukr", "urd", "vie", "wol"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["syntactic parsing"], + "Infrastructure": "CLARIN", + "Group": "Syntactic Parsing", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3131" + }, + "Publication": "" +} diff --git a/lexical-resources/language-models/wmt-ca-oc-multi.json b/lexical-resources/language-models/wmt-ca-oc-multi.json new file mode 100644 index 0000000..f94e7d2 --- /dev/null +++ b/lexical-resources/language-models/wmt-ca-oc-multi.json @@ -0,0 +1,16 @@ +{ + "Name": "WMT21 Marian translation model (ca-oc multi-task)", + "URL": "http://hdl.handle.net/11234/1-3772", + "Family": "Language Models", + "Description": "This is a neural machine translation model for Catalan to Occitan translation. It is a multi-task model, also producing phonemic transcription of the Catalan source. The model was submitted to WMT21 Multilingual Low-Resource Translation for Indo-European Languages Shared Task as a CUNI-Contrastive system for Catalan to Occitan.\nThe model is available for download from the LINDAT repository.", + "Language": ["cat", "oci"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3772" + }, + "Publication": "Jon et al. (2021)" +} diff --git a/lexical-resources/language-models/wmt-ca-oc.json b/lexical-resources/language-models/wmt-ca-oc.json new file mode 100644 index 0000000..6074262 --- /dev/null +++ b/lexical-resources/language-models/wmt-ca-oc.json @@ -0,0 +1,16 @@ +{ + "Name": "WMT21 Marian translation model (ca-oc)", + "URL": "http://hdl.handle.net/11234/1-3770", + "Family": "Language Models", + "Description": "This is a neural machine translation model for Catalan to Occitan translation and constitutes the primary CUNI submission for WMT21 Multilingual Low-Resource Translation for Indo-European Languages Shared Task.\nThe model is available for download from the LINDAT repository.", + "Language": ["cat", "oci"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3770" + }, + "Publication": "Jon et al. 
(2021)" +} diff --git a/lexical-resources/language-models/wmt-ca-ro-it.json b/lexical-resources/language-models/wmt-ca-ro-it.json new file mode 100644 index 0000000..29104df --- /dev/null +++ b/lexical-resources/language-models/wmt-ca-ro-it.json @@ -0,0 +1,16 @@ +{ + "Name": "WMT21 Marian translation models (ca-ro,it,oc)", + "URL": "http://hdl.handle.net/11234/1-3769", + "Family": "Language Models", + "Description": "This is a translation model from Catalan into Romanian, Italian, and Occitan that was part of the submission for WMT21 Multilingual Low-Resource Translation for Indo-European Languages Shared Task.\nThe model is available for download from the LINDAT repository.", + "Language": ["cat", "ita", "oci", "ron"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11234/1-3769" + }, + "Publication": "Jon et al. (2021)" +} diff --git a/lexical-resources/language-models/wmt-tuning-cs-en.json b/lexical-resources/language-models/wmt-tuning-cs-en.json new file mode 100644 index 0000000..2513825 --- /dev/null +++ b/lexical-resources/language-models/wmt-tuning-cs-en.json @@ -0,0 +1,16 @@ +{ + "Name": "WMT16 Tuning Shared Task Models (Czech-to-English)", + "URL": "http://hdl.handle.net/11372/LRT-1671", + "Family": "Language Models", + "Description": "These Czech to English translation models are trained on the parallel CzEng 1.6 corpus. The data is tokenized with Moses). Alignment is done using fast_align and the standard Moses pipeline is used for training.\nThe models are available for download from the LINDAT repository.", + "Language": ["ces", "eng"], + "Licence": "CC BY-NC-SA 4.0", + "Size": [], + "Annotation": ["machine translation"], + "Infrastructure": "CLARIN", + "Group": "Machine Translation", + "Access": { + "Download": "http://hdl.handle.net/11372/LRT-1671" + }, + "Publication": "" +}