Merge pull request #41 from zazuko/lang-string

Filtering language tagged strings
zazuko · Jul 9, 2020 · 9d6d07b · 9d6d07b
2 parents 4a5ff35 + 1458822
commit 9d6d07b
Show file tree

Hide file tree

Showing 12 changed files with 386 additions and 17 deletions.
diff --git a/docs/_sidebar.md b/docs/_sidebar.md
@@ -7,6 +7,7 @@
   * [Manipulating data](manipulation.md)
   * [Working with named graphs](named-graphs.md)
   * [RDF Lists](rdf-lists.md)
+  * [Tagged literals](tagged-literals.md)
 * Reference
   * [JSDoc](api.md)
   * [TypeScript](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/master/types/clownface)
diff --git a/docs/api.md b/docs/api.md
@@ -51,7 +51,7 @@ A graph pointer object, which points at 0..N nodes within a dataset
     * [.literal(values, [languageOrDatatype])](#Clownface+literal) ⇒ [<code>Clownface</code>](#Clownface)
     * [.namedNode(values)](#Clownface+namedNode) ⇒ [<code>Clownface</code>](#Clownface)
     * [.in(predicates)](#Clownface+in) ⇒ [<code>Clownface</code>](#Clownface)
-    * [.out(predicates)](#Clownface+out) ⇒ [<code>Clownface</code>](#Clownface)
+    * [.out(predicates, [options])](#Clownface+out) ⇒ [<code>Clownface</code>](#Clownface)
     * [.has(predicates, [objects])](#Clownface+has) ⇒ [<code>Clownface</code>](#Clownface)
     * [.addIn(predicates, subjects, [callback])](#Clownface+addIn) ⇒ [<code>Clownface</code>](#Clownface)
     * [.addOut(predicates, objects, [callback])](#Clownface+addOut) ⇒ [<code>Clownface</code>](#Clownface)
@@ -279,7 +279,7 @@ Creates a graph pointer to nodes which are linked to the current pointer by `pre
 
 <a name="Clownface+out"></a>
 
-### clownface.out(predicates) ⇒ [<code>Clownface</code>](#Clownface)
+### clownface.out(predicates, [options]) ⇒ [<code>Clownface</code>](#Clownface)
 Creates a graph pointer to nodes which link the current pointer by `predicates`
 
 **Kind**: instance method of [<code>Clownface</code>](#Clownface)  
@@ -293,6 +293,10 @@ Creates a graph pointer to nodes which link the current pointer by `predicates`
 <tr>
     <td>predicates</td><td><code>Term</code> | <code>Array.&lt;Term&gt;</code> | <code><a href="#Clownface">Clownface</a></code> | <code><a href="#Clownface">Array.&lt;Clownface&gt;</a></code></td><td><p>one or more RDF/JS term identifying a property</p>
 </td>
+    </tr><tr>
+    <td>[options]</td><td><code>object</code></td><td></td>
+    </tr><tr>
+    <td>[options.language]</td><td><code>string</code> | <code>Array.&lt;string&gt;</code> | <code>undefined</code></td><td></td>
     </tr>  </tbody>
 </table>
 

diff --git a/docs/tagged-literals.md b/docs/tagged-literals.md
@@ -0,0 +1,135 @@
+# Literals with language tags
+
+Using the `.out()` method it is possible to only find literals in specific languages by passing a second `{ language }` parameter to the method.
+
+When that parameter is defined, only string literal nodes will be returned.
+
+For any given subject, all strings in the chosen language will be returned.
+
+## Finding specific language
+
+To find string literals in a given language, pass an object with a string `language` key as the second argument.
+
+<run-kit>
+
+```js
+const cf = require('clownface')
+const RDF = require('@rdfjs/dataset')
+const { literal } = require('@rdfjs/data-model')
+const { rdf, rdfs } = require('@tpluscode/rdf-ns-builders')
+
+// create two labels for a resource
+const apple = cf({ dataset: RDF.dataset() })
+  .node(rdf.Resource)
+  .addOut(rdfs.label, literal('apple', 'en'))
+  .addOut(rdfs.label, literal('Apfel', 'de'))
+
+// find German label
+apple.out(rdfs.label, { language: 'de' }).value
+```
+
+</run-kit>
+
+## Finding plain literals
+
+Using an empty string for the `language` parameter will find strings without a language.
+
+<run-kit>
+
+```js
+const cf = require('clownface')
+const RDF = require('@rdfjs/dataset')
+const { literal } = require('@rdfjs/data-model')
+const { rdf, rdfs } = require('@tpluscode/rdf-ns-builders')
+
+// create two labels for a resource
+const apple = cf({ dataset: RDF.dataset() })
+  .node(rdf.Resource)
+  .addOut(rdfs.label, literal('apple'))
+  .addOut(rdfs.label, literal('Apfel', 'de'))
+
+// find literal without language tag
+apple.out(rdfs.label, { language: '' }).value
+```
+
+</run-kit>
+
+## Finding from a choice of potential languages
+
+It is possible to look up literals in one of several alternative languages by providing an array of languages instead. The first language in the array which matches any literal will be used.
+
+<run-kit>
+
+```js
+const cf = require('clownface')
+const RDF = require('@rdfjs/dataset')
+const { literal } = require('@rdfjs/data-model')
+const { rdf, rdfs } = require('@tpluscode/rdf-ns-builders')
+
+// create two labels for a resource
+const apple = cf({ dataset: RDF.dataset() })
+  .node(rdf.Resource)
+  .addOut(rdfs.label, literal('apple', 'en'))
+  .addOut(rdfs.label, literal('Apfel', 'de'))
+
+// there is no French translation so English will be returned
+apple.out(rdfs.label, { language: ['fr', 'en'] }).value
+```
+
+</run-kit>
+
+A wildcard (asterisk) can also be used to choose literals in any other (arbitrary) language if the preceding choices did not yield any results. It looks similar to the previous example.
+
+```js
+apple.out(rdfs.label, { language: ['fr', '*'] }).value
+```
+
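+!> The result can be either English or German. The choice is not random: the first language found among the matched literals is used, so it depends on the order of the data and should not be relied upon.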
+
+## Matching subtags
+
+In specific cases, [subtags](https://tools.ietf.org/html/bcp47#section-2.2) such as `de-CH` can be matched to a given language. By analogy, it is also possible to find a subtag of any length, because a "starts with" match is applied.
+
+For example, in the snippet below the more specific subtag `de-CH-1996` will indeed be matched to the more general Swiss German `de-CH`.
+
+<run-kit>
+
+```js
+const cf = require('clownface')
+const RDF = require('@rdfjs/dataset')
+const { literal } = require('@rdfjs/data-model')
+const { rdf, rdfs } = require('@tpluscode/rdf-ns-builders')
+
+// create two labels for a resource
+const bicycle = cf({ dataset: RDF.dataset() })
+  .node(rdf.Resource)
+  .addOut(rdfs.label, literal('Fahrrad', 'de'))
+  .addOut(rdfs.label, literal('Velo', 'de-CH-1996'))
+
+// finds a Swiss translation
+bicycle.out(rdfs.label, { language: 'de-CH' }).value
+```
+
+</run-kit>
+
+!> However, an exact match always takes precedence over a subtag match.
+
+<run-kit>
+
+```js
+const cf = require('clownface')
+const RDF = require('@rdfjs/dataset')
+const { literal } = require('@rdfjs/data-model')
+const { rdf, rdfs } = require('@tpluscode/rdf-ns-builders')
+
+// create two labels for a resource
+const bicycle = cf({ dataset: RDF.dataset() })
+  .node(rdf.Resource)
+  .addOut(rdfs.label, literal('Fahrrad', 'de'))
+  .addOut(rdfs.label, literal('Velo', 'de-CH-1996'))
+
+// finds the standard German label
+bicycle.out(rdfs.label, { language: 'de' }).value
+```
+
+</run-kit>
diff --git a/lib/Clownface.js b/lib/Clownface.js
@@ -262,12 +262,14 @@ class Clownface {
   /**
    * Creates a graph pointer to nodes which link the current pointer by `predicates`
    * @param {Term|Term[]|Clownface|Clownface[]} predicates one or more RDF/JS term identifying a property
+   * @param {object} [options] additional options, passed on unchanged to every context of this pointer
+   * @param {string | string[] | undefined} [options.language] a language tag, or a list of tags tried in order; when defined, each context only returns string literals of the first language which matches
    * @returns {Clownface}
    */
-  out (predicates) {
+  out (predicates, options = {}) {
     predicates = this._toTermArray(predicates)
 
-    const context = this._context.reduce((all, current) => all.concat(current.out(predicates)), [])
+    const context = this._context.reduce((all, current) => all.concat(current.out(predicates, options)), [])
 
     return Clownface.fromContext(context)
   }

diff --git a/lib/Context.js b/lib/Context.js
@@ -1,6 +1,7 @@
 const inArray = require('./inArray')
 const term = require('./term')
 const toArray = require('./toArray')
+const { createLanguageMapper } = require('../lib/languageTag')
 
 class Context {
   constructor ({ dataset, graph, value, factory, namespace }) {
@@ -27,9 +28,18 @@ class Context {
     })
   }
 
-  out (predicate) {
-    return this.matchProperty(toArray(this.term), predicate, null, toArray(this.graph), 'object').map(subject => {
-      return this.clone({ value: subject })
+  /**
+   * Creates a cloned context for every term which `matchProperty` returns for
+   * this context's term, the given predicate(s) and this context's graph
+   * (the 'object' position is requested).
+   *
+   * When `language` is defined the terms are first narrowed down with
+   * `createLanguageMapper`: the languages are tried in the given order and the
+   * literals of the first one which yields a result are used. If no language
+   * matches, an empty array is returned.
+   *
+   * @param {Term[]} predicate predicate(s) handed to `matchProperty`
+   * @param {object} [options]
+   * @param {string | string[] | undefined} [options.language] a single language tag or a list of tags in order of preference
+   * @returns {Context[]}
+   */
+  // The options object defaults to {} so that the single-argument call
+  // `context.out(predicate)`, which was valid before the language filter was
+  // added, does not throw when destructuring `undefined`.
+  out (predicate, { language } = {}) {
+    let objects = this.matchProperty(toArray(this.term), predicate, null, toArray(this.graph), 'object')
+
+    if (typeof language !== 'undefined') {
+      const languages = (typeof language === 'string' ? [language] : language)
+      const getLiteralsForLanguage = createLanguageMapper(objects)
+
+      // the mapper returns undefined for a language without literals,
+      // so `find(Boolean)` picks the first language which matched anything
+      objects = languages.map(getLiteralsForLanguage).find(Boolean) || []
+    }
+
+    return objects.map(object => {
+      return this.clone({ value: object })
     })
   }
 

diff --git a/lib/fromPrimitive.js b/lib/fromPrimitive.js
@@ -1,7 +1,7 @@
 const rdf = require('@rdfjs/data-model')
-const namespace = require('@rdfjs/namespace')
+const namespace = require('./namespace')
 
-const xsd = namespace('http://www.w3.org/2001/XMLSchema#')
+const { xsd } = namespace(rdf)
 
 function booleanToLiteral (value, factory = rdf) {
   if (typeof value !== 'boolean') {

diff --git a/lib/languageTag.js b/lib/languageTag.js
@@ -0,0 +1,47 @@
+const RDF = require('@rdfjs/data-model')
+const namespace = require('./namespace')
+
+const ns = namespace(RDF)
+
+/**
+ * Reducer callback which groups string literals by language tag.
+ *
+ * Terms which are not literals, and literals whose datatype equals neither
+ * `ns.langString` nor `ns.xsd.string`, are skipped. Every other literal is
+ * appended to the array stored under its lower-cased `language` value.
+ *
+ * @param {Map<string, Array>} map accumulator; it is mutated and returned
+ * @param {Term} current the term being visited
+ * @returns {Map<string, Array>} the same `map` instance
+ */
+function mapLiteralsByLanguage (map, current) {
+  if (current.termType !== 'Literal') return map
+
+  const isStringLiteral = ns.langString.equals(current.datatype) || ns.xsd.string.equals(current.datatype)
+  if (!isStringLiteral) return map
+
+  // tags are compared case-insensitively, hence the lower-cased key
+  const key = current.language.toLowerCase()
+  const bucket = map.get(key)
+
+  if (bucket) {
+    bucket.push(current)
+  } else {
+    map.set(key, [current])
+  }
+
+  return map
+}
+
+/**
+ * Groups the string literals found in `objects` by language and returns a
+ * function which looks up the literals for a single language tag.
+ *
+ * The returned function resolves a tag (case-insensitively) as follows:
+ *  1. `*` selects the literals of the first language that was encountered
+ *  2. an exact match of the tag
+ *  3. the first language which extends the tag with further subtags,
+ *     e.g. `de-CH` selects literals tagged `de-CH-1996`
+ *
+ * @param {Term[]} objects terms to choose from
+ * @returns {function(string): (Array|undefined)} lookup returning the matching literals, or `undefined` when nothing matches
+ */
+function createLanguageMapper (objects) {
+  const literalsByLanguage = objects.reduce(mapLiteralsByLanguage, new Map())
+  const langMapEntries = [...literalsByLanguage.entries()]
+
+  return language => {
+    const languageLowerCase = language.toLowerCase()
+
+    if (languageLowerCase === '*') {
+      return langMapEntries[0] && langMapEntries[0][1]
+    }
+
+    const exactMatch = literalsByLanguage.get(languageLowerCase)
+    if (exactMatch) {
+      return exactMatch
+    }
+
+    // The empty tag asks for literals without a language. Every string starts
+    // with '', so falling through to the prefix match would wrongly return
+    // literals of whichever language comes first.
+    if (languageLowerCase === '') {
+      return undefined
+    }
+
+    // Match on whole subtags only: 'de' may select 'de-ch' but must not
+    // select an unrelated tag such as 'den' which merely shares the letters.
+    const subtagPrefix = `${languageLowerCase}-`
+    const secondaryMatches = langMapEntries.find(([entryLanguage]) => entryLanguage.startsWith(subtagPrefix))
+
+    return secondaryMatches && secondaryMatches[1]
+  }
+}
+
+module.exports = {
+  createLanguageMapper
+}
diff --git a/lib/namespace.js b/lib/namespace.js
@@ -1,8 +1,16 @@
+const namespace = require('@rdfjs/namespace')
 
-const ns = (factory) => ({
-  first: factory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'),
-  nil: factory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'),
-  rest: factory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
-})
+/**
+ * Builds the vocabulary terms used by the library, creating them with the
+ * given factory via `@rdfjs/namespace`.
+ *
+ * @param {object} factory data factory passed to `namespace` as `{ factory }`
+ * @returns {object} `first`, `nil`, `rest` and `langString` taken from the
+ *   RDF syntax namespace, plus `xsd`, the namespace object for XML Schema
+ *   (used elsewhere as e.g. `xsd.string`)
+ */
+const ns = (factory) => {
+  const xsd = namespace('http://www.w3.org/2001/XMLSchema#', { factory })
+  const rdf = namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#', { factory })
+
+  return {
+    first: rdf.first,
+    nil: rdf.nil,
+    rest: rdf.rest,
+    langString: rdf.langString,
+    xsd
+  }
+}
 
 module.exports = ns
diff --git a/package.json b/package.json
@@ -30,6 +30,8 @@
   },
   "devDependencies": {
     "@rdfjs/parser-n3": "^1.1.2",
+    "@tpluscode/rdf-ns-builders": "^0.3.6",
+    "@tpluscode/rdf-string": "^0.2.15",
     "docsify-cli": "^4.4.0",
     "husky": "^4.2.5",
     "jsdoc-to-markdown": "^5.0.3",
@@ -39,6 +41,7 @@
     "rdf-ext": "^1.3.0",
     "rimraf": "^3.0.2",
     "standard": "^12.0.1",
+    "string-to-stream": "^3.0.1",
     "tbbt-ld": "^1.1.0"
   },
   "nyc": {