diff --git a/.gitignore b/.gitignore index 99f939b..cb8f36c 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ afwiki-latest-pages-articles.xml node_modules dump.rdb .DS_STORE +tmp.txt diff --git a/package-lock.json b/package-lock.json index e30ac50..6bdf726 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1065,9 +1065,9 @@ "dev": true }, "wtf_wikipedia": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/wtf_wikipedia/-/wtf_wikipedia-6.2.0.tgz", - "integrity": "sha512-flTL95xVC7myhDA5TaLXmxg9CMkzjpEMUUmOnALoaSZIzGPfWECGVt/l/NlmB87SDfvn+QvBxGqaZEBLnkf9rA==", + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/wtf_wikipedia/-/wtf_wikipedia-6.2.1.tgz", + "integrity": "sha512-ABlngbgO/SAKaIcd5CvSbiUY+ICW5XqFxgNQpfDHChbtmpCOgIpOb9K6q3jT1gXU1BoXxNWFMB+8vN2p+uu12g==", "requires": { "cross-fetch": "2.2.3" } diff --git a/package.json b/package.json index 922daf2..d8c5fb6 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,7 @@ "prettysize": "1.1.0", "sunday-driver": "1.0.1", "worker-nodes": "1.6.1", - "wtf_wikipedia": "6.2.0", + "wtf_wikipedia": "6.2.1", "yargs": "12.0.2" }, "devDependencies": { diff --git a/scripts/missing-templates.js b/scripts/missing-templates.js new file mode 100644 index 0000000..1c15bd5 --- /dev/null +++ b/scripts/missing-templates.js @@ -0,0 +1,37 @@ +//first, run dumpster with {} piped to a text file +// npm run watch > ./tmp.txt +const sundayDriver = require('sunday-driver'); + +let templates = {} + +let obj = { + file: './tmp.txt', + splitter: '\n', + each: (line, resume) => { + if (line[0] === ':') { + templates[line] = templates[line] || 0 + templates[line] += 1 + } + resume() + } +} + +let p = sundayDriver(obj); +p.then(() => { + let keys = Object.keys(templates) + keys = keys.sort((a, b) => { + if (templates[a] > templates[b]) { + return -1 + } + return 1 + }) + let show = keys.slice(0, 1000) + show.forEach((key) => { + let k = key.replace('\n', '') + k = k.replace('::', '') + k = k.replace(/^ +/, '') + k = k.replace(/ /g, '_') + let link = `* [${k}](https://en.wikipedia.org/wiki/Template:${k})` + console.log(link + '\t - ' + templates[key]) + }) +}) diff --git a/src/worker/02-parseWiki.js b/src/worker/02-parseWiki.js index 0272f62..92f6ffd 100644 --- a/src/worker/02-parseWiki.js +++ b/src/worker/02-parseWiki.js @@ -16,7 +16,7 @@ const escapeXML = function(str) { const parseWiki = function(page, options, worker) { try { page.wiki = escapeXML(page.wiki || ''); - let doc = wtf(page.wiki); + let doc = wtf(page.wiki, options); //dont insert this if it's a redirect if (options.skip_redirects === true && doc.isRedirect()) { worker.counts.redirects += 1