diff --git a/lib/DOMParser.js b/lib/DOMParser.js index 7df0391..fb8ad8b 100644 --- a/lib/DOMParser.js +++ b/lib/DOMParser.js @@ -3,7 +3,7 @@ const htmlparser = require("htmlparser2"); const DOMImplementation = require('./DOMImplementation'); const HTMLElement = require('./HTMLElement'); -const domTreeToW3C = (document, { type, name, attribs, children = [], data }) => { +const objTreeToW3C = (document, { type, name, attribs, children = [], data }) => { const nodeTypeMapping = { tag: () => document.createElement(name), text: () => document.createTextNode(data), @@ -19,7 +19,7 @@ const domTreeToW3C = (document, { type, name, attribs, children = [], data }) = const node = nodeTypeMapping[type](arguments); for (let child of children) { - const el = domTreeToW3C(document, child); + const el = objTreeToW3C(document, child); node.appendChild(el); } @@ -42,10 +42,15 @@ class DOMParser extends XMLDOMParser { if (/\/x?html?$/.test(mimeType)) { const impl = new DOMImplementation(); const document = impl.createHTMLDocument(); + + // We're going to replace initial scaffold while (document.lastChild) { document.removeChild(document.lastChild); } + + // Actually parse html string let tree; + const handler = new htmlparser.DomHandler(function (error, dom) { if (error) { throw error; @@ -53,9 +58,45 @@ class DOMParser extends XMLDOMParser { tree = dom; }); const parser = new htmlparser.Parser(handler); + parser.write(source); parser.end(); - tree.forEach(el => document.appendChild(domTreeToW3C(document, el))); + + // Wrap tree into proper scaffold if missing + let root = tree.find(node => node.name === 'html'); + + if (!root) { + root = { + type: 'tag', + name: 'html', + attribs: {}, + children: tree + }; + } + + tree = [ root ]; + + const body = root.children.find(node => node.name === 'body'); + + if (!body) { + root.children = [{ + type: 'tag', + name: 'body', + attribs: {}, + children: root.children + }]; + } + + // Transform to w3c-compliant object + const fragment = document.createDocumentFragment(); + + tree.forEach(node => { + const w3cNode = objTreeToW3C(document, node); + + fragment.appendChild(w3cNode); + }); + document.appendChild(fragment); + return document; } } diff --git a/package-lock.json b/package-lock.json index 0f4d2ea..30f120c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,9 +1,27 @@ { "name": "htmldom2", - "version": "0.0.4", + "version": "0.0.5", "lockfileVersion": 1, "requires": true, "dependencies": { + "@types/node": { + "version": "10.12.18", + "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz", + "integrity": "sha512-fh+pAqt4xRzPfqA6eh3Z2y6fyZavRIumvjhaCL753+TVkGKGhpPeyrJG2JftD0T9q4GF00KjefsQ+PQNDdWQaQ==", + "dev": true + }, + "@types/semver": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/@types/semver/-/semver-5.5.0.tgz", + "integrity": "sha512-41qEJgBH/TWgo5NFSvBCJ1qkoi3Q6ONSF2avrHq1LVEZfYpdHmj0y9SuTK+u9ZhG1sYQKBL1AWXKyLWP4RaUoQ==", + "dev": true + }, + "abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==", + "dev": true + }, "balanced-match": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", @@ -38,6 +56,27 @@ "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=", "dev": true }, + "condense-newlines": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/condense-newlines/-/condense-newlines-0.2.1.tgz", + "integrity": "sha1-PemFVTE5R10yUCyDsC9gaE0kxV8=", + "dev": true, + "requires": { + "extend-shallow": "^2.0.1", + "is-whitespace": "^0.3.0", + "kind-of": "^3.0.2" + } + }, + "config-chain": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.12.tgz", + "integrity": "sha512-a1eOIcu8+7lUInge4Rpf/n4Krkf3Dd9lqhljRzII1/Zno/kRtUWnznPO3jOKBmTEktkt3fkxisUcivoj0ebzoA==", + "dev": true, + "requires": { + "ini": "^1.3.4", + "proto-list": "~1.2.1" + } + }, "core-util-is": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", @@ -96,6 +135,28 @@ "domelementtype": "1" } }, + "editorconfig": { + "version": "0.15.2", + "resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-0.15.2.tgz", + "integrity": "sha512-GWjSI19PVJAM9IZRGOS+YKI8LN+/sjkSjNyvxL5ucqP9/IqtYNXBaQ/6c/hkPNYQHyOHra2KoXZI/JVpuqwmcQ==", + "dev": true, + "requires": { + "@types/node": "^10.11.7", + "@types/semver": "^5.5.0", + "commander": "^2.19.0", + "lru-cache": "^4.1.3", + "semver": "^5.6.0", + "sigmund": "^1.0.1" + }, + "dependencies": { + "commander": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.19.0.tgz", + "integrity": "sha512-6tvAOO+D6OENvRAh524Dh9jcfKTYDQAqvqezbCW82xj5X0pSrcpxtvRKHLG0yBY6SD7PSDrJaj+0AiOcKVd1Xg==", + "dev": true + } + } + }, "entities": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", @@ -107,6 +168,15 @@ "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=", "dev": true }, + "extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha1-Ua99YUrZqfYQ6huvu5idaxxWiQ8=", + "dev": true, + "requires": { + "is-extendable": "^0.1.0" + } + }, "fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -173,11 +243,83 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" }, + "ini": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz", + "integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==", + "dev": true + }, + "is-buffer": { + "version": "1.1.6", + "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz", + "integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==", + "dev": true + }, + "is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha1-YrEQ4omkcUGOPsNqYX1HLjAd/Ik=", + "dev": true + }, + "is-whitespace": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/is-whitespace/-/is-whitespace-0.3.0.tgz", + "integrity": "sha1-Fjnssb4DauxppUy7QBz77XEUq38=", + "dev": true + }, "isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" }, + "js-beautify": { + "version": "1.8.9", + "resolved": "https://registry.npmjs.org/js-beautify/-/js-beautify-1.8.9.tgz", + "integrity": "sha512-MwPmLywK9RSX0SPsUJjN7i+RQY9w/yC17Lbrq9ViEefpLRgqAR2BgrMN2AbifkUuhDV8tRauLhLda/9+bE0YQA==", + "dev": true, + "requires": { + "config-chain": "^1.1.12", + "editorconfig": "^0.15.2", + "glob": "^7.1.3", + "mkdirp": "~0.5.0", + "nopt": "~4.0.1" + }, + "dependencies": { + "glob": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", + "integrity": "sha512-vcfuiIxogLV4DlGBHIUOwI0IbrJ8HWPc4MU7HzviGeNho/UJDfi6B5p3sHeWIQ0KGIU0Jpxi5ZHxemQfLkkAwQ==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + } + } + }, + "kind-of": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-3.2.2.tgz", + "integrity": "sha1-MeohpzS6ubuw8yRm2JOupR5KPGQ=", + "dev": true, + "requires": { + "is-buffer": "^1.1.5" + } + }, + "lru-cache": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", + "integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==", + "dev": true, + "requires": { + "pseudomap": "^1.0.2", + "yallist": "^2.1.2" + } + }, "minimatch": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", @@ -227,6 +369,16 @@ "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=", "dev": true }, + "nopt": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.1.tgz", + "integrity": "sha1-0NRoWv1UFRk8jHUFYC0NF81kR00=", + "dev": true, + "requires": { + "abbrev": "1", + "osenv": "^0.1.4" + } + }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -236,17 +388,62 @@ "wrappy": "1" } }, + "os-homedir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", + "dev": true + }, + "os-tmpdir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", + "dev": true + }, + "osenv": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", + "integrity": "sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", + "dev": true, + "requires": { + "os-homedir": "^1.0.0", + "os-tmpdir": "^1.0.0" + } + }, "path-is-absolute": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", "dev": true }, + "pretty": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pretty/-/pretty-2.0.0.tgz", + "integrity": "sha1-rbx5YLe7/iiaVX3F9zdhmiINBqU=", + "dev": true, + "requires": { + "condense-newlines": "^0.2.1", + "extend-shallow": "^2.0.1", + "js-beautify": "^1.6.12" + } + }, "process-nextick-args": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" }, + "proto-list": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz", + "integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=", + "dev": true + }, + "pseudomap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", + "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=", + "dev": true + }, "query-selector": { "version": "1.0.9", "resolved": "https://registry.npmjs.org/query-selector/-/query-selector-1.0.9.tgz", @@ -271,6 +468,18 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" }, + "semver": { + "version": "5.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz", + "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==", + "dev": true + }, + "sigmund": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/sigmund/-/sigmund-1.0.1.tgz", + "integrity": "sha1-P/IfGYytIXX587eBhT/ZTQ0ZtZA=", + "dev": true + }, "string_decoder": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", @@ -303,6 +512,12 @@ "version": "0.1.27", "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=" + }, + "yallist": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz", + "integrity": "sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI=", + "dev": true } } } diff --git a/package.json b/package.json index 31ebc6a..1455de9 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,15 @@ { "name": "htmldom2", - "version": "0.0.4", + "version": "0.0.5", "description": "Parse html on top of xmldom and htmlparser2", "main": "index.js", + "repository": { + "type": "git", + "url": "https://github.com/benignware/htmldom2.git" + }, + "bugs": { + "url": "https://github.com/benignware/htmldom2/issues" + }, "scripts": { "test": "mocha test/*.test.js" }, @@ -18,6 +25,7 @@ "xmldom": "^0.1.27" }, "devDependencies": { - "mocha": "^5.2.0" + "mocha": "^5.2.0", + "pretty": "^2.0.0" } } diff --git a/test/DOMParser.test.js b/test/DOMParser.test.js index ab6ea22..31d1850 100644 --- a/test/DOMParser.test.js +++ b/test/DOMParser.test.js @@ -1,6 +1,7 @@ const path = require('path'); const fs = require('fs'); const { strict: assert } = require('assert'); +const pretty = require('pretty'); const { serializeToString } = require('./utils'); const { DOMParser } = require('..'); @@ -10,8 +11,19 @@ describe('DOMParser', function() { it('parses html', () => { const source = fs.readFileSync(`${__dirname}/fixtures/example.html`, 'utf-8'); const document = DOMParser.parseFromString(source, 'text/html'); + const actual = pretty(serializeToString(document)); + const expected = pretty(source); - assert.equal(serializeToString(document), source); + assert.equal(actual, expected); + }); + + it('creates scaffold', () => { + const source = fs.readFileSync(`${__dirname}/fixtures/partial.html`, 'utf-8'); + const document = DOMParser.parseFromString(source, 'text/html'); + const actual = pretty(serializeToString(document)); + const expected = pretty(`${source}`); + + assert.equal(actual, expected); }); }) }); diff --git a/test/fixtures/partial.html b/test/fixtures/partial.html new file mode 100644 index 0000000..f3e333e --- /dev/null +++ b/test/fixtures/partial.html @@ -0,0 +1 @@ +

Hello World