Skip to content

Commit

Permalink
Merge pull request #323 from evolvedbinary/bugfix/doctype
Browse files Browse the repository at this point in the history
Document node should have the DocType Decl and XML Decl as attributes
  • Loading branch information
adamretter authored Nov 14, 2024
2 parents 2c07510 + a4a9083 commit d06d4bd
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 27 deletions.
34 changes: 29 additions & 5 deletions packages/lwdita-ast/src/nodes/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import { AbstractBaseNode } from "./base";
import { stringToChildTypes } from "../utils";
import { parseDocTypeDecl, stringToChildTypes } from "../utils";
import { JDita } from "../ast-classes";

/**
* Interface DocumentNode defines the attribute types for a document node.
Expand All @@ -25,11 +26,19 @@ export interface DocumentNodeAttributes {}
// XML declaration interface
export interface XMLDecl {
/** The version specified by the XML declaration. */
version?: string;
version: string;
/** The encoding specified by the XML declaration. */
encoding?: string;
/** The value of the standalone parameter */
standalone?: string;
standalone?: boolean;
}

// docTypeDecl declaration interface
// Structured form of a DOCTYPE declaration; this is the shape returned by parseDocTypeDecl.
export interface DocTypeDecl {
/** The name given in the DOCTYPE declaration (the text preceding any SYSTEM/PUBLIC external identifier) */
name: string;
/** The system identifier: the quoted literal after SYSTEM, or the second quoted literal after PUBLIC */
systemId?: string;
/** The public identifier: the first quoted literal after PUBLIC */
publicId?: string;
}

/**
Expand All @@ -46,9 +55,24 @@ export class DocumentNode extends AbstractBaseNode implements DocumentNodeAttrib
// TODO rename this to undefined
static nodeName = 'document';
static childTypes = stringToChildTypes(['topic']);
static fields = [];
static fields = ['xmlDecl', 'docTypeDecl'];
static isValidField = (): boolean => true;
xmlDecl: XMLDecl | undefined;
doctype: string | undefined;
docTypeDecl: DocTypeDecl | undefined;

/**
 * JDita representation of the document node.
 *
 * The XML declaration and the DOCTYPE declaration are exposed as the
 * `xmlDecl` and `docTypeDecl` attributes; the children are converted
 * through their own `json` accessor.
 */
get json(): JDita {
  const nodeName = this.static.nodeName;
  const attributes = {
    xmlDecl: this.xmlDecl,
    docTypeDecl: this.docTypeDecl,
  };
  const children = this._children?.map(childNode => childNode.json);
  return { nodeName, attributes, children };
}

/**
 * Parse a raw DOCTYPE declaration string and store the result on this node.
 *
 * @param docTypeDecl - the DOCTYPE declaration text, or undefined when there is none
 */
setDocTypeDecl(docTypeDecl: string | undefined) {
  const parsedDecl = parseDocTypeDecl(docTypeDecl);
  this.docTypeDecl = parsedDecl;
}

}
27 changes: 27 additions & 0 deletions packages/lwdita-ast/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
import { ChildType, ChildTypes} from "./ast-classes";
import { OrArray } from "./classes";
import { BasicValue } from "./classes";
import { DocTypeDecl } from "./nodes";

/**
* acceptsNodeName - Check whether a child type accepts a node name
Expand Down Expand Up @@ -280,3 +281,29 @@ export function stringToChildTypes(value: OrArray<string>, topLevel = true): Chi
// export function childTypesArray(childTypes: ChildTypes): ChildTypes[] {
// return Array.isArray(childTypes) ? childTypes : [childTypes];
// }

/**
 * Pattern for the content of a DOCTYPE declaration without an internal subset.
 *
 * Capture groups:
 *  1     - the name (everything before the external identifier)
 *  2 / 3 - system identifier after SYSTEM (double- / single-quoted)
 *  4 / 5 - public identifier after PUBLIC (double- / single-quoted)
 *  6 / 7 - system identifier after PUBLIC (double- / single-quoted)
 *
 * Each literal must be closed by the same quote character that opened it,
 * so it may contain the other quote character and may be empty.
 * Compiled once at module level instead of on every call.
 */
const DOCTYPE_DECL_PATTERN = /^([^"']+?)(?:\s+(?:SYSTEM\s+(?:"([^"]*)"|'([^']*)')|PUBLIC\s+(?:"([^"]*)"|'([^']*)')\s+(?:"([^"]*)"|'([^']*)')))?\s*$/;

/**
 * Deconstruct a doctype string to an object
 *
 * @param docTypeDecl - DocTypeDecl string (the text between `<!DOCTYPE` and `>`)
 * @returns - Doctype object, or undefined when no doctype string was given
 * @throws Error when the declaration contains an internal subset (not supported)
 * @throws Error when the declaration cannot be parsed
 */
export function parseDocTypeDecl(docTypeDecl: string | undefined): DocTypeDecl | undefined {
  if (!docTypeDecl) return;

  const match = DOCTYPE_DECL_PATTERN.exec(docTypeDecl);
  const rawName = match?.[1];

  if (rawName === undefined || rawName.includes('[')) {
    // An internal subset always introduces a '[' outside of the quoted literals:
    // it either ends up in the name part or prevents the pattern from matching.
    // A '[' inside a quoted literal alone does not reach this branch.
    if (docTypeDecl.includes('[')) {
      throw new Error('Internal subset is not supported');
    }
    // Fail loudly instead of returning an object whose mandatory name is undefined
    throw new Error(`Malformed DOCTYPE declaration: ${docTypeDecl}`);
  }

  // the name is not optional in doctype
  const name = rawName.trim();
  // systemId follows either SYSTEM directly or the publicId after PUBLIC
  const systemId = match?.[2] ?? match?.[3] ?? match?.[6] ?? match?.[7];
  // publicId is only present after PUBLIC
  const publicId = match?.[4] ?? match?.[5];

  return {
    name,
    systemId,
    publicId,
  };
}
39 changes: 37 additions & 2 deletions packages/lwdita-ast/test/utils.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.

import { assert, expect } from 'chai';
import { ChildType, ChildTypes } from "../src/ast-classes";
import { acceptsNodeName, areFieldsValid, has, isChildTypeRequired, isChildTypeSingle, isOrUndefined, splitTypenames, stringToChildTypes } from "../src/utils";
import { acceptsNodeName, areFieldsValid, has, isChildTypeRequired, isChildTypeSingle, isOrUndefined, parseDocTypeDecl, splitTypenames, stringToChildTypes } from "../src/utils";
import { BasicValue } from "../src/classes";

describe('acceptsNodeName', () => {
Expand Down Expand Up @@ -555,4 +555,39 @@ describe('Childtype from string', () => {
// const result = childTypesArray([childType]);
// expect(result).to.deep.equal([childType]);
// });
// });
// });

// Table-driven checks for parseDocTypeDecl: each case pairs a raw DOCTYPE
// declaration with the object it is expected to be deconstructed into.
describe('DocTypeDecl parsing', () => {
  [
    {
      // name only
      docTypeDecl: `greeting`,
      parts: {
        name: 'greeting',
        publicId: undefined,
        systemId: undefined,
      }
    },
    {
      // SYSTEM external identifier
      docTypeDecl: `note SYSTEM "note.dtd"`,
      parts: {
        name: 'note',
        publicId: undefined,
        systemId: 'note.dtd',
      }
    },
    {
      // PUBLIC external identifier
      docTypeDecl: `html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"`,
      parts: {
        name: 'html',
        publicId: '-//W3C//DTD XHTML 1.0 Strict//EN',
        systemId: 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd',
      }
    },
  ].forEach(({ docTypeDecl, parts }) => {
    // include the input in the title so a failing case can be identified
    it(`should return the correct doctype object for <!DOCTYPE ${docTypeDecl}>`, () => {
      const result = parseDocTypeDecl(docTypeDecl);
      expect(result).to.deep.equal(parts);
    });
  });

});
19 changes: 13 additions & 6 deletions packages/lwdita-xdita/src/converter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.

import * as saxes from "@rubensworks/saxes";
import { createCDataSectionNode, createNode } from "./factory";
import { Attributes, BasicValue, TextNode, getNodeClass, JDita, BaseNode, DocumentNode, CDataNode, AbstractBaseNode } from "@evolvedbinary/lwdita-ast";
import { Attributes, BasicValue, TextNode, getNodeClass, JDita, BaseNode, DocumentNode, DocTypeDecl, CDataNode, AbstractBaseNode, XMLDecl } from "@evolvedbinary/lwdita-ast";
import { InMemoryTextSimpleOutputStreamCollector } from "./stream";
import { XditaSerializer } from "./xdita-serializer";

Expand Down Expand Up @@ -48,12 +48,15 @@ export async function xditaToAst(xml: string, abortOnError = true): Promise<Docu
const stack: BaseNode[] = [doc];

// Look for the XML declaration and the DOCTYPE declaration
parser.on("xmldecl", function (xmlDecl) {

doc.xmlDecl = xmlDecl;
parser.on("xmldecl", function ({ version, encoding, standalone }) {
doc.xmlDecl = {
version: version || "1.0",
encoding,
standalone: standalone? standalone === 'yes' : undefined,
};
});
parser.on("doctype", function (doctype) {
doc.doctype = doctype;
parser.on("doctype", function (docTypeDecl) {
doc.setDocTypeDecl(docTypeDecl);
});

// Parse the text and add a new node item to the node-array
Expand Down Expand Up @@ -199,6 +202,10 @@ function jditaAttrToSaxesAttr(attr: Record<string, BasicValue> | undefined): Att
export function jditaToAst(jdita: JDita): AbstractBaseNode {
if(jdita.nodeName === 'document') {
const doc = new DocumentNode();
// set docTypeDecl and xmlDecl
doc.docTypeDecl = jdita.attributes?.docTypeDecl as DocTypeDecl;
doc.xmlDecl = jdita.attributes?.xmlDecl as XMLDecl;

jdita.children?.forEach(child => {
doc.add(jditaToAst(child));
});
Expand Down
53 changes: 47 additions & 6 deletions packages/lwdita-xdita/src/xdita-serializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,61 @@ export class XditaSerializer {
* Serialize a document node to the output stream.
*
* @param node - the document node to serialize
*/
*/
private serializeDocument(node: DocumentNode): void {
// emit the XML declaration and doctype declaration
const xmlDeclaration = `<?xml version="${node.xmlDecl?.version || "1.0"}" encoding="${node.xmlDecl?.encoding || "UTF-8"}"?>`;
const docTypeDeclaration = `<!DOCTYPE${node.doctype || ' topic PUBLIC "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN" "lw-topic.dtd"'}>`;

this.outputStream.emit(xmlDeclaration);
this.outputStream.emit(this.xmlDeclString(node));
this.outputStream.emit(this.EOL);
this.outputStream.emit(docTypeDeclaration);
this.outputStream.emit(this.docTypeDeclString(node));
this.outputStream.emit(this.EOL);
// a document node has no string representation, so move on to its children
node.children.forEach(child => this.serialize(child, node));
}

/**
 * Construct the doctype declaration string
 *
 * Falls back to the LwDITA topic doctype when the document node carries no
 * doctype declaration.
 *
 * @param node - the document node to serialize
 * @returns doctype declaration string
 */
private docTypeDeclString(node: DocumentNode): string {
  const docTypeDecl = node.docTypeDecl;
  if (!docTypeDecl) {
    return "<!DOCTYPE topic PUBLIC \"-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN\" \"lw-topic.dtd\">";
  }

  // Delimit a literal with the quote character it does not contain,
  // so that a literal holding a double quote is not cut short.
  const quote = (literal: string): string =>
    literal.includes('"') ? `'${literal}'` : `"${literal}"`;

  const { name, publicId, systemId } = docTypeDecl;
  if (publicId) {
    // PUBLIC must be followed by a system literal; a document built from JDita
    // may lack one, so emit an empty literal rather than the text "undefined".
    return `<!DOCTYPE ${name} PUBLIC ${quote(publicId)} ${quote(systemId ?? "")}>`;
  }
  if (systemId) {
    return `<!DOCTYPE ${name} SYSTEM ${quote(systemId)}>`;
  }
  return `<!DOCTYPE ${name}>`;
}

/**
 * Construct the XML declaration string
 *
 * The version defaults to "1.0" and the encoding to "UTF-8" when they are
 * not available; the standalone parameter is only emitted when it is set.
 *
 * @param node - the document node to serialize
 * @returns XML declaration string
 */
private xmlDeclString(node: DocumentNode): string {
  const xmlDecl = node.xmlDecl;

  // A declaration restored from JDita is not validated, so the version may be
  // missing at runtime; use the same "1.0" default the parser handler applies
  // instead of emitting version="undefined".
  const version = xmlDecl?.version || "1.0";
  const encoding = xmlDecl?.encoding || "UTF-8";

  let xmlDeclaration = `<?xml version="${version}" encoding="${encoding}"`;
  if (xmlDecl && xmlDecl.standalone !== undefined) {
    xmlDeclaration += ` standalone="${xmlDecl.standalone ? "yes" : "no"}"`;
  }
  xmlDeclaration += "?>";

  return xmlDeclaration;
}

/**
* Serialize an element node to the output stream.
*
Expand Down
38 changes: 34 additions & 4 deletions packages/lwdita-xdita/test/converter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ describe('jditaToXdita', () => {
const xditaJson = await xditaToJdita(xdita);
const json = {
"nodeName": "document",
"attributes": undefined,
"attributes": {
"xmlDecl": undefined,
"docTypeDecl": undefined
},
"children": [
{
"nodeName": "topic",
Expand Down Expand Up @@ -296,8 +299,35 @@ describe('A round trip conversion between xdita, ast, and jdita', () => {
const serializer = new XditaSerializer(outStream);
serializer.serialize(newAst);
const newXdita = outStream.getText();
const declaration = `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE topic PUBLIC "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN" "lw-topic.dtd">\n`;
const expected = declaration + xdita;
expect(newXdita).to.equal(expected);
expect(newXdita).to.equal(xdita);
});
});

// Verifies that the XML declaration and the DOCTYPE declaration survive the
// full conversion chain xdita -> ast -> jdita -> ast -> xdita unchanged.
describe('Round trip with custom doctype and xml declaration', () => {
  const cases = [
    {test:"custom doctype", header: `<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<!DOCTYPE topic PUBLIC "-//OASIS//DTD Custom Topic//EN" "lw-topic.dtd">\n`},
    {test:"custom XML declaration", header: `<?xml version="1.6" encoding="UTF-8" standalone="yes"?>\n<!DOCTYPE topic PUBLIC "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN" "lw-topic.dtd">\n`},
    {test:"default XML declaration", header: `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE topic PUBLIC "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN" "lw-topic.dtd">\n`},
  ];

  for (const { test, header } of cases) {
    it(`round trip with ${test}`, async () => {
      const source = header + `<topic id="topicID"><title>text content</title></topic>`;

      // xdita -> ast -> jdita
      const firstAst = await xditaToAst(source);
      const intermediateJdita = astToJdita(firstAst);

      // jdita -> ast
      const rebuiltAst = jditaToAst(intermediateJdita);

      // ast -> xdita
      const collector = new InMemoryTextSimpleOutputStreamCollector();
      const xditaSerializer = new XditaSerializer(collector);
      xditaSerializer.serialize(rebuiltAst);

      expect(collector.getText()).to.equal(source);
    });
  }
});
27 changes: 25 additions & 2 deletions packages/lwdita-xdita/test/test-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,24 @@ export const AUDIO_NODE_OBJECT = {
};
export const XMLNODE_UNKNOWN = `{"name":"unknown","attributes":{},"ns":{},"prefix":"","local":"audio","uri":"","isSelfClosing":true}`;

export const fullXditaExample = `<topic id="fullTopic"><title dir="ltr" xml:lang="english" translate="no">
export const fullXditaExample = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE topic PUBLIC "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN" "lw-topic.dtd">
<topic id="fullTopic"><title dir="ltr" xml:lang="english" translate="no">
<b>bold</b> and <em>emphasized</em> and <i>italic</i> and <ph>Phrase content</ph> and <strong>strong</strong>
and <sub>subscript</sub> and <sup>superscipt</sup> and <tt>tele type</tt> and <u>underline</u><image/>
</title><shortdesc>Short description of the full topic.</shortdesc><prolog props="metadata"><metadata><othermeta name="test" content="test"/></metadata></prolog><body outputclass="outputclass"><p>Paragraph content</p><ul><li><p>Unordered list item</p></li></ul><ol><li><p>Ordered list item</p></li></ol><dl><dlentry><dt>Definition term</dt><dd><p>Definition description</p></dd></dlentry></dl><pre>Preformatted content</pre><audio autoplay="false" controls="true" loop="false" muted="false"><desc>Theme song for the LwDITA podcast</desc><fallback><p>The theme song is not available.</p></fallback><media-source href="theme-song.mp3"/><media-track href="theme-song.vtt" srclang="en"/></audio><video width="400px" height="300px" loop="false" muted="false"><desc>Video about the Sensei Sushi promise.</desc><fallback><image href="video-not-available.png"><alt>This video cannot be displayed.</alt></image></fallback><video-poster href="sensei-video.jpg"/><media-source href="sensei-video.mp4"/><media-source href="sensei-video.ogg"/><media-source href="sensei-video.webm"/><media-track href="sensei-video.vtt" srclang="en"/></video><example><title>title</title></example><simpletable><title>Table title</title><sthead><stentry><p>Header 1</p></stentry><stentry><p>Header 2</p></stentry></sthead><strow><stentry><p>Row 1, Cell 1</p></stentry><stentry><p>Row 1, Cell 2</p></stentry></strow><strow><stentry><p>Row 2, Cell 1</p></stentry><stentry><p>Row 2, Cell 2</p></stentry></strow></simpletable><fig><title>Figure title</title><desc>Figure description</desc><image href="images/image.png"><alt>alt text</alt></image></fig><note type="note"><p>Note content</p></note><section><title>Section title</title><p>Section content</p></section><div><fn id="footnote"/></div></body></topic>`

export const fullAstObject = {
xmlDecl: {
version: "1.0",
encoding: "UTF-8",
standalone: undefined,
},
docTypeDecl: {
name: "topic",
publicId: "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN",
systemId: "lw-topic.dtd",
},
_children: [
{
_props: {
Expand Down Expand Up @@ -1359,7 +1371,18 @@ export const fullAstObject = {

export const fullJditaObject = {
nodeName: "document",
attributes: undefined,
attributes: {
xmlDecl: {
version: "1.0",
encoding: "UTF-8",
standalone: undefined,
},
docTypeDecl: {
name: "topic",
publicId: "-//OASIS//DTD LIGHTWEIGHT DITA Topic//EN",
systemId: "lw-topic.dtd",
},
},
children: [
{
nodeName: "topic",
Expand Down
5 changes: 3 additions & 2 deletions packages/lwdita-xdita/test/xdita-serializer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,13 @@ describe('handles custom xml declaration and doctype', () => {
it('should read and output custom doctype', async () => {
const { serializer, outStream } = newSerializer(false);

const input = '<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Some random declaration for testing>\n<topic><title>Hello World</title><body><p>Good\nMorning</p></body></topic>'
const input = '<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE test PUBLIC "PUB" "SYS">\n<topic><title>Hello World</title><body><p>Good\nMorning</p></body></topic>'

const orginalAst = await xditaToAst(input);
serializer.serialize(orginalAst);


const declaration = `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE Some random declaration for testing>\n`;
const declaration = `<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE test PUBLIC "PUB" "SYS">\n`;
const expected = declaration + "<topic><title>Hello World</title><body><p>Good\nMorning</p></body></topic>"

const actual = outStream.getText()
Expand Down

0 comments on commit d06d4bd

Please sign in to comment.