Skip to content

Commit

Permalink
Use W3CDom class for converting from JSoup document.
Browse files Browse the repository at this point in the history
  • Loading branch information
Andreas Rosdal committed May 27, 2024
1 parent 49dd8de commit 1895583
Showing 1 changed file with 4 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,16 @@
package org.xhtmlrenderer.resource;

import org.jsoup.Jsoup;
import org.jsoup.helper.W3CDom;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.xhtmlrenderer.util.XRLog;
import org.xhtmlrenderer.util.XRRuntimeException;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.net.URL;

import javax.annotation.ParametersAreNonnullByDefault;
import javax.xml.transform.TransformerException;

/**
* HTMLResource uses JSoup to parse XML resources.
Expand All @@ -47,8 +42,9 @@ private HTMLResource(InputStream stream) {
super(stream);
try {
Document jsoupDoc = Jsoup.parse(stream, StandardCharsets.UTF_8.name(), "", Parser.xmlParser());
this.document = convertJsoupToW3CDocument(jsoupDoc);
} catch (IOException | ParserConfigurationException | TransformerException | SAXException e) {
W3CDom w3cDom = new W3CDom();
this.document = w3cDom.fromJsoup(jsoupDoc);
} catch (IOException e) {
XRLog.load(java.util.logging.Level.SEVERE, "Failed to parse and convert HTML document.", e);
throw new XRRuntimeException("Failed to parse and convert HTML document.", e);
}
Expand Down Expand Up @@ -85,13 +81,6 @@ public org.w3c.dom.Document getDocument() {
return document;
}

/**
 * Converts a jsoup document into a W3C DOM document by serializing it with
 * {@code outerHtml()} and re-parsing the UTF-8 bytes with a JAXP {@link DocumentBuilder}.
 *
 * <p>The markup being re-parsed originates from an external stream, so the factory is
 * hardened against XXE: external DTDs are not loaded and external general/parameter
 * entities are not resolved. Documents that merely carry a DOCTYPE are still accepted.
 *
 * @param jsoupDoc the jsoup document to convert
 * @return the W3C DOM document produced by re-parsing the serialized markup
 * @throws ParserConfigurationException if the builder cannot be created or the parser
 *         implementation rejects one of the hardening features (fails closed on purpose)
 * @throws IOException if reading the serialized bytes fails
 * @throws TransformerException never thrown by this implementation; kept in the signature
 *         so existing callers that catch it keep compiling
 * @throws SAXException if the serialized markup is not well-formed XML
 */
private static org.w3c.dom.Document convertJsoupToW3CDocument(Document jsoupDoc) throws ParserConfigurationException, IOException, TransformerException, SAXException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Never fetch or expand anything that lives outside the document itself.
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    // Skip the external DTD instead of forbidding DOCTYPE, so XHTML input with a DOCTYPE still parses.
    factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    DocumentBuilder builder = factory.newDocumentBuilder();
    String html = jsoupDoc.outerHtml();
    return builder.parse(new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8)));
}

private static InputStream convertReaderToInputStream(Reader reader) throws IOException {
char[] charBuffer = new char[8 * 1024];
StringBuilder builder = new StringBuilder();
Expand Down

0 comments on commit 1895583

Please sign in to comment.