Release 5.2.1

Xceptance · Nov 4, 2020 · 1f9d0f1 · 1f9d0f1
2 parents 9075bcf + cfb2a51
commit 1f9d0f1
Show file tree

Hide file tree

Showing 8 changed files with 376 additions and 237 deletions.
diff --git a/doc/xltdoc/release-notes/5.2.x.textile b/doc/xltdoc/release-notes/5.2.x.textile
@@ -5,6 +5,18 @@ position: 982
 sorted: true
 ---
 
+h2. XLT 5.2.1
+
+See "here":https://github.com/Xceptance/XLT/milestone/7?closed=1 for the complete list of improvements and fixes.
+
+h3. Test Framework
+
+h4. Support content types with additional parameters
+
+XLT tries to detect the character set of received responses in different ways, for example by inspecting the @Content-Type@ response header and the content-type @meta@ tag. If the server returned a content-type value that contained additional parameters, such as @application/hal+json;charset=utf8;profile="https://my.api.com/";version=1@, XLT did not extract the character set correctly. This has been fixed.
+
+
+
 h2. XLT 5.2.0
 
 See "here":https://github.com/Xceptance/XLT/milestone/6?closed=1 for the complete list of improvements and fixes.

diff --git a/pom.xml b/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>com.xceptance</groupId>
     <artifactId>xlt</artifactId>
-    <version>5.2.0</version>
+    <version>5.2.1</version>
     <packaging>jar</packaging>
 
     <name>XLT</name>
@@ -579,5 +579,11 @@
             <version>${asm.version}</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>pl.pragmatists</groupId>
+            <artifactId>JUnitParams</artifactId>
+            <version>1.1.1</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
 </project>
diff --git a/src/main/java/com/gargoylesoftware/htmlunit/javascript/host/css/CSSStyleSheet.java b/src/main/java/com/gargoylesoftware/htmlunit/javascript/host/css/CSSStyleSheet.java
@@ -936,7 +936,6 @@ private static CSSStyleSheetImpl parseCSS(final InputSource source, final WebCli
             final CSSOMParser parser = new CSSOMParser(new CSS3Parser());
             parser.setErrorHandler(errorHandler);
             ss = parser.parseStyleSheet(source, null);
-            System.out.println(errorHandler);
         }
         catch (final Throwable t) {
             if (LOG.isErrorEnabled()) {

diff --git a/src/main/java/com/xceptance/xlt/api/htmlunit/LightWeightPage.java b/src/main/java/com/xceptance/xlt/api/htmlunit/LightWeightPage.java
@@ -128,37 +128,71 @@ private Charset determineContentCharset()
     {
         if (response != null)
         {
+            /*
+             * TODO: I would love to replace all this code with a simple "response.getContentCharset()" as it is much
+             * more elaborate and robust. Unfortunately, that method behaves (slightly) differently. Maybe we can do this
+             * with the next major version.
+             */
+
             // 1st: get value of content-type response header
-            String charsetName = StringUtils.substringAfter(response.getResponseHeaderValue("content-type"), "charset=");
-            if (StringUtils.isEmpty(charsetName))
+            String charsetName = getCharsetNameFromContentTypeHeader(response);
+            if (StringUtils.isBlank(charsetName))
             {
                 final String content = response.getContentAsString(StandardCharsets.ISO_8859_1);
-                if (!StringUtils.isEmpty(content))
+                if (StringUtils.isNotBlank(content))
                 {
-                    // 2nd: get the encoding attribute from a potential <?xml?>
-                    // header (in case of XHTML)
-                    charsetName = RegExUtils.getFirstMatch(content, "<\\?xml.*? encoding=\"(.+?)\".*?\\?>", 1);
-                    if (StringUtils.isEmpty(charsetName))
+                    // 2nd: get the encoding attribute from a potential <?xml?> header (in case of XHTML)
+                    charsetName = RegExUtils.getFirstMatch(content, "<\\?xml\\s[^>]*?encoding=\"([^\"]+)", 1);
+                    if (StringUtils.isBlank(charsetName))
                     {
-                        // 3rd: get declared charset in content-type meta tag
-                        charsetName = RegExUtils.getFirstMatch(content, "<meta [^>]*?content=\"[^\"]*?charset=([^\";]+)\"", 1);
+                        // 3rd: get declared charset from a content-type meta tag
+                        charsetName = RegExUtils.getFirstMatch(content, "<meta\\s[^>]*?content=\"[^\"]*?charset=([^\";]+)", 1);
+                        if (StringUtils.isBlank(charsetName))
+                        {
+                            // 4th: get declared charset from a charset meta tag
+                            charsetName = RegExUtils.getFirstMatch(content, "<meta\\s+charset=\"([^\"]+)", 1);
+                        }
                     }
                 }
 
-                if (StringUtils.isEmpty(charsetName))
+                if (StringUtils.isBlank(charsetName))
                 {
-                    // 4th: get content charset of request settings
+                    // 5th: get content charset of request settings
                     final WebRequest request = response.getWebRequest();
                     charsetName = request != null ? request.getCharset().name() : null;
                 }
             }
 
-            if (!StringUtils.isEmpty(charsetName) && Charset.isSupported(charsetName))
+            // now see what we have got
+            if (StringUtils.isNotBlank(charsetName))
             {
-                return Charset.forName(charsetName);
+                charsetName = charsetName.trim();
+                if (Charset.isSupported(charsetName))
+                {
+                    return Charset.forName(charsetName);
+                }
             }
         }
 
         return StandardCharsets.ISO_8859_1;
     }
+    /**
+     * Returns the raw value of the charset parameter found in the "content-type" header of the given response.
+     * <p>
+     * The value is whatever text sits between the first occurrence of "charset=" and the next ";" in the header
+     * value. It is returned as is: it is neither trimmed nor stripped of quotes here, so callers have to sanitize
+     * and validate it themselves.
+     * <p>
+     * NOTE(review): StringUtils is presumably Apache Commons Lang; if so, the "charset=" lookup is a case-sensitive
+     * substring search and the result is null when the header carries no charset parameter - confirm.
+     *
+     * @param response
+     *            the response whose "content-type" header is inspected; it is dereferenced without a null check
+     * @return the raw charset parameter value, or <code>null</code> if the response has no "content-type" header
+     *         (and presumably also if the header has no charset parameter, see the note above)
+     */
+    private String getCharsetNameFromContentTypeHeader(final WebResponse response)
+    {
+        final String contentType = response.getResponseHeaderValue("content-type");
+
+        if (contentType == null)
+        {
+            return null;
+        }
+        else
+        {
+            // Examples of header values this method has to cope with:
+            // - text/plain
+            // - text/plain; charset=utf-8
+            // - application/hal+json;charset=utf8;profile="https://my.api.com/";version=1
+            // Append ";" so that a charset parameter at the very end of the header is terminated by a delimiter as well.
+            return StringUtils.substringBetween(contentType + ";", "charset=", ";");
+        }
+    }
 }