Merge pull request #81 from ecosia/mw-improve-sanitizer

Improve sanitizer
ecosia · Nov 4, 2022 · 24660d2 · 24660d2
2 parents 0c3baf9 + bf395d1
commit 24660d2
Show file tree

Hide file tree

Showing 3 changed files with 63 additions and 5 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "vue-safe-html",
-  "version": "2.1.0",
+  "version": "2.2.0",
   "description": "A Vue directive which renders sanitised HTML dynamically",
   "main": "dist/main.js",
   "repository": "[email protected]:ecosia/vue-safe-html.git",

diff --git a/src/utils.js b/src/utils.js
@@ -6,9 +6,32 @@
  */
 // eslint-disable-next-line import/prefer-default-export
 export const sanitizeHTML = (htmlString, allowedTags = []) => {
+  // Add an optional white space to the allowed tags
+  const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`);
+
+  // Remove tag attributes
+  // The solution for this was found on:
+  // https://stackoverflow.com/questions/4885891/regex-for-removing-all-attributes-from-a-paragraph
+  const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>');
+
   const expression = (allowedTags.length > 0) ?
-    `<(?!((?:/s*)?(?:${allowedTags.join('|')})))([^>])+>` :
-    '<[^>]*>';
-  const regExp = new RegExp(expression, 'g');
-  return htmlString.replace(regExp, '');
+    // Regex explanation
+    // Note: \ needs to be escaped in the final expression
+    // '<' Match the starting tag
+    // '(' Create a matching group
+    // '?!' Use negative lookahead
+    //      we only want to match the tags that are not in the allowedTags array
+    // '\s*' Optional match of any white space character before optional /
+    // '\/?' Matches / zero to one time for the closing tag
+    // '\s*' Optional match of any white space character after optional /
+    // '(${allowedTagsWhiteSpaced.join('|')})>' matching group of the allowed tags
+    // ')' close the matching group of negative lookahead
+    // '\w*[^<>]*' matches any word that isn't in the excluded group
+    // '>' Match closing tag
+    `<(?!\\s*\\/?\\s*(${allowedTagsWhiteSpaced.join('|')})>)\\w*[^<>]*>` :
+    // Strips all tags
+    '<(\\/?\\w*)\\w*[^<>]*>';
+
+  const regExp = new RegExp(expression, 'gm');
+  return htmlWithoutAttributes.replace(regExp, '');
 };
diff --git a/src/utils.test.js b/src/utils.test.js
@@ -14,5 +14,40 @@ describe('Utils', () => {
       const expected = 'An html<br><strong>string</strong>';
       expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
     });
+
+    it('Strips input tags', () => {
+      const allowedTags = ['strong', 'i'];
+      const given = '<p><i>An</i> <strong>input field</strong><input type="button" /></p>';
+      const expected = '<i>An</i> <strong>input field</strong>';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
+
+    it('Strips similar tags', () => {
+      const allowedTags = ['p', 'b', 's'];
+      const given = '<sp>Test1</sp> <sssp>Test2</sssp><script></script> <blockquote>quote</blockquote>';
+      const expected = 'Test1 Test2 quote';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
+
+    it('Considers whitespaces', () => {
+      const allowedTags = ['p'];
+      const given = '<p>Test1</ p><p>Test2</  p>';
+      const expected = '<p>Test1</ p><p>Test2</  p>';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
+
+    it('Removes all tags with empty allowed tags', () => {
+      const allowedTags = [];
+      const given = '<p>Test1</p> <strong  >Test2</strong> <  i>Test3</i>';
+      const expected = 'Test1 Test2 Test3';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
+
+    it('Removes attributes from html', () => {
+      const allowedTags = ['p'];
+      const given = '<p data-test="test" title="test2">Test1</p> <strong data-test=\'test2\'>Test2</strong>';
+      const expected = '<p>Test1</p> Test2';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
   });
 });