From 66221dae613067d5c7827867d0ffad77ba301688 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Thu, 3 Nov 2022 13:55:24 +0100 Subject: [PATCH 01/10] Rework regular expression and add explanation --- src/utils.js | 24 +++++++++++++++++++++--- src/utils.test.js | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/utils.js b/src/utils.js index 0ea23dd..e5a4f63 100644 --- a/src/utils.js +++ b/src/utils.js @@ -6,9 +6,27 @@ */ // eslint-disable-next-line import/prefer-default-export export const sanitizeHTML = (htmlString, allowedTags = []) => { + // Add an optional white space to the allowed tags + const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`); + const expression = (allowedTags.length > 0) ? - `<(?!((?:/s*)?(?:${allowedTags.join('|')})))([^>])+>` : - '<[^>]*>'; - const regExp = new RegExp(expression, 'g'); + // Regex explanation + // Note: \ needs to be escaped in the final expression + // '<' Match the starting tag + // '(' Create a matching group + // '?!' Use negative lookup + // we only want to match the tags that are not in the allowedTags array + // '\s*?' Optional match of any white space charater before optional / + // '\/?' Matches / zero to one time for the closing tag + // '\s*?' Optional match of any white space charater after optional / + // '(${allowedTags.join('\s*|')})>' matching group of the allowed tags + // ')' close the matching group of negative lookup + // '\w*[^<>]*' matches any word that isn't in the excluded group + // '>' Match closing tagq + `<(?!\\s*\/?\\s*(${allowedTags.join('|')})>)\\w*[^<>]*>` : + // Strips all tags + '<(\/?\\w*)\\w*[^<>]*>'; + + const regExp = new RegExp(expression, 'gm'); return htmlString.replace(regExp, ''); }; diff --git a/src/utils.test.js b/src/utils.test.js index 099c20e..c24dbfa 100644 --- a/src/utils.test.js +++ b/src/utils.test.js @@ -14,5 +14,26 @@ describe('Utils', () => { const expected = 'An html
string'; expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); }); + + it('Strips input tags', () => { + const allowedTags = ['strong', 'i']; + const given = '

An input field

'; + const expected = 'An input field'; + expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); + }); + + it('Strips similar tags', () => { + const allowedTags = ['p']; + const given = 'Test1 Test2'; + const expected = 'Test1 Test2'; + expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); + }); + + it('Considers whitespaces', () => { + const allowedTags = ['p']; + const given = '

Test1

Test2'; + const expected = '

Test1

Test2'; + expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); + }); }); }); From 6c469220bf2db2c99423c6df7fcff9464c6d00d0 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Thu, 3 Nov 2022 13:56:45 +0100 Subject: [PATCH 02/10] Fix explanation comment --- src/utils.js | 2 +- src/utils.test.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils.js b/src/utils.js index e5a4f63..8a33f27 100644 --- a/src/utils.js +++ b/src/utils.js @@ -19,7 +19,7 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { // '\s*?' Optional match of any white space charater before optional / // '\/?' Matches / zero to one time for the closing tag // '\s*?' Optional match of any white space charater after optional / - // '(${allowedTags.join('\s*|')})>' matching group of the allowed tags + // '(${allowedTags.join('|')})>' matching group of the allowed tags // ')' close the matching group of negative lookup // '\w*[^<>]*' matches any word that isn't in the excluded group // '>' Match closing tagq diff --git a/src/utils.test.js b/src/utils.test.js index c24dbfa..1295139 100644 --- a/src/utils.test.js +++ b/src/utils.test.js @@ -17,7 +17,7 @@ describe('Utils', () => { it('Strips input tags', () => { const allowedTags = ['strong', 'i']; - const given = '

An input field

'; + const given = '

An input field

'; const expected = 'An input field'; expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); }); From 0ce58da80f63df76b994aee47a735f4ce21aad32 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Thu, 3 Nov 2022 14:03:01 +0100 Subject: [PATCH 03/10] Escape optional matching / --- src/utils.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils.js b/src/utils.js index 8a33f27..25d60b6 100644 --- a/src/utils.js +++ b/src/utils.js @@ -23,9 +23,9 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { // ')' close the matching group of negative lookup // '\w*[^<>]*' matches any word that isn't in the excluded group // '>' Match closing tagq - `<(?!\\s*\/?\\s*(${allowedTags.join('|')})>)\\w*[^<>]*>` : + `<(?!\\s*\\/?\\s*(${allowedTagsWhiteSpaced.join('|')})>)\\w*[^<>]*>` : // Strips all tags - '<(\/?\\w*)\\w*[^<>]*>'; + '<(\\/?\\w*)\\w*[^<>]*>'; const regExp = new RegExp(expression, 'gm'); return htmlString.replace(regExp, ''); From f14c0942b857fa3e2ef27b7129cf416ed12d0986 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Thu, 3 Nov 2022 14:03:33 +0100 Subject: [PATCH 04/10] Add test for empty allowed tags --- src/utils.test.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/utils.test.js b/src/utils.test.js index 1295139..681e953 100644 --- a/src/utils.test.js +++ b/src/utils.test.js @@ -35,5 +35,12 @@ describe('Utils', () => { const expected = '

Test1

Test2'; expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); }); + + it('Removes all tags with empty allowed tags', () => { + const allowedTags = []; + const given = '

Test1

Test2 < i>Test3'; + const expected = 'Test1 Test2 Test3'; + expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); + }); }); }); From c1e0dee9c1591080b7b0a1498af12749392a3479 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Thu, 3 Nov 2022 14:09:34 +0100 Subject: [PATCH 05/10] Extend similar tags test --- src/utils.test.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/utils.test.js b/src/utils.test.js index 681e953..bfa17da 100644 --- a/src/utils.test.js +++ b/src/utils.test.js @@ -23,9 +23,9 @@ describe('Utils', () => { }); it('Strips similar tags', () => { - const allowedTags = ['p']; - const given = 'Test1 Test2'; - const expected = 'Test1 Test2'; + const allowedTags = ['p', 'b', 's']; + const given = 'Test1 Test2
quote
'; + const expected = 'Test1 Test2 quote'; expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); }); From 8cf34aeb7f2fe5b2e999641dd55edd4848a0064d Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Fri, 4 Nov 2022 10:41:59 +0100 Subject: [PATCH 06/10] Remove all attributes from html tags --- src/utils.js | 6 +++++- src/utils.test.js | 7 +++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/utils.js b/src/utils.js index 25d60b6..4cebaf6 100644 --- a/src/utils.js +++ b/src/utils.js @@ -8,6 +8,10 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { // Add an optional white space to the allowed tags const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`); + //const htmlAttributeRegex = new RegExp('<\\w*\\s*(\\w*[-]?\\w*=[\",\'].*[\",\'])>'); + + // Remove tag attributes + const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>'); const expression = (allowedTags.length > 0) ? // Regex explanation @@ -28,5 +32,5 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { '<(\\/?\\w*)\\w*[^<>]*>'; const regExp = new RegExp(expression, 'gm'); - return htmlString.replace(regExp, ''); + return htmlWithoutAttributes.replace(regExp, ''); }; diff --git a/src/utils.test.js b/src/utils.test.js index bfa17da..4db69a9 100644 --- a/src/utils.test.js +++ b/src/utils.test.js @@ -42,5 +42,12 @@ describe('Utils', () => { const expected = 'Test1 Test2 Test3'; expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); }); + + it('Removes attributes from html', () => { + const allowedTags = ['p']; + const given = '

Test1

Test2'; + const expected = '

Test1

Test2'; + expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected); + }); }); }); From 5db85da6cbe63a1993bd1ba5227b057351daf193 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Fri, 4 Nov 2022 10:43:42 +0100 Subject: [PATCH 07/10] Add credit for tag attribute removal --- src/utils.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.js b/src/utils.js index 4cebaf6..6b2c763 100644 --- a/src/utils.js +++ b/src/utils.js @@ -11,6 +11,8 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { //const htmlAttributeRegex = new RegExp('<\\w*\\s*(\\w*[-]?\\w*=[\",\'].*[\",\'])>'); // Remove tag attributes + // The solution for this was found on: + // https://stackoverflow.com/questions/4885891/regex-for-removing-all-attributes-from-a-paragraph const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>'); const expression = (allowedTags.length > 0) ? From 75d8643dcf8149c3a7a55ee3823dc7cc57b774c6 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Fri, 4 Nov 2022 10:45:00 +0100 Subject: [PATCH 08/10] Clean up comment --- src/utils.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils.js b/src/utils.js index 6b2c763..5492f75 100644 --- a/src/utils.js +++ b/src/utils.js @@ -8,7 +8,6 @@ export const sanitizeHTML = (htmlString, allowedTags = []) => { // Add an optional white space to the allowed tags const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`); - //const htmlAttributeRegex = new RegExp('<\\w*\\s*(\\w*[-]?\\w*=[\",\'].*[\",\'])>'); // Remove tag attributes // The solution for this was found on: From 80d03136329f939ad1a93fcb2696f90dfd3090c8 Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Fri, 4 Nov 2022 10:45:55 +0100 Subject: [PATCH 09/10] Increase version number --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b92c40c..5fe7363 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "vue-safe-html", - "version": "2.1.0", + "version": "2.1.1", "description": "A Vue directive which renders sanitised HTML dynamically", "main": "dist/main.js", "repository": "git@github.com:ecosia/vue-safe-html.git", From bf395d1d5760ebe4f7628b985b3c9013156b8e3c Mon Sep 17 00:00:00 2001 From: Markus Waitl Date: Fri, 4 Nov 2022 10:57:01 +0100 Subject: [PATCH 10/10] Bump version by minor --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 5fe7363..11bb48e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "vue-safe-html", - "version": "2.1.1", + "version": "2.2.0", "description": "A Vue directive which renders sanitised HTML dynamically", "main": "dist/main.js", "repository": "git@github.com:ecosia/vue-safe-html.git",