Skip to content

Commit

Permalink
Merge pull request #81 from ecosia/mw-improve-sanitizer
Browse files Browse the repository at this point in the history
Improve sanitizer
  • Loading branch information
axlwaii authored Nov 4, 2022
2 parents 0c3baf9 + bf395d1 commit 24660d2
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 5 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vue-safe-html",
"version": "2.1.0",
"version": "2.2.0",
"description": "A Vue directive which renders sanitised HTML dynamically",
"main": "dist/main.js",
"repository": "git@github.com:ecosia/vue-safe-html.git",
Expand Down
31 changes: 27 additions & 4 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,32 @@
*/
// eslint-disable-next-line import/prefer-default-export
export const sanitizeHTML = (htmlString, allowedTags = []) => {
// Append an optional white space pattern (\s*) to every allowed tag name,
// so that a tag written with trailing white space (e.g. '<p >') still counts as allowed.
// NOTE(review): tag names are interpolated into the RegExp without escaping;
// a name containing regex metacharacters would change the pattern.
const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`);

// Remove tag attributes
// Every '<name ...>' is replaced by '<name>'. Closing tags are left untouched here,
// because a word character must directly follow the '<'.
// The solution for this was found on:
// https://stackoverflow.com/questions/4885891/regex-for-removing-all-attributes-from-a-paragraph
// NOTE(review): the lazy match stops at the first '>', including a '>' that sits
// inside a quoted attribute value; the remainder of that attribute stays in the output.
const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>');

const expression = (allowedTags.length > 0) ?
`<(?!((?:/s*)?(?:${allowedTags.join('|')})))([^>])+>` :
'<[^>]*>';
const regExp = new RegExp(expression, 'g');
return htmlString.replace(regExp, '');
// Regex explanation
// Note: \ needs to be escaped in the final expression
// '<' Match the opening bracket of a tag
// '(?!' Open a negative lookahead:
// we only want to match the tags that are not in the allowedTags array
// '\s*' Optional white space characters before the optional /
// '\/?' Matches / zero or one time, for the closing tag
// '\s*' Optional white space characters after the optional /
// '(${allowedTagsWhiteSpaced.join('|')})>' Matching group of the allowed tags, each followed
// by optional white space; the '>' right after it means only an exact allowed name qualifies
// ')' Close the negative lookahead
// '\w*[^<>]*' Consumes the rest of the tag: any characters except < and >
// '>' Match the closing bracket of the tag
`<(?!\\s*\\/?\\s*(${allowedTagsWhiteSpaced.join('|')})>)\\w*[^<>]*>` :
// Strips all tags
'<(\\/?\\w*)\\w*[^<>]*>';

// The 'm' flag only changes how ^ and $ behave; neither expression above uses them.
const regExp = new RegExp(expression, 'gm');
return htmlWithoutAttributes.replace(regExp, '');
};
35 changes: 35 additions & 0 deletions src/utils.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,40 @@ describe('Utils', () => {
const expected = 'An html<br><strong>string</strong>';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});

// Tags that are not in allowedTags (here p and the self-closing input) are removed,
// while their text content and the allowed tags are kept.
it('Strips input tags', () => {
const allowedTags = ['strong', 'i'];
const given = '<p><i>An</i> <strong>input field</strong><input type="button" /></p>';
const expected = '<i>An</i> <strong>input field</strong>';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});

// A tag whose name merely starts with, ends with or repeats an allowed name
// (sp, sssp, script, blockquote) must not be treated as allowed.
it('Strips similar tags', () => {
const allowedTags = ['p', 'b', 's'];
const given = '<sp>Test1</sp> <sssp>Test2</sssp><script></script> <blockquote>quote</blockquote>';
const expected = 'Test1 Test2 quote';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});

// A closing tag with white space after the slash is still recognised as an
// allowed tag and is returned unchanged.
it('Considers whitespaces', () => {
const allowedTags = ['p'];
const given = '<p>Test1</ p><p>Test2</ p>';
const expected = '<p>Test1</ p><p>Test2</ p>';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});

// With no allowed tags every tag is stripped, including tags with white space
// after the name ('<strong >') or directly after the opening bracket ('< i>').
it('Removes all tags with empty allowed tags', () => {
const allowedTags = [];
const given = '<p>Test1</p> <strong >Test2</strong> < i>Test3</i>';
const expected = 'Test1 Test2 Test3';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});

// Attributes (double- or single-quoted) are dropped from allowed tags;
// tags that are not allowed are removed entirely, keeping only their text.
it('Removes attributes from html', () => {
const allowedTags = ['p'];
const given = '<p data-test="test" title="test2">Test1</p> <strong data-test=\'test2\'>Test2</strong>';
const expected = '<p>Test1</p> Test2';
expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
});
});
});

0 comments on commit 24660d2

Please sign in to comment.