diff --git a/README.md b/README.md index b9c8005..04a031f 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,7 @@ Each node is processed in the following sequence: | [opts.allow_attributes_by_tag] | [TagAttributeNameSpec](#TagAttributeNameSpec) | {} | Matching attribute names of a matching node are kept. Other attributes are removed. | | [opts.allow_classes_by_tag] | [TagClassNameSpec](#TagClassNameSpec) | {} | Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed. | | [opts.remove_empty] | boolean | false | Remove nodes which are completely empty or contain only white space. | +| [opts.join_siblings] | [Array.<Tagname>](#Tagname) | [] | Join same-tag sibling nodes of given tag names, unless of course they are separated by non-whitespace textNodes. | diff --git a/package.json b/package.json index 5e54ddf..1459856 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "sanitize-dom", - "version": "1.0.0", + "version": "1.0.1", "description": "", "main": "src/index.js", "directories": { diff --git a/src/sanitize-dom.js b/src/sanitize-dom.js index a4cd6cf..01e224e 100644 --- a/src/sanitize-dom.js +++ b/src/sanitize-dom.js @@ -158,6 +158,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI * @param {TagAttributeNameSpec} [opts.allow_attributes_by_tag={}] - Matching attribute names of a matching node are kept. Other attributes are removed. * @param {TagClassNameSpec} [opts.allow_classes_by_tag={}] - Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed. * @param {boolean} [opts.remove_empty=false] Remove nodes which are completely empty or contain only white space. + * @param {Tagname[]} [opts.join_siblings=[]] Join same-tag sibling nodes of given tag names, unless of course they are separated by non-whitespace textNodes. 
/**
 * Join same-tag sibling nodes among the direct children of `parent`.
 *
 * Two siblings are joined when they have the same `nodeName`, that name is
 * listed in `tags`, and they are either directly adjacent or separated by a
 * single text node that contains nothing but white space. The separating
 * text node (if any) and every child of the later sibling are appended to
 * the earlier sibling, after which the emptied later sibling is removed.
 * Joining continues until no joinable pair is left, so a whole run of
 * matching siblings collapses into the first one.
 *
 * Relies on the file-level helper `childrenOf(node)` to list a node's
 * children.
 *
 * @param {Node} parent - Node whose direct children are examined.
 * @param {string[]} tags - Node names (compared against `nodeName`) that may be joined.
 * @returns {void} The tree below `parent` is modified in place.
 */
function joinSiblings(parent, tags) {
  // A separator is ignorable when it is a text node (nodeType 3) without any
  // non-whitespace character; an empty text node qualifies as well.
  const isBlankText = (node) => node.nodeType === 3 && /^\s*$/.test(node.textContent);

  // Private snapshot of the child list, kept in sync by hand after each merge
  // so the list does not have to be rebuilt and rescanned from the start.
  const children = Array.from(childrenOf(parent));

  let i = 0;
  while (i < children.length - 1) {
    const nd = children[i];
    const next = children[i + 1];
    const afterNext = children[i + 2];

    // Number of snapshot entries after `nd` that get folded into it:
    // 1 = directly adjacent sibling, 2 = blank text node plus sibling.
    let consumed = 0;
    if (tags.includes(nd.nodeName)) {
      if (next.nodeName === nd.nodeName) {
        consumed = 1;
      } else if (afterNext && afterNext.nodeName === nd.nodeName && isBlankText(next)) {
        consumed = 2;
      }
    }

    if (consumed === 0) {
      i += 1;
      continue;
    }

    const partner = children[i + consumed];
    if (consumed === 2) {
      // Keep the white space between the two runs of content.
      nd.appendChild(next);
    }
    // Snapshot before moving: appending re-parents each child.
    for (const child of Array.from(childrenOf(partner))) {
      nd.appendChild(child);
    }
    partner.remove();
    children.splice(i + 1, consumed);
    // Stay on the same index: the grown node may be joinable with what now
    // follows it. Earlier entries cannot have become joinable, because no
    // node name before this index changed.
  }
}
ghijkl', { + join_siblings: ['B', 'I'], + allow_tags_direct: { + '.*': '.*', + } + }), + 'abc def ghijkl' + ); + }); + + + it('should not join same-tag siblings when separated by non-whitespace text', function() { + assert.equal( + sanitizeHtml('abc x def ghi jklmno', { + join_siblings: ['B', 'I'], + allow_tags_direct: { + '.*': '.*', + } + }), + 'abc x def ghi jklmno' + ); + }); +}); + describe('allow_tags', function() { it('should flatten all markup by default', function() {