diff --git a/README.md b/README.md
index b9c8005..04a031f 100644
--- a/README.md
+++ b/README.md
@@ -184,6 +184,7 @@ Each node is processed in the following sequence:
| [opts.allow_attributes_by_tag] | [TagAttributeNameSpec
](#TagAttributeNameSpec) | {}
| Matching attribute names of a matching node are kept. Other attributes are removed. |
| [opts.allow_classes_by_tag] | [TagClassNameSpec
](#TagClassNameSpec) | {}
| Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed. |
| [opts.remove_empty] | boolean
| false
| Remove nodes which are completely empty or contain only white space. |
+| [opts.join_siblings] | [Array.<Tagname>
](#Tagname) | []
| Join same-tag sibling nodes of the given tag names, unless they are separated by non-whitespace text nodes. |
diff --git a/package.json b/package.json
index 5e54ddf..1459856 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "sanitize-dom",
- "version": "1.0.0",
+ "version": "1.0.1",
"description": "",
"main": "src/index.js",
"directories": {
diff --git a/src/sanitize-dom.js b/src/sanitize-dom.js
index a4cd6cf..01e224e 100644
--- a/src/sanitize-dom.js
+++ b/src/sanitize-dom.js
@@ -158,6 +158,7 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
* @param {TagAttributeNameSpec} [opts.allow_attributes_by_tag={}] - Matching attribute names of a matching node are kept. Other attributes are removed.
* @param {TagClassNameSpec} [opts.allow_classes_by_tag={}] - Matching class names of a matching node are kept. Other class names are removed. If no class names are remaining, the class attribute is removed.
* @param {boolean} [opts.remove_empty=false] Remove nodes which are completely empty or contain only white space.
+ * @param {Tagname[]} [opts.join_siblings=[]] Join same-tag sibling nodes of the given tag names, unless they are separated by non-whitespace text nodes.
*
*/
function sanitizeDom(
@@ -190,6 +191,10 @@ function sanitizeDom(
if (!opts.allow_attributes_by_tag) opts.allow_attributes_by_tag = {};
if (!opts.allow_classes_by_tag) opts.allow_classes_by_tag = {};
+ if (!opts.join_siblings) opts.join_siblings = [];
+
+
+
var parents = [];
@@ -366,6 +371,48 @@ function sanitizeDom(
nd.remove();
}
+ /**
+  * Join same-tag sibling children of `parent` for the given tag names.
+  *
+  * Two siblings are merged when they are directly adjacent, or when exactly one
+  * whitespace-only text node sits between them; that text node is moved into the
+  * surviving (first) node ahead of the second node's children. After each merge
+  * the children are rescanned from the start until nothing joinable remains.
+  * The rescan is a loop, so the call stack stays flat however many merges occur.
+  *
+  * @param {Node} parent - Node whose direct children are examined.
+  * @param {Tagname[]} tags - Node names (compared against `nodeName`) to join.
+  */
+ function joinSiblings(parent, tags) {
+   // nodeType 3 is a text node.
+   const isBlankText = (n) => n.nodeType === 3 && /^\s+$/.test(n.textContent);
+   // Moves the donor's children (as listed by childrenOf) and drops the donor.
+   const absorb = (target, donor) => {
+     for (const child of childrenOf(donor)) target.appendChild(child);
+     donor.remove();
+   };
+   let merged = true;
+   while (merged) {
+     merged = false;
+     const children = childrenOf(parent);
+     for (let i = 0; i < children.length && !merged; i++) {
+       const nd = children[i];
+       if (!tags.includes(nd.nodeName)) continue; // both merge forms need nd's tag listed
+       const nd1 = children[i + 1];
+       const nd2 = children[i + 2];
+       if (nd1 && nd1.nodeName === nd.nodeName) {
+         absorb(nd, nd1);
+         merged = true;
+       } else if (nd1 && nd2 && isBlankText(nd1) && nd2.nodeName === nd.nodeName) {
+         nd.appendChild(nd1); // keep the separating whitespace inside nd
+         absorb(nd, nd2);
+         merged = true;
+       }
+     }
+   }
+ }
+
function sanitizeNode(nd) {
if (nd.sanitize_skip) {
delete nd.sanitize_skip;
@@ -449,6 +496,10 @@ function sanitizeDom(
nd.remove();
}
}
+
+ if (opts.join_siblings.length > 0) {
+ joinSiblings(parent, opts.join_siblings);
+ }
}
}
diff --git a/tests/test.js b/tests/test.js
index c0c4281..649c5d6 100644
--- a/tests/test.js
+++ b/tests/test.js
@@ -41,6 +41,46 @@ describe('initialization', function() {
});
+describe('join_siblings', function() {
+ // NOTE(review): each input literal below equals its expected output and contains no tags, so no join is exercised; presumably the markup was lost. Confirm.
+ it('should join same-tag siblings of specified tags', function() {
+ assert.equal(
+ sanitizeHtml('abc def jkl', {
+ join_siblings: ['B', 'I'],
+ allow_tags_direct: {
+ '.*': '.*',
+ }
+ }),
+ 'abc def jkl'
+ );
+ });
+
+ it('should join same-tag siblings of specified tags and leave children intact', function() {
+ assert.equal(
+ sanitizeHtml('abc def ghijkl', {
+ join_siblings: ['B', 'I'],
+ allow_tags_direct: {
+ '.*': '.*',
+ }
+ }),
+ 'abc def ghijkl'
+ );
+ });
+
+
+ it('should not join same-tag siblings when separated by non-whitespace text', function() {
+ assert.equal(
+ sanitizeHtml('abc x def ghi jklmno', {
+ join_siblings: ['B', 'I'],
+ allow_tags_direct: {
+ '.*': '.*',
+ }
+ }),
+ 'abc x def ghi jklmno'
+ );
+ });
+});
+
describe('allow_tags', function() {
it('should flatten all markup by default', function() {