You can do it without a regex. It's usually not a good idea to try parsing HTML with regexes, unless the use case is very simple...
With the way I implemented `stripHtmlElementsMatching`, you can pass it any CSS selector and it will strip all matching elements (each matching element's tags are removed, while its child nodes are kept in place).
Therefore, to remove anything but a, img, iframe you can pass :not(a):not(img):not(iframe).
PS: The `htmlstripping-root` custom tag is only there to avoid creating a parser element that interferes with the passed selector. For instance, if I used a `div` as the parser element and you passed the selector `div > div`, all top-level divs would be removed even though they are not nested inside another div in your HTML string.
/**
 * Parses an HTML string and "unwraps" every element that matches a CSS
 * selector: the element's own tags are removed while its child nodes are
 * kept in the same position.
 *
 * Usage: stripHtmlElementsMatching(text, selector)
 *   text     {string} HTML markup to process.
 *   selector {string} CSS selector of the elements to unwrap. Any non-string
 *                     value falls back to ':not(*)', which matches nothing,
 *                     so the markup is only normalized by the HTML parser.
 *   returns  {string} The serialized markup after unwrapping.
 *   throws   Whatever querySelectorAll throws for an invalid selector.
 */
var stripHtmlElementsMatching = (function(doc) {
  // A hyphenated, unlikely tag name is used for the parsing container so the
  // caller's selector cannot accidentally involve the container itself
  // (e.g. `div > div` would match top-level divs if the container were a div).
  // No registration is needed: createElement accepts the name as-is, and the
  // old document.registerElement API no longer exists in current browsers.
  var ROOT_TAG = 'htmlstripping-root';

  // Moves every child of `el` into a new fragment, preserving order.
  function childrenToFragment(ownerDoc, el) {
    var frag = ownerDoc.createDocumentFragment();
    while (el.firstChild) {
      frag.appendChild(el.firstChild);
    }
    return frag;
  }

  return function(text, selector) {
    // Parse inside a separate document that has no browsing context, so the
    // untrusted markup cannot load resources or run inline event handlers
    // (e.g. <img onerror=...>) while it is being processed.
    var inertDoc = doc.implementation.createHTMLDocument('');
    var parser = inertDoc.createElement(ROOT_TAG);
    var matchingEls, i, len, el;

    selector = typeof selector === 'string' ? selector : ':not(*)';
    parser.innerHTML = text;

    // querySelectorAll returns a static list in document order, so it is safe
    // to mutate the tree while iterating: a nested match is re-parented when
    // its ancestor is unwrapped, but it still has a parent when its turn comes.
    matchingEls = parser.querySelectorAll(selector);
    for (i = 0, len = matchingEls.length; i < len; i++) {
      el = matchingEls[i];
      el.parentNode.replaceChild(childrenToFragment(inertDoc, el), el);
    }
    return parser.innerHTML;
  };
})(document);
// Demo: unwrap every element except <a>, <img> and <iframe> in a snippet of
// Word-generated HTML, then display the resulting markup on the page.
var text = '<p class="MsoNormal" style="margin-left:202.5pt;line-height:200%;background:white"><b><span style="font-size: 16pt; line-height: 200%; color: rgb(131, 60, 11); background-image: initial; background-attachment: initial; background-size: initial; background-origin: initial; background-clip: initial; background-position: initial; background-repeat: initial;">test</span></b><span style="font-size:16.0pt; line-height:200%;color:#833C0B;letter-spacing:-.15pt;mso-ansi-language:EN-US"><o:p></o:p></span></p>';

// Tags whose elements must survive the stripping pass.
var tagsToKeep = ['a', 'img', 'iframe'];

// Chain one :not() per kept tag, giving ':not(a):not(img):not(iframe)'.
var sanitizeSelector = '';
tagsToKeep.forEach(function(keptTag) {
  sanitizeSelector += ':not(' + keptTag + ')';
});

var sanitizedText = stripHtmlElementsMatching(text, sanitizeSelector);

// Appended as a text node so the sanitized markup is shown literally
// instead of being rendered as HTML.
document.body.appendChild(document.createTextNode(sanitizedText));
Isn't `[\s\S]` the equivalent of `.`? Have you tried adding `console.log(temp)` inside your `while` loop (or setting a breakpoint) to see what is actually happening? Use `var regex = /<(?!a )(?!img )(?!iframe )([\s\S]*?)>/;` (note the forward slashes). You are declaring a string, which means `[\s\S]` ends up as simply `[sS]`. (To use a string you need to escape the backslashes: `"[\\s\\S]"`.)