JavaScript: check if a string contains only HTML

Question

this is my string

<img class="img" src="a.png"><img class="img" src="a.png"><img class="img" src="a.png">

I want to check whether a string contains only HTML tags. For example, this one also contains plain text:

dwd<img class="img" src="a.png">dwd<img class="img" src="a.png"><img class="img" src="a.png"> dwd

If the string contains any plain text, as in the example above, I want to return false.

Here is some code I have for checking that:

/**
 * Reports whether the given markup yields at least one element node at the
 * top level once the browser has parsed it.
 *
 * Plain text next to the elements does not change the result: mixed content
 * such as 'this is a <b>string</b>' still returns true.
 *
 * @param {string} str - Markup to parse.
 * @returns {boolean} true if any top-level node is an element, false otherwise.
 */
function isHTML(str) {
  const container = document.createElement('div');
  container.innerHTML = str; // let the browser parse the markup

  // nodeType 1 identifies an element node.
  return Array.from(container.childNodes).some((node) => node.nodeType === 1);
}


isHTML('<a>this is a string</a>') // true
isHTML('this is a string')        // false
isHTML('this is a <b>string</b>') // true

As we can see, the third example returns true even though the string contains plain text alongside the HTML tags. How can I change the function so that it returns true only when the string consists solely of HTML tags, with no stray text?

Here is another method, but it has the same problem as the one above:

// True when the string contains at least one "<...>" sequence anywhere in it;
// text before or after that sequence is not taken into account.
const isHTML = (str) => /(<([^>]+)>)/i.test(str);

isHTML('Testing');               // false
isHTML('<p>Testing</p>');        // true
isHTML('<img src="hello.jpg">'); // true
isHTML('My <p>Testing</p> string');   // true (caution!!!)
isHTML('<>');                    // false

It's a good method, but isHTML('My <p>Testing</p> string') returns true (caution!!!).

Here I want it to return false, because the string contains plain text alongside the HTML tags.

Your logic is sort-of backwards. You should return false; as soon as you see a child node that's not type 1, and then return true at the end. — Pointy
– Pointy, Commented Apr 9, 2019 at 0:41
@Pointy thank you can you answer that so other people can see your answer function isHTMLONLY(str) { var a = document.createElement('div'); a.innerHTML = str; for (var c = a.childNodes, i = c.length; i--; ) { if (c[i].nodeType != 1) return false; } return true; } — daily tube
– daily tube, Commented Apr 9, 2019 at 0:53
@dailytube no... childNodes includes text nodes (nodeType 3) and HTML comments (nodeType 8), if you are interested in those — charlietfl
– charlietfl, Commented Apr 9, 2019 at 0:53
Not so simple to verify if the input is HTML only (not XML). github.com/sindresorhus/is-html — Miller Cy Chan
– Miller Cy Chan, Commented Apr 9, 2019 at 2:10

ajai Jothi · Accepted Answer · 2019-04-09 05:02:32Z

7

Option 1: With RegExp and string replace:

/**
 * Checks whether a string consists only of HTML markup, i.e. nothing but
 * whitespace is left once every tag (and the content enclosed by a matched
 * tag pair) has been stripped.
 *
 * Regex-based, so it is an approximation: nested tags of the same name are
 * paired with the first closing tag found, and a ">" inside an attribute
 * value ends the tag early. An empty or whitespace-only input returns true.
 *
 * @param {string} [str] - Text to inspect; a falsy value is treated as ''.
 * @returns {boolean} true when no text remains outside the markup.
 */
const isHTML = (str) => !(str || '')
  // Remove each <name ...>...</name> pair together with its content.
  // - The lookahead forces the captured name to be the complete tag name, so
  //   "<br>text</b>" is not mistaken for a <b>...</b> pair.
  // - [\s\S] (rather than ".") lets the enclosed content span several lines.
  .replace(/<([^\s/>]+)(?=[\s/>])[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
  // Remove the remaining standalone tags (void, self-closing or unmatched).
  .replace(/<[^>]+>/g, '')
  // Ignore surrounding whitespace.
  .trim();

console.log(isHTML('Testing'));                         // false
console.log(isHTML('<p>Testing</p>'));                  // true
console.log(isHTML('<img src="hello.jpg">'));           // true
console.log(isHTML('My <p>Testing</p> string'));        // false
console.log(isHTML('<p>Testing</p> <p>Testing</p>'));   // true
console.log(isHTML('<>'));                              // false
console.log(isHTML('<br>'));                            // true

Option 2: With DOM API

/**
 * Checks whether a string consists only of HTML markup by letting the
 * browser parse it and looking at the text that lies outside every element.
 *
 * @param {string} str - Markup to parse.
 * @returns {boolean} true when, after all elements are detached, the
 *   remaining text content is empty or whitespace only.
 */
const isHTML = (str) => {
  const parsed = document.createRange().createContextualFragment(str);

  // Detach every element (at any depth); only text that was never wrapped
  // in an element stays reachable from the fragment.
  for (const element of parsed.querySelectorAll('*')) {
    element.parentNode.removeChild(element);
  }

  // Any non-whitespace text left over means the input was not pure markup.
  const leftoverText = (parsed.textContent || '').trim();
  return leftoverText === '';
};

console.log(isHTML('Testing'));                         // false
console.log(isHTML('<p>Testing</p>'));                  // true
console.log(isHTML('<img src="hello.jpg">'));           // true
console.log(isHTML('My <p>Testing</p> string'));        // false
console.log(isHTML('<p>Testing</p> <p>Testing</p>'));   // true
console.log(isHTML('<>'));                              // false
console.log(isHTML('<br>'));                            // true

answered Apr 9, 2019 at 5:02

ajai Jothi

2,304 reputation · 1 gold badge · 10 silver badges · 16 bronze badges

Sign up to request clarification or add additional context in comments.

1 Comment

MiKr13 Over a year ago

The first option doesn't work for some complex strings, such as:

<p>Hey there <strong>sir</strong></p><p>How's it <em>going</em>?</p><p>Is everything <u>well and good?</u></p><p><u>Call me </u><s>in the next one.</s></p><blockquote>Comprende?</blockquote><pre class="ql-syntax __qlSyntax--23mc5" spellcheck="false"><code class="hljs language-stylus"><span class="hljs-function"><span class="hljs-title">print</span><span class="hljs-params">(<span class="hljs-string">'see you on the other side'</span>)</span></span> </code></pre>

The second option works flawlessly!

Maheer Ali · Accepted Answer · 2019-04-09 01:02:52Z

2

its good method but isHTML('My <p>Testing</p> string'); // true (caution!!!)

It's a good method; just anchor the regex with ^ at the start and $ at the end, so that the whole string, not merely a part of it, has to match.

// Merely anchoring the original pattern, /^(<([^>]+)>)$/, accepts only a string
// that is one single tag, so '<p>Testing</p>' would be rejected. Instead, the
// string must start with a tag and end with a tag; surrounding whitespace is
// ignored. Text sitting between two sibling tags is not detected by this check.
const isHTML = RegExp.prototype.test.bind(/^\s*<[^>]+>(?:[\s\S]*<[^>]+>)?\s*$/);

console.log(isHTML('Testing'));               // false
console.log(isHTML('<p>Testing</p>'));        // true
console.log(isHTML('<img src="hello.jpg">')); // true
console.log(isHTML('My <p>Testing</p> string'));   // false
console.log(isHTML('<>'));                    // false

answered Apr 9, 2019 at 1:02

Maheer Ali

36.5k reputation · 8 gold badges · 53 silver badges · 83 bronze badges

Comments

Kamrujaman Shohel · Accepted Answer · 2019-12-08 12:09:37Z

All are good. And my solution is

const element = document.querySelector('.test_element');

/**
 * Logs whether an element's markup contains at least one matched tag pair.
 *
 * When it does, 'This is an HTML' and the whitespace-collapsed markup are
 * logged; otherwise 'This is a TEXT' and the element's trimmed innerText.
 *
 * @param {{innerHTML: string, innerText: string}} elem - Element to inspect.
 * @returns {void}
 */
const setHtml = (elem) => {
    const rawHtml = elem.innerHTML;

    // Collapse runs of newlines, tabs and spaces. This only affects what is
    // logged; detection below works on the raw markup.
    const collapsedHtml = rawHtml.replace(/[\n\t ]+/g, " ");

    // Look for <name ...>...</name>. [\s\S] is used instead of "." because
    // "." does not match line breaks, and markup read from innerHTML often
    // spans several lines.
    const hasTagPair = /<([A-Za-z][A-Za-z0-9]*)\b[^>]*>([\s\S]*?)<\/\1>/.test(rawHtml);

    if (hasTagPair) {
        console.log('This is an HTML');
        console.log(collapsedHtml.trim());
    } else {
        console.log('This is a TEXT');
        console.log(elem.innerText.trim());
    }
};

setHtml(element);

Seph Reed · Accepted Answer · 2019-04-09 14:46:36Z

Here's a quick and tricky way.

It uses your browser's built-in HTML parser to handle all the nested markup (which is generally not easy with JS regexes). Then it walks the element and its descendants, looking for any Text nodes.

/**
 * Parses the given markup into a detached <div> and reports whether the
 * resulting tree is free of text nodes, as decided by hasNoTextChildren.
 *
 * @param {string} testMe - Markup to inspect.
 * @returns {boolean} The result of hasNoTextChildren for the parsed tree.
 */
function isOnlyHTML(testMe) {
  const sandbox = document.createElement("div");

  // Assigning innerHTML hands the parsing (including nesting) to the browser.
  sandbox.innerHTML = testMe;

  const textFree = hasNoTextChildren(sandbox);
  return textFree;
}

/**
 * Recursively checks that no Text node exists anywhere beneath the given node.
 *
 * Every Text node counts, whatever it contains (whitespace included), and
 * the search stops at the first one found.
 *
 * @param {Node} element - Root of the subtree to inspect.
 * @returns {boolean} false if a Text node is found at any depth, true otherwise.
 */
function hasNoTextChildren(element) {
  return Array.from(element.childNodes).every(
    (child) => !(child instanceof Text) && hasNoTextChildren(child)
  );
}

EDIT: It appears from your tests that you're just trying to find if the string is a lone html element, with no text before or after (but perhaps inside). If that's the case, the other answer about adding ^ and $ to your regex, perhaps doing a trim first, would suffice.

Collectives™ on Stack Overflow

javascript check if string contain only html

4 Answers 4

1 Comment

Comments

Comments

Comments

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

4 Answers 4

1 Comment

Comments

Comments

Comments

Your Answer

Sign up or log in

Post as a guest

Related