Javascript truncate HTML text

Question

Does JavaScript have a way of truncating HTML text without all the headaches of matching tags etc etc?

Thank you.

arendjr · Accepted Answer · 2016-09-28 17:45:55Z

26

I know this question is old, but I recently had the same problem. I wrote the following library, which truncates valid HTML safely: https://github.com/arendjr/text-clipper

answered Sep 28, 2016 at 17:45

arendjr

7197 silver badges12 bronze badges

Sign up to request clarification or add additional context in comments.

3 Comments

danii Over a year ago

This is the only robust solution I found to this f*ing problem and should be much, much higher. Can't believe I gave the first upvote!

eballeste Over a year ago

amazing @arendjr!

boatcoder Over a year ago

npm i --save text-clipper for the win

Minouris · Accepted Answer · 2014-06-11 23:38:24Z

I had the same problem, and wound up writing the following to deal with it. It truncates HTML to a given length, cleans up any start / end tags that might have gotten snipped off at the end, and then closes any tags left unclosed:

/**
 * Cuts an HTML string to `length` characters and then repairs the markup that
 * the cut may have broken.
 *
 * The cut is made on the raw string with `substring`, so markup characters
 * count toward `length` just like visible text. Afterwards the function:
 *   1. removes line breaks and trims surrounding whitespace,
 *   2. drops an incomplete start tag left dangling at the end,
 *   3. completes (or drops) a partially cut end tag,
 *   4. appends closing tags for opening tags it finds without a closing tag
 *      after them.
 *
 * Relies on `reverseHtml` being available in the same scope.
 *
 * NOTE(review): the tag patterns only accept `\w` characters in attribute
 * names and values, so tags whose attributes contain other characters
 * (URLs, hyphens, spaces, ...) are presumably not recognised — confirm
 * against real-world input before relying on this.
 *
 * @param {string} text - HTML source to truncate.
 * @param {number} length - Maximum number of characters (markup included) to keep before repair.
 * @returns {string} The truncated HTML with repaired/appended closing tags.
 */
function truncateHTML(text, length) {
    var truncated = text.substring(0, length);
    // Remove line breaks and surrounding whitespace
    truncated = truncated.replace(/(\r\n|\n|\r)/gm,"").trim();
    // If the text ends with an incomplete start tag, trim it off
    truncated = truncated.replace(/<(\w*)(?:(?:\s\w+(?:={0,1}(["']{0,1})\w*\2{0,1})))*$/g, '');
    // If the text ends with a truncated end tag, fix it.
    // Capture group 1 holds whatever part of the tag name survived the cut.
    var truncatedEndTagExpr = /<\/((?:\w*))$/g;
    var truncatedEndTagMatch = truncatedEndTagExpr.exec(truncated);
    if (truncatedEndTagMatch != null) {
        var truncatedEndTag = truncatedEndTagMatch[1];
        // Check to see if there's an identifiable tag in the end tag
        if (truncatedEndTag.length > 0) {
            // If so, find the start tag, and close it.
            // The pattern matches a start tag whose name is the surviving
            // fragment optionally followed by one more word character.
            var startTagExpr = new RegExp(
                "<(" + truncatedEndTag + "\\w?)(?:(?:\\s\\w+(?:=([\"\'])\\w*\\2)))*>");
            var testString = truncated;
            var startTagMatch = startTagExpr.exec(testString);

            // Strip matching start tags one at a time so that `startTag`
            // ends up holding the name of the last one in the text.
            var startTag = null;
            while (startTagMatch != null) {
                startTag = startTagMatch[1];
                testString = testString.replace(startTagExpr, '');
                startTagMatch = startTagExpr.exec(testString);
            }
            if (startTag != null) {
                truncated = truncated.replace(truncatedEndTagExpr, '</' + startTag + '>');
            }
        } else {
            // Otherwise, cull off the broken end tag
            truncated = truncated.replace(truncatedEndTagExpr, '');
        }
    }
    // Now the tricky part. Reverse the text, and look for opening tags. For each opening tag,
    //  check to see that the closing tag before it is for that tag. If not, append a closing tag.
    // In the reversed text an opening tag reads as "<...eman>" and a closing
    // tag "</name>" reads as "<eman/>", which is what the patterns below match.
    var testString = reverseHtml(truncated);
    var reverseTagOpenExpr = /<(?:(["'])\w*\1=\w+ )*(\w*)>/;
    var tagMatch = reverseTagOpenExpr.exec(testString);
    while (tagMatch != null) {
        var tag = tagMatch[0];
        var tagName = tagMatch[2];
        var startPos = tagMatch.index;
        var endPos = startPos + tag.length;
        // Everything from the start of the reversed text up to and including this tag
        var fragment = testString.substring(0, endPos);
        // Test to see if an end tag is found in the fragment. If not, append one to the end
        //  of the truncated HTML, thus closing the last unclosed tag
        if (!new RegExp("<" + tagName + "\/>").test(fragment)) {
            // tagName was captured from reversed text, so flip it back
            truncated += '</' + reverseHtml(tagName) + '>';
        }
        // Get rid of the already tested fragment
        testString = testString.replace(fragment, '');
        // Get another tag to test
        tagMatch = reverseTagOpenExpr.exec(testString);
    }
    return truncated;
}

/**
 * Reverses a string and swaps every '<' with '>' (and vice versa), so that
 * reversed markup still has its angle brackets facing the right way:
 * "<b>hi</b>" becomes "<b/>ih<b>".
 *
 * The swap is done per character in a single pass. No placeholder character
 * is used, so every other character in the input (including U+00CE, which an
 * earlier version used as a temporary marker and corrupted) is left intact.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text with '<' and '>' exchanged.
 */
function reverseHtml(str) {
    var result = '';
    // Walk the input from the end, flipping brackets as they are copied.
    for (var i = str.length - 1; i >= 0; i--) {
        var ch = str.charAt(i);
        if (ch === '<') {
            result += '>';
        } else if (ch === '>') {
            result += '<';
        } else {
            result += ch;
        }
    }
    return result;
}

it broke the HTML provided. Tested with: <h2>Heading 1</h2><h3><br>Heading 2</h3><h3> </h3><h4>Titulo 3<br> </h4><p>O <strong>Lorem Ipsum</strong> é um texto modelo da indústria tipográfica e de impressão. O Lorem Ipsum tem vindo a ser o texto padrão usado por estas indústrias desde o ano de 1500, quando uma misturou os caracteres de um texto para criar um espécime de livro. Este texto não só sobreviveu 5 séculos, mas também o salto para a tipografia electrónica, mantendo-se essencialmente inalterada.</p>

Grzegorz Rożniecki · Accepted Answer · 2017-09-13 19:53:39Z

7

There's nothing built into JavaScript. There's a jQuery plugin that you might take a look at.

edited Sep 13, 2017 at 19:53

Grzegorz Rożniecki

28.1k11 gold badges95 silver badges116 bronze badges

answered Sep 29, 2010 at 13:53

Darin Dimitrov

1.0m275 gold badges3.3k silver badges3k bronze badges

2 Comments

Francisc Over a year ago

I have not worked with jQuery yet, is it easy to implement or does it require lots of settings and tweakings?

Jason Over a year ago

jQuery Plugin link is broken.

jigritsn · Accepted Answer · 2025-05-14 21:14:07Z

4

If you want a light-weight solution in vanilla JS, this should do the trick. Child nodes that are left with no text after truncation are removed, so it depends on whether you care about keeping those. Also note that it mutates the nodes in-place.

/**
 * Truncates the text contained in `node` (and its descendants) to at most
 * `limit` characters, mutating the DOM in place.
 *
 * Text nodes are shortened with `substring`; any child whose `textContent`
 * is empty after processing is removed from its parent.
 *
 * @param {Node} node - Node whose text should be truncated (modified in place).
 * @param {number} limit - Number of text characters still allowed.
 * @returns {number} How many characters of the limit are left after this node.
 */
function truncateNode(node, limit) {
  if (node.nodeType === Node.TEXT_NODE) {
    node.textContent = node.textContent.substring(0, limit);
    return limit - node.textContent.length;
  }

  // `childNodes` is a live list: removing a child while iterating it directly
  // shifts the remaining entries and makes the loop skip the next sibling,
  // leaving it untruncated. Iterate over a snapshot instead.
  let remaining = limit;
  for (const child of Array.from(node.childNodes)) {
    remaining = truncateNode(child, remaining);
    if (child.textContent.length === 0) {
      node.removeChild(child);
    }
  }

  return remaining;
}

// Usage example: keep only the first 5 characters of text inside the span.
const span = document.createElement('span');
span.innerHTML = '<b>foo</b><i>bar</i><u>baz</u>';
truncateNode(span, 5);
// <u> is left with no text, so truncateNode removes it rather than keeping an empty element.
expect(span.outerHTML).toEqual('<span><b>foo</b><i>ba</i></span>');

edited May 14 at 21:14

jigritsn

752 silver badges8 bronze badges

answered Jul 18, 2019 at 14:51

Alec

2,8622 gold badges22 silver badges32 bronze badges

1 Comment

jigritsn May 2 at 0:33

Thanks, did exactly what I needed without resorting to a library. I added a minor update to remove tags with empty text content

abidibo · Accepted Answer · 2011-07-26 08:59:44Z

2

There's a mootools plugin which does exactly what you need: mooReadAll at mootools forge

answered Jul 26, 2011 at 8:59

abidibo

4,2972 gold badges29 silver badges34 bronze badges

Comments

Seglinglin · Accepted Answer · 2020-03-11 09:04:12Z

None of the above solutions matched my use case perfectly, so I wrote a small vanilla JavaScript function. It leaves empty elements behind, but that could easily be corrected.

/**
 * Truncates the visible text of an HTML string to roughly `length`
 * characters while keeping every tag in place, and wraps the result in a
 * <span> whose `title` holds the full, tag-free text (shown on hover).
 *
 * Tags do not count toward `length`. Text that follows the cut is dropped,
 * but its surrounding tags are kept, so empty elements may remain.
 *
 * @param {string} string - HTML source to truncate.
 * @param {number} length - Maximum number of text characters to keep.
 * @returns {string} `string` unchanged when its text already fits, otherwise
 *   the truncated markup wrapped in `<span title="...">`.
 */
const truncateWithHTML = (string, length) => {
    // string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>"

    const noHTML = string.replace(/<[^>]*>/g, '');

    // if the string does not need to be truncated
    if (noHTML.length <= length) {
        return string;
    }

    // The text goes inside a double-quoted attribute, so a literal quote would
    // end the attribute early. Other characters are left alone because the
    // input is already HTML (entities must not be escaped twice).
    const title = noHTML.replace(/"/g, '&quot;');

    // if the string does not contain tags
    if (noHTML.length === string.length) {
        // add <span title=""> to allow complete string to appear on hover
        return `<span title="${title}">${string.substring(0, length).trim()}…</span>`;
    }

    const substrings = string.split(/(<[^>]*>)/g).filter(Boolean);
    // substrings = ["<span class='className'>","My long string that","</span>"," I want shorter","<span>"," but just a little bit","</span>"]

    let count = 0;
    let isTruncated = false;
    const truncated = [];
    for (const substr of substrings) {
        // tags are always kept so the markup stays balanced
        if (substr.startsWith('<')) {
            truncated.push(substr);
            continue;
        }
        // once the cut has been made, all further text is dropped
        if (isTruncated) {
            continue;
        }
        const remaining = length - count;
        if (substr.length > remaining - 1) {
            // one character of the budget is reserved for the ellipsis
            truncated.push(`${substr.substring(0, remaining - 1)}…`);
            isTruncated = true;
        } else {
            truncated.push(substr);
        }
        count += substr.length;
    }

    // The ellipsis was already placed at the cut point, so none is added here.
    return `<span title="${title}">${truncated.join('')}</span>`;
};

Examples:

// Declared with const: a bare assignment would create an implicit global
// (and throws in strict mode / ES modules).
const string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>";

// The wrapper's title attribute is emitted with double quotes.
truncateWithHTML(string,10); // `<span title="My long string that I want shorter but just a little bit"><span class='className'>My long s…</span><span></span></span>`
truncateWithHTML(string,22); // `<span title="My long string that I want shorter but just a little bit"><span class='className'>My long string that</span> I…<span></span></span>`

Albion S. · Accepted Answer · 2022-10-26 17:11:28Z

This works with multiple nesting levels:

/**
 * Truncates the text of an HTML string to `maxLength` characters, keeping the
 * element structure (at any nesting depth) up to the cut and removing every
 * node after it.
 *
 * The markup is parsed by assigning it to the `innerHTML` of a detached
 * <div>, so this needs a DOM (`document`, `Node`).
 * NOTE(review): `content` is parsed as live HTML; callers should pass only
 * trusted or sanitized markup.
 *
 * @param {string} content - HTML source to truncate.
 * @param {number} [maxLength=255] - Maximum number of text characters to keep.
 * @param {string} [append='…'] - Text added right after the cut point.
 * @returns {string} The serialized HTML; `append` is only present when text
 *   was actually cut off.
 */
let truncate = (content, maxLength = 255, append = '…') => {
    const container = document.createElement('div');
    container.innerHTML = content;

    // Nothing needs cutting: return as-is instead of tacking `append` onto
    // text that happens to be exactly `maxLength` long.
    if ( container.textContent.length <= maxLength ) {
        return container.innerHTML;
    }

    let limitReached = false;
    let counted = 0;

    const nodeHandler = node => {
        // Everything visited after the cut point is dropped.
        if ( limitReached ) {
            node.remove();
            return;
        }

        // Only text nodes count toward the limit; comments and empty
        // elements carry no visible text and must not receive `append`.
        if ( node.nodeType === Node.TEXT_NODE ) {
            counted += node.textContent.length;
            if ( counted >= maxLength ) {
                limitReached = true;
                if ( counted > maxLength ) {
                    // Trim the overshoot off the end of this text node.
                    node.textContent = node.textContent.slice( 0, -(counted - maxLength) );
                }

                node.textContent += append;
            }
            return;
        }

        // Snapshot the children: nodes may be removed while walking.
        Array.from( node.childNodes ).forEach( childNode => nodeHandler( childNode ) );
    };

    nodeHandler( container );

    return container.innerHTML;
};

Malcolm · Accepted Answer · 2011-08-30 22:57:31Z

0

I just recently finished a jQuery function to do this using the width & height of the container. Test it out and see if it works for you. I'm not yet sure of all the compatibility issues, bugs, or limitations but I've tested it in FF, Chrome, and IE7.

answered Aug 30, 2011 at 22:57

Malcolm

1

Comments

TK. · Accepted Answer · 2010-09-29 13:55:50Z

-4

That's quite challenging.

If you don't have any HTML markup, the following might be useful.

http://www.barelyfitz.com/projects/truncate/

answered Sep 29, 2010 at 13:55

TK.

28.4k20 gold badges66 silver badges72 bronze badges

1 Comment

Francisc Over a year ago

Sadly, it's FULL of HTML markup otherwise it would not be an issue.

Collectives™ on Stack Overflow

Javascript truncate HTML text

9 Answers 9

3 Comments

1 Comment

2 Comments

1 Comment

Comments

Comments

Comments

Comments

1 Comment

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

9 Answers 9

3 Comments

1 Comment

2 Comments

1 Comment

Comments

Comments

Comments

Comments

1 Comment

Your Answer

Sign up or log in

Post as a guest

Linked

Related