String iteration algorithms performance in jQuery Terminal

Question

I maintain a JavaScript library, jQuery Terminal, which has an internal formatting syntax used to change the color and style of text. It looks like this:

[[b;red;green]this is bold red text with green background]

I have a main function that is used to process strings with formatting. This function is iterate_formatting, and the functions that use it are split_equal and substring.

$.terminal = {
    // ...

    // ---------------------------------------------------------------------
    // :: split text into lines with equal length so each line can be
    // :: rendered separately (text formatting can be longer than a line).
    // ---------------------------------------------------------------------
    split_equal: function split_equal(str, length, options) {
        // Splits `str` into an array of output lines. The input is first
        // normalized and split on "\n"; each resulting line is then walked
        // with $.terminal.iterate_formatting and cut whenever the running
        // length reported by the iterator reaches `length`.
        //
        // str     - text to split, may contain [[...]...] formatting
        // length  - length at which a line is cut
        // options - either a boolean (shorthand for {keepWords: <boolean>})
        //           or an object with `trim` and `keepWords` (both default
        //           to false)
        // returns - array of strings, one entry per output line
        if (typeof options === 'boolean') {
            options = {
                keepWords: options
            };
        }
        var settings = $.extend({
            trim: false,
            keepWords: false
        }, options);
        // formatting prefix that has to be re-opened at the start of the
        // next chunk because the previous chunk ended inside a formatting
        var prev_format = '';
        var result = [];
        var array = $.terminal.normalize(str).split(/\n/g);
        // computed once for the whole input, not per line
        var have_formatting = $.terminal.have_formatting(str);
        for (var i = 0, len = array.length; i < len; ++i) {
            // empty input lines are kept as empty output lines
            if (array[i] === '') {
                result.push('');
                continue;
            }
            var line = array[i];
            var get_next_character = make_next_char_fun(line);
            // index in `line` where the next output chunk starts
            var first_index = 0;
            var output;
            var line_length = line.length;
            // true when the line ends with a closed [[...]...] formatting
            var last_bracket = /\[\[[^\]]+\](?:[^\][]|\\\])+\]$/.test(line);
            // true when the line starts with whitespace or &nbsp;
            var leading_spaces = /^(&nbsp;|\s)/.test(line);
            // fast path: nothing to parse and the line is shorter than the limit
            if (!have_formatting && line_length < length) {
                result.push(line);
                continue;
            }
            $.terminal.iterate_formatting(line, function callback(data) {
                var chr, substring;
                // cut here when the limit is reached, on the last character,
                // or one position early when strlen() reports 2 for the next
                // code unit (presumably a double-width character - strlen is
                // defined outside this excerpt)
                if (data.length >= length || data.last ||
                    (data.length === length - 1 &&
                     strlen(line[data.index + 1]) === 2)) {
                    var can_break = false;
                    // TODO: this needs work
                    if (settings.keepWords && data.space !== -1) {
                        // replace html entities with characters
                        var stripped = text(line).substring(data.space_count);
                        // real length, not counting formatting
                        stripped = stripped.slice(0, length).replace(/\s+$/, '');
                        var text_len = strlen(stripped);
                        if (space_re.test(stripped) || text_len < length) {
                            can_break = true;
                        }
                    }
                    // if keepWords is true we split at the last space and make
                    // the next loop continue from where the space was located
                    var after_index = data.index + data.size;
                    if (last_bracket) {
                        after_index += 1;
                    }
                    var new_index;
                    if (settings.keepWords && data.space !== -1 &&
                        after_index !== line_length && can_break) {
                        // word wrap: cut at the last recorded space
                        output = line.slice(first_index, data.space);
                        new_index = data.space - 1;
                    } else {
                        // hard cut: include the whole current character
                        // (it may span more than one code unit)
                        substring = line.slice(data.index);
                        chr = get_next_character(substring);
                        output = line.slice(first_index, data.index) + chr;
                        // keep the closing bracket of a trailing formatting
                        if (data.last && last_bracket && chr !== ']') {
                            output += ']';
                        }
                        new_index = data.index + chr.length - 1;
                    }
                    if (settings.trim || settings.keepWords) {
                        // trailing whitespace is always removed, leading
                        // whitespace only when the line did not start with it
                        output = output.replace(/(&nbsp;|\s)+$/g, '');
                        if (!leading_spaces) {
                            output = output.replace(/^(&nbsp;|\s)+/g, '');
                        }
                    }
                    // a new_index of 0 is falsy, so data.index is used instead
                    first_index = (new_index || data.index) + 1;
                    if (prev_format) {
                        // re-open the formatting carried over from the previous
                        // chunk; stop carrying it once this chunk contains a
                        // closing bracket
                        var closed_formatting = /^[^\]]*\]/.test(output);
                        output = prev_format + output;
                        if (closed_formatting) {
                            prev_format = '';
                        }
                    }
                    // format_re, format_begin_re and format_end_re are defined
                    // outside this excerpt
                    var matched = output.match(format_re);
                    if (matched) {
                        var last = matched[matched.length - 1];
                        if (last[last.length - 1] !== ']') {
                            // last formatting is not closed in this chunk:
                            // close it here and remember it for the next chunk
                            prev_format = last.match(format_begin_re)[1];
                            output += ']';
                        } else if (format_end_re.test(output)) {
                            // strip what format_end_re matches from the chunk
                            // and carry the formatting over to the next chunk
                            output = output.replace(format_end_re, '');
                            prev_format = last.match(format_begin_re)[1];
                        }
                    }
                    result.push(output);
                    // modify the loop by returning new data: resume after
                    // new_index with the length counter and space reset
                    return {index: new_index, length: 0, space: -1};
                }
            });
        }
        return result;
    },
    // ---------------------------------------------------------------------
    // :: formatting aware substring function
    // ---------------------------------------------------------------------
    substring: function substring(string, start_index, end_index) {
        var chars = $.terminal.split_characters(string);
        if (!chars.slice(start_index, end_index).length) {
            return '';
        }
        if (!$.terminal.have_formatting(string)) {
            return chars.slice(start_index, end_index).join('');
        }
        var start = 0;
        var end;
        var start_formatting = '';
        var end_formatting = '';
        var prev_index;
        var offset = 1;
        $.terminal.iterate_formatting(string, function callback(data) {
            if (start_index && data.count === start_index + 1) {
                start = data.index;
                if (data.formatting) {
                    start_formatting = data.formatting;
                }
            }
            if (end_index && data.count === end_index) {
                end_formatting = data.formatting;
                prev_index = data.index;
                offset = data.size;
            }
            if (data.count === end_index + 1) {
                end = data.index;
                if (data.formatting) {
                    end = prev_index + offset;
                }
            }
        });
        if (start_index && !start) {
            return '';
        }
        if (end === undefined) {
            end = string.length;
        }
        string = start_formatting + string.slice(start, end);
        if (end_formatting) {
            string = string.replace(/(\[\[^\]]+)?\]$/, '');
            string += ']';
        }
        return string;
    },
    // ---------------------------------------------------------------------
    // :: helper function used by substring and split_equal; it loops over
    // :: the string and executes the callback with text count and other data
    // ---------------------------------------------------------------------
    iterate_formatting: function iterate_formatting(string, callback) {
        // Walks `string` one code unit at a time, tracking whether the
        // current position is inside a [[...] formatting header or inside
        // text, and invokes `callback(data)` for positions that is_text()
        // accepts.
        //
        // `data` carries: last, count, index, formatting, length, text,
        // size, space and space_count (see the object literal below).
        //
        // The callback may return:
        //   false  - stop the iteration
        //   object - any of `count`, `length`, `space`, `index` that is not
        //            undefined overwrites the iterator's own state; `index`
        //            additionally restarts the loop at that position
        function is_any_space(str) {
            return str === ' ' || str === '\t' || str === '\n';
        }
        // ----------------------------------------------------------------
        // :: true when the text ending right before index i is a space
        // :: (plain whitespace or, when the string has entities, &nbsp;)
        // ----------------------------------------------------------------
        function is_space(i) {
            if (!have_entities) {
                return is_any_space(string[i - 1]);
            }
            return string.slice(i - 6, i) === '&nbsp;' ||
                is_any_space(string[i - 1]);
        }
        // ----------------------------------------------------------------
        // :: match entity_re against the rest of the string from index;
        // :: skipped entirely when the string contains no entities
        // ----------------------------------------------------------------
        function match_entity(index) {
            if (!have_entities) {
                return null;
            }
            return string.slice(index).match(entity_re);
        }
        // ----------------------------------------------------------------
        // :: true when `[[` starts at index i
        // ----------------------------------------------------------------
        function is_open_formatting(i) {
            return string[i] === '[' && string[i + 1] === '[';
        }
        // ----------------------------------------------------------------
        // :: true when `\]` starts at index i and the backslash itself is
        // :: not preceded by another backslash
        // ----------------------------------------------------------------
        function is_escape_bracket(i) {
            return string[i - 1] !== '\\' && string[i] === '\\' &&
                string[i + 1] === ']';
        }
        // ----------------------------------------------------------------
        function is_bracket(i) {
            return string[i] === ']' || string[i] === '[';
        }
        // ----------------------------------------------------------------
        // :: decides if the callback is invoked for index i; reads the
        // :: per-iteration state set in the main loop (not_formatting,
        // :: opening, closing_formatting, in_text, formatting)
        // ----------------------------------------------------------------
        function is_text(i) {
            return (not_formatting && !opening &&
                    ((string[i] !== ']' && !closing_formatting) ||
                     !have_formatting)) || (in_text && !formatting);
        }
        // ----------------------------------------------------------------
        // :: function will skip to the next character in the main loop
        // :: TODO: improve performance of the emoji regex and check the
        // :: whole string; if it's not a complex string use a simple function
        // ----------------------------------------------------------------
        var get_next_character = make_next_char_fun(string);
        // returns how many extra code units the main loop has to skip
        // because the character at the current position spans more than one;
        // reads the `substring` variable of the current loop iteration
        function next_iteration() {
            var char = get_next_character(substring);
            if (char.length > 1 && $.terminal.length(substring) > 1) {
                return char.length - 1;
            }
            return 0;
        }
        // ----------------------------------------------------------------
        // :: true when the current position should be recorded as the
        // :: last seen space; when `space` was reset to -1 the position
        // :: stored in prev_space is not recorded again
        // ----------------------------------------------------------------
        function is_next_space() {
            return (is_space(i) && (not_formatting || opening)) &&
                (space === -1 && prev_space !== i || space !== -1);
        }
        // ----------------------------------------------------------------
        // :: last iteration or one before closing formatting
        // ----------------------------------------------------------------
        var last = false;
        function is_last() {
            if (i === string.length - 1 && !last) {
                last = true;
            } else {
                // inside formatting the last character is the one directly
                // followed by the final `]`; outside formatting this yields
                // the (falsy) empty `formatting` string
                last = formatting && !!substring.match(/^.]$/);
            }
            return last;
        }
        // ----------------------------------------------------------------
        var have_formatting = $.terminal.have_formatting(string);
        var have_entities = entity_re.test(string);
        // formatting header currently open ('' when none)
        var formatting = '';
        // true while the position is in text (outside a formatting header)
        var in_text = false;
        // number of characters counted so far
        var count = 0;
        var match;
        // index and count recorded at the last seen space (-1 when none)
        var space = -1;
        var space_count = -1;
        var prev_space;
        // running length; the callback can reset it through its return value
        var length = 0;
        // how far data.index is moved back from i for multi-unit items
        var offset = 0;
        // matches an entity that ends at the end of the tested string
        var re_ent = /(&[^;]+);$/;
        for (var i = 0; i < string.length; i++) {
            // NOTE: the remainder of the string is copied on every iteration
            // and is matched against regular expressions below
            var substring = string.slice(i);
            var closing_formatting = false;
            match = substring.match(format_start_re);
            if (match) {
                // a formatting header starts here
                formatting = match[1];
                in_text = false;
            } else if (formatting) {
                if (string[i] === ']') {
                    // first `]` ends the header and starts the text,
                    // second `]` closes the formatting
                    closing_formatting = in_text;
                    if (in_text) {
                        formatting = '';
                        in_text = false;
                    } else {
                        in_text = true;
                    }
                }
            } else {
                in_text = true;
            }
            var not_formatting = (formatting && in_text) || !formatting;
            var opening = is_open_formatting(i);
            if (is_next_space()) {
                space = i;
                space_count = count;
            }
            var braket = is_bracket(i);
            offset = 0;
            if (not_formatting) {
                // treat entity as one character
                if (string[i] === '&') {
                    match = match_entity(i);
                    if (match) {
                        i += match[1].length - 2; // 2 because continue adds 1 to i
                        continue;
                    }
                    ++count;
                    ++length;
                } else if (is_escape_bracket(i)) {
                    // escaped bracket \] counts as one character
                    ++count;
                    ++length;
                    offset = 1;
                    i += 1;
                } else if (!braket || !have_formatting || (in_text && !formatting)) {
                    ++count;
                    ++length;
                }
            }
            if (is_text(i)) {
                // code units for which strlen() reports 2 add one more
                // to the running length
                if (strlen(string[i]) === 2) {
                    length++;
                }
                var char = get_next_character(substring);
                var size = char.length;
                // beginning of the entity that we've skipped, we are at its end
                if (char === ';') {
                    match = string.slice(0, i + 1).match(re_ent);
                    if (match) {
                        // report the entity as one item that starts at its `&`
                        offset = match[1].length;
                        size = offset + 1;
                    }
                }
                var data = {
                    last: is_last(),
                    count: count,
                    index: i - offset,
                    formatting: formatting,
                    length: length,
                    text: in_text,
                    size: size,
                    space: space,
                    space_count: space_count
                };
                var ret = callback(data);
                if (ret === false) {
                    break;
                } else if (ret) {
                    // let the callback overwrite the iterator state
                    if (ret.count !== undefined) {
                        count = ret.count;
                    }
                    if (ret.length !== undefined) {
                        length = ret.length;
                    }
                    if (ret.space !== undefined) {
                        prev_space = space;
                        space = ret.space;
                    }
                    if (ret.index !== undefined) {
                        // the loop increment moves to ret.index + 1
                        i = ret.index;
                        continue;
                    }
                }
            } else if (i === string.length - 1 && !last) {
                // last iteration; if the formatting has its closing bracket
                // then the last iteration was already reported by the
                // branch above (#550).
                // NOTE: this data object has no `size` or `space_count`
                callback({
                    last: true,
                    count: count + 1,
                    index: i,
                    formatting: formatting,
                    length: 0,
                    text: in_text,
                    space: space
                });
            }
            // handle emoji, surrogate pairs and combining characters
            if (in_text) {
                i += next_iteration();
            }
        }
    },
    // ...
};

I need some advice on how to optimize that code. It works fine for short strings, but when the strings get longer it is really slow; when I run an animation with 0 delay, it is visible that the library slows down as it needs to process longer strings.

The split_equal function splits the string into lines, with optional wrapping at whitespace. substring is slower when it needs to return characters from the end of the string.

I need general advice on how to optimize this code to make it faster. I was thinking of splitting the string into individual formatting blocks and the text between them and processing those pieces, which may be faster because I could find the right position in the string without iterating over the whole string.

I know that there is a lot of code but maybe someone can help.

The issue of performance is tracked on GitHub: https://github.com/jcubic/jquery.terminal/issues/820

NOTE: It was suggested that I ask here.

Jess · Accepted Answer · 2023-06-02 23:57:45Z

The linked bug gave a good repro case, so I ran the profiler.

Apologies for the wide image, but it shows what's taking the most time in a slow call to split_equal. The leaf nodes on this graph are mostly next_char calling test_re, definitions below:

    // True when a regex match result begins at index 0 of the input.
    // A falsy `match` (no match found) is handed back unchanged.
    function starts_with(match) {
        if (!match) {
            return match;
        }
        return match.index === 0;
    }

    // Build a tester for `re` that succeeds only when the pattern matches
    // at the very start of the given string.
    //
    // re      - RegExp whose first capture group is the value of interest
    // returns - function(string) that gives back capture group 1 when
    //           `re` matches at index 0, otherwise undefined
    //
    // The pattern is compiled once into a copy anchored with `^`, so a
    // failed test no longer scans the whole remaining text for a match
    // that would be thrown away because it is not at index 0. The
    // non-capturing wrapper keeps group numbers (and alternations) intact.
    // A global flag on `re` is deliberately not carried over: match() with
    // a global pattern returns no `index`, so it could never pass the
    // "starts at 0" check.
    function make_re_fn(re) {
        var flags = (re.ignoreCase ? 'i' : '') +
            (re.multiline ? 'm' : '') +
            (re.dotAll ? 's' : '') +
            (re.unicode ? 'u' : '') +
            (re.unicodeSets ? 'v' : '');
        var anchored = new RegExp('^(?:' + re.source + ')', flags);
        return function test_re(string) {
            var m = anchored.exec(string);
            // with the multiline flag `^` also matches after a line break,
            // so the index still has to be checked
            if (m && m.index === 0) {
                return m[1];
            }
        };
    }

    // Build a function that returns the next "character" at the start of
    // the text it is given. Only the patterns that occur somewhere in
    // `string` are checked later, in the order: entity_re, emoji_re,
    // combine_chr_re, then astral symbols. When none of them matches at
    // the start, the first code unit is returned.
    function make_next_char_fun(string) {
        var candidates = [entity_re, emoji_re, combine_chr_re];
        var matchers = [];
        for (var k = 0; k < candidates.length; ++k) {
            if (candidates[k].test(string)) {
                matchers.push(make_re_fn(candidates[k]));
            }
        }
        if (astral_symbols_re.test(string)) {
            matchers.push(function test_astral(input) {
                var astral = input.match(astral_symbols_re);
                if (!starts_with(astral)) {
                    return undefined;
                }
                // a combine_chr_re match found at index 1 is returned
                // together with the symbol as the first three code units
                var combining = input.match(combine_chr_re);
                if (combining && combining.index === 1) {
                    return input.slice(0, 3);
                }
                return astral[1];
            });
        }
        return function next_char(input) {
            var found;
            for (var n = 0; n < matchers.length; ++n) {
                found = matchers[n](input);
                if (found) {
                    return found;
                }
            }
            return input[0];
        };
    }

It does seem that test_re is getting longer and longer strings with more iterations, which is possibly the most important thing to fix (do you really need to process thousands of characters each char you print?).

But I'll note what you're doing with the regex and these big input strings is:

Match the entire string against a regex
Check if the match is at the start of the string
If so, return the first match group

Performance-wise you'll probably be better served using a regex starting with ^, which might not be correct in all usages here, but I copied emoji_re and a thousand characters of text into a benchmarking tool and it became an order of magnitude faster when I added the caret.

Awesome thank you. Will check how it performs I hope that the animation will be usable. If not maybe I will just substring the initial string with like 10 characters it should be plenty to cover all emoji. — jcubic
– jcubic, Commented Jun 4, 2023 at 11:17
I just tested, and after refactoring regular expressions my animation took 29 seconds to run from the original 77 seconds. And it doesn't trigger a Violation warning anymore. It still slows down but only once after a few lines. — jcubic
– jcubic, Commented Jun 7, 2023 at 20:58

Stack Exchange Network

String iteration algorithms performance in jQuery Terminal

1 Answer 1

You must log in to answer this question.

Hot Network Questions

String iteration algorithms performance in jQuery Terminal

1 Answer 1

You must log in to answer this question.

Related

Hot Network Questions