2

I'm currently learning Node.js, JavaScript and so on. I come from C++.

I need to parse an array such as:

======================================================================================================
No.    Name                   Cask                Current         Latest          Auto-Update  State
======================================================================================================
1/38   5KPlayer               5kplayer            latest          latest
2/38   Adobe Photoshop CC     adobe-photoshop-cc  16              16
3/38   Alfred                 alfred              3.3.1_806       3.3.2_818       Y            ignored
4/38   AppCleaner             appcleaner          3.4             3.4             Y            ignored
5/38   Github Atom            atom                1.15.0          1.15.0          Y            ignored
6/38   BetterZipQL            betterzipql         latest          latest
7/38   Boom                   boom                1.6,1490693621  1.6,1490693621
8/38   CheatSheet             cheatsheet          1.2.7           1.2.7
9/38   Cyberduck              cyberduck           5.4.0.23761     5.4.0.23761
10/38  Dropbox                dropbox             21.4.25         latest          Y            ignored

This is a list of apps installed on a Mac, one line per app.

If the app is outdated ('current' != 'latest'), I keep the line and make an object out of it for later treatment.

I came up with a dirty (yet working) solution:

//collects the rows of the apps whose 'Current' version differs from their 'Latest' version
//array: the raw table as ONE string, starting with its 3 header lines
//returns an array with one entry per outdated app; each entry is the row split into its columns
//(columns are separated by runs of two or more spaces, so names containing single spaces stay intact)
function parseBrewCUArray(array) {
    var toUpdate = [];
    var lines = array.split('\n');

    //remove useless lines
    lines = lines.slice(3);
    for (var i=0; i<lines.length; i++) {
        //declared locally: assigning to an undeclared name creates an implicit global
        //and throws a ReferenceError in strict mode / ES modules
        var splittedLine = lines[i].split(/[ ]{2,}/);
        if (splittedLine[3] !== splittedLine[4]) {
            toUpdate.push(splittedLine);
            console.log(splittedLine);
        }
    }

    //hand the collected rows to the caller; they used to be built and then dropped
    return toUpdate;
}

But there must be a much better solution out there! Can someone optimize this a bit and make this piece of code more elegant?

1 Answer 1

1

Your code can be simplified to the following:

//filters the table text line by line:
//the first three lines (the header) always stay,
//every other line stays only when its Current and Latest columns differ
//returns the remaining lines joined back into one string
function parseBrewCUArray(str) {
    const headerLines = 3;
    const lines = str.split('\n');
    const kept = [];

    for (let lineNo = 0; lineNo < lines.length; lineNo++) {
        const line = lines[lineNo];
        if (lineNo < headerLines) {
            kept.push(line);
            continue;
        }

        //columns are separated by at least two spaces; the 4th and 5th are Current and Latest
        const [, , , current, latest] = line.split(/ {2,}/);
        if (current !== latest) {
            kept.push(line);
        }
    }

    return kept.join("\n");
}

//sample input: the raw table as a single string
//it starts directly with the header block (no leading blank line),
//because parseBrewCUArray() always keeps the first three lines as header
var s = `======================================================================================================
No.    Name                   Cask                Current         Latest          Auto-Update  State
======================================================================================================
1/38   5KPlayer               5kplayer            latest          latest
2/38   Adobe Photoshop CC     adobe-photoshop-cc  16              16
3/38   Alfred                 alfred              3.3.1_806       3.3.2_818       Y            ignored
4/38   AppCleaner             appcleaner          3.4             3.4             Y            ignored
5/38   Github Atom            atom                1.15.0          1.15.0          Y            ignored
6/38   BetterZipQL            betterzipql         latest          latest
7/38   Boom                   boom                1.6,1490693621  1.6,1490693621
8/38   CheatSheet             cheatsheet          1.2.7           1.2.7
9/38   Cyberduck              cyberduck           5.4.0.23761     5.4.0.23761
10/38  Dropbox                dropbox             21.4.25         latest          Y            ignored`;

//prints the header plus the rows whose Current and Latest values differ
console.log(parseBrewCUArray(s));

But usually I'd prefer to parse the string into some usable data structure first and then proceed with that.

// first the utilities:

//casts a value to a String; null and undefined become "" instead of "null"/"undefined"
function string(value) {
    if (value === null || value === undefined) {
        return "";
    }
    return String(value);
}

//builds a reusable replacer: the returned function casts its argument with string()
//and applies the given pattern/replacement pair to it (the replacement defaults to "")
function replace(pattern, replacement="") {
    return function (value) {
        var text = string(value);
        return text.replace(pattern, replacement);
    };
}

//puts a backslash in front of every character that has a special meaning in Regular expressions
var escapeForRegex = replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");

//returns every match of needle in haystack as an array of RegExp#exec() results
//(wraps the exec() loop: too much boilerplate when written by hand, and easy to turn into an infinite loop)
//haystack is cast with string(); a RegExp needle is used as it is,
//any other needle is escaped and matched literally with the "g" flag
function matchAll(haystack, needle){
    var subject = string(haystack);

    var pattern;
    if(needle instanceof RegExp){
        pattern = needle;
    }else{
        pattern = new RegExp(escapeForRegex(needle), "g");
    }

    var found = [];
    var previousIndex = NaN;
    for(;;){
        var hit = pattern.exec(subject);
        //stop when nothing more matches, or when exec() reports the same index twice in a row
        //(e.g. a pattern without the "g" flag) - continuing would never terminate
        if(!hit || hit.index === previousIndex) break;

        found.push(hit);
        previousIndex = hit.index;
    }
    return found;
}


//a generic parser for tables whose columns are defined by their alignment, not by a separator
//- removes every row that doesn't contain at least one letter or number (rulers, blank lines)
//- parses the first remaining row to determine the column names and their offsets
//  (trailing dots, colons and whitespace are stripped from the names)
//- returns an array of objects with the column names as properties; every value is a trimmed string
//- returns an empty array when no row is left to serve as header (e.g. empty input)
//allowSingleSpacesInTitle: when true, a single space doesn't end a column title
//doesn't handle tabs, because there are too many standards about how wide a tab may be
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    var rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);
    //String#match() yields null, not [], when nothing matches; without this guard rows.shift() throws
    if(!rows) return [];

    var end,
        removeTrailingDots = replace(/[\.:\s]+$/, ""),
        headerPattern = allowSingleSpacesInTitle? /\S+(?: \S+)*/g: /\S+/g,
        //NOTE: the row parser is generated source code run through the Function constructor.
        //the only text taken from the table is the column name, and it is embedded through
        //JSON.stringify() as a quoted string literal; the offsets are numbers computed here
        parseRow = new Function("row", "return {\n" + matchAll(rows.shift(), headerPattern)
            //walk the header right to left: the start of a column is the end of the column before it;
            //the last column gets "undefined" as end, so substring() runs to the end of the row
            .reduceRight((acc, match) => {
                var property = JSON.stringify( removeTrailingDots( match[0] ) ) + ": row.substring(" + match.index + ", " + end + ").trim()";
                end = match.index;
                return "    " + property + ",\n" + acc;
            }, "}"));

    return rows.map(parseRow);
}

//sample input; the leading and trailing blank lines and the "=====" rulers are harmless,
//because parseTableByAlignment() drops every row without a letter or number
var s = `
======================================================================================================
No.    Name                   Cask                Current         Latest          Auto-Update  State
======================================================================================================
1/38   5KPlayer               5kplayer            latest          latest
2/38   Adobe Photoshop CC     adobe-photoshop-cc  16              16
3/38   Alfred                 alfred              3.3.1_806       3.3.2_818       Y            ignored
4/38   AppCleaner             appcleaner          3.4             3.4             Y            ignored
5/38   Github Atom            atom                1.15.0          1.15.0          Y            ignored
6/38   BetterZipQL            betterzipql         latest          latest
7/38   Boom                   boom                1.6,1490693621  1.6,1490693621
8/38   CheatSheet             cheatsheet          1.2.7           1.2.7
9/38   Cyberduck              cyberduck           5.4.0.23761     5.4.0.23761
10/38  Dropbox                dropbox             21.4.25         latest          Y            ignored
`;

//parse the table into one object per row, then print the rows whose Current differs from Latest
var data = parseTableByAlignment(s);
console.log(data.filter(item => item.Current !== item.Latest));

Using the Function constructor and evaluating a string as code ... well, that mostly gets negative responses. So I've added a second implementation of parseTableByAlignment() that doesn't use the Function constructor. The result is still the same:

//a generic parser for tables whose columns are defined by their alignment, not by a separator
//- removes every row that doesn't contain at least one letter or number (rulers, blank lines)
//- parses the first remaining row to determine the column names and their offsets
//  (trailing dots, colons and whitespace are stripped from the names)
//- returns an array of objects with the column names as properties; every value is a trimmed string
//- returns an empty array when no row is left to serve as header (e.g. empty input)
//allowSingleSpacesInTitle: when true, a single space doesn't end a column title
//doesn't handle tabs, because there are too many standards about how wide a tab may be
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    var rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);
    //String#match() yields null, not [], when nothing matches; without this guard rows.shift() throws
    if(!rows) return [];

    var previousColumn,
        removeTrailingDots = replace(/[\.:\s]+$/, ""),
        headerPattern = allowSingleSpacesInTitle? /\S+(?: \S+)*/g: /\S+/g,
        columns = matchAll(rows.shift(), headerPattern)
            .map(match => {
                //a column ends where the next one starts
                if(previousColumn) previousColumn.end = match.index;
                return previousColumn = {
                    name: removeTrailingDots( match[0] ),
                    start: match.index,
                    //stays undefined for the last column, so substring() runs to the end of the row
                    end: undefined
                };
            });

    return rows.map(row => columns.reduce((obj, column) => {
        obj[column.name] = row.substring(column.start, column.end).trim();
        return obj;
    }, {}));
}
Sign up to request clarification or add additional context in comments.

1 Comment

Thanks a lot for this answer, way beyond what I expected. I really like the JSON-ized approach.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.