2

I need to parse 10 web pages and extract their main content. I'm using node-readability and don't want to write the same function 10 times when only the URL changes. At the end I have to add up the content lengths. How can I do this with a loop, or is there a better approach? Right now it looks like this:

// Running totals shared by every callback. Created once, before the loop,
// so each pass does not replace the accumulator with a fresh object.
var data = {length: 0, count: 0};

for(var i=0; i<catchedUrl.length; i++){
    (function(i) {
        // read() reports its result through the callback, so the totals are
        // only complete after the last callback has run, not when the loop ends.
        read(catchedUrl[i], function(err, article, meta){
            if(err) throw err;

            // Split the extracted content on single spaces and count the pieces.
            var words = article.content.split(' ');
            article.close();
            data.count += 1;
            // Add the number of pieces; the bare name `length` is not defined here.
            data.length += words.length;
            // Send data to callback when functions done
        });
    })(i);
}

2 Answers 2

3

You can use the async module to simplify the looping. Also, please take a look at the documentation for the .bind() function.

A code sample for this case might look something like this:

var async = require('async');

function step(number, callback) {
     [enter code here]
     callback();
}

module.exports = (job, done) => {
    var _pages = [URLS];
        async.eachSeries(_pages, (link, callback)=> {
            step(link, callback);
        }, ()=> done());
    });

};

Best regards, Egor

Sign up to request clarification or add additional context in comments.

Comments

2

Egor's answer works great.

You could also use co, which lets you write the asynchronous calls in a sequential, synchronous-looking style:

$ npm i --save co thunkify

var co = require('co');
var read = require('node-readability');
var thunkify = require('thunkify');

// Pages whose extracted content is measured.
var cachedUrls = [
    'http://stackoverflow.com/questions/34414539/elasticsearch-filtering-mulitple-documents-with-same-term',
    'http://stackoverflow.com/questions/34414537/selecting-multiple-values-with-multiple-where-clauses',
    'http://stackoverflow.com/questions/34414536/how-to-create-functional-test-directory-in-grails',
    'http://stackoverflow.com/questions/34414534/azure-active-directory-application-key-renewal',
    'http://stackoverflow.com/questions/34414532/store-facebook-credential-in-android-for-google-smart-lock-password',
    'http://stackoverflow.com/questions/34414531/ssis-read-flat-file-skip-first-row',
    'http://stackoverflow.com/questions/34414529/set-non-database-attribute-for-rails-model-without-attr-accessor',
    'http://stackoverflow.com/questions/34414525/excel-code-blocking-other-excel-sheets-to-open',
    'http://stackoverflow.com/questions/34414522/app-crash-when-network-connection-gone',
    'http://stackoverflow.com/questions/34414520/nest-input-inside-label-with-simple-form-and-rails-4'
];

// Wrap the callback-style read() once, instead of re-wrapping it on every iteration.
var readPage = thunkify(read);

co(function *() {

    // Running totals across all pages.
    var data = { 
        length: 0, 
        count: 0
    };

    // Each yield waits for the current page before the loop moves on,
    // so the pages are processed one at a time.
    for (var i = 0, n = cachedUrls.length; i < n; i++) {

        // The yielded value is indexed at 0 to get the article object.
        let response = yield readPage(cachedUrls[i]);
        let article = response[0];

        // Count the pieces of the content when split on single spaces.
        data.length += article.content.split(' ').length;
        data.count++;

        // Close the article once its content has been counted.
        article.close();
    }

    return data;

}).then(function(value) {
    console.log('final value:', value);
}).catch(function(err) {
    // Without a rejection handler a failed read would go unreported.
    console.error('failed to read pages:', err);
});

2 Comments

You're making use of co here. The generator function alone doesn't help anything.
@Bergi, I phrased it differently.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.