
I am working on a project that needs an async function that's roughly equivalent to the following

async function task(url) {
    var r1 = await fetch(url).then(resp => resp.text());
    var r2 = await fetch(url + "/" + r1).then(resp => resp.json());
    // r2 is an array of urls

    var total = 0;
    for (var u of r2) {
        // sequential: each fetch waits for the previous one to finish
        var tmp = await fetch(u).then(resp => resp.text());
        total += parseInt(tmp, 10);
    }
    return total;
}

The issue is that there are hundreds of elements in r2, and each element is a URL. If I fetch them sequentially, this function will take a very long time to complete. I would like to run 10 requests concurrently (the number could be adjusted), and I wonder how I would rewrite the async function.

  • is r2 an array or a plain object? Commented Jun 23, 2018 at 5:06

4 Answers

3

Chunk the initial array into pieces of 10, then wait for each chunk to complete with Promise.all before starting the next one:

async function getTotal(subArr) {
  // fetch this chunk of URLs concurrently, matching the question's
  // text() + parseInt pattern
  const resps = await Promise.all(subArr.map(url =>
    fetch(url).then(resp => resp.text()).then(t => parseInt(t, 10))
  ));
  return resps.reduce((a, b) => a + b, 0);
}

async function task(url) {
  const r1 = await fetch(url).then(resp => resp.text());
  const r2 = await fetch(url + "/" + r1).then(resp => resp.json());

  const chunks = [];
  const { length } = r2;
  for (let i = 0; i < length; i += 10) {
    chunks.push(r2.slice(i, i + 10));
  }
  let total = 0;
  for (const subArr of chunks) {
    // wait for each chunk of 10 to finish before starting the next
    total += await getTotal(subArr);
  }
  return total;
}

2

Here's some code I created years ago that allows you to create a "parallel" queue

const makeQueue = length => {
    length = (isNaN(length) || length < 1) ? 1 : length;
    // one promise chain per "slot"; each chain runs its callbacks serially
    const q = Array.from({length}, () => Promise.resolve());
    let index = 0;
    const add = cb => {
        // round-robin: append the callback to the next chain
        index = (index + 1) % length;
        return (q[index] = q[index].then(() => cb()));
    };
    return add;
};

This will allow up to 10 simultaneous requests (or whatever you pass in as the argument)

In your code, I guess you could use it like

async function task(url) {
    const q = makeQueue(10); // 10 requests at a time

    var r1 = await fetch(url).then(resp => resp.text());

    var r2 = await fetch(url + "/" + r1).then(resp => resp.json());

    // note: v.map(parseInt) would pass each element's array index as
    // parseInt's radix argument, so wrap the call explicitly
    return Promise.all(r2.map(u => q(() => fetch(u).then(resp => resp.text()))))
        .then(v => v.map(s => parseInt(s, 10)).reduce((a, b) => a + b, 0));
}

The return can also be written as

return Promise.all(r2.map(u => q(() => fetch(u).then(resp => resp.text()).then(parseInt)))).then(v => v.reduce((a, b) => a + b, 0));

Broken down, that is equivalent to

const fetch1 = u => fetch(u).then(resp => resp.text()).then(parseInt);

const promises = r2.map(u => q(() => fetch1(u)));

return Promise.all(promises).then(v => v.reduce((a, b) => a + b, 0));

The benefit of this method is that it keeps 10 requests "on the go" for as much of the total run time as possible.

Note that browsers limit the number of simultaneous connections per host, so you may not see any improvement with a queue size greater than 6 (I think that's the most common limit).
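
To see the queue's behavior without any network involved, here is a minimal sketch of my own (the sleep helper and the job counts are made up for illustration) that runs 9 timer-based jobs through makeQueue(3) and shows that no more than 3 are ever in flight:

const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

const q = makeQueue(3);
let inFlight = 0;

const jobs = Array.from({ length: 9 }, (_, i) =>
    q(async () => {
        inFlight++;
        console.log("job " + i + " started, in flight: " + inFlight); // never exceeds 3
        await sleep(100 + Math.random() * 200); // stand-in for a fetch
        inFlight--;
        return i;
    })
);

Promise.all(jobs).then(results => console.log("all done", results));

One thing to keep in mind: assignment is round-robin rather than work-stealing, so a slow job delays the jobs queued behind it on the same chain even if another chain is idle.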

1 Comment

I probably left that in there accidentally :p originally had total = await Promise.all(...); return total;
1

Appreciate all the good answers here! I studied them and came up with the following solution, which I think is slightly simpler (for many of us beginners) :-)

This solution doesn't divide the URL-fetching jobs up front, because it's uncertain how long each fetch will take. Instead, each worker walks the whole task list; if a task is already assigned to another worker, it just moves on to the next one.

var tasks
var total = 0
var gId = 0
var workerId = 0
manager(4)
async function manager(numOfWorkers) {
    var workers = []
    // r2 is the array of urls from the question's second fetch
    tasks = r2.map(function(u) {return {id: gId++, assigned: -1, url: u }})
    for (var i=0; i<numOfWorkers; i++) { workers.push(worker()) }
    await Promise.all(workers)
    console.log(total)
}
async function worker() {
    var wid = workerId; workerId++;
    var tmp;
    for (var u of tasks) {
        if (u.assigned == -1) {
            // claim the task before awaiting, so no other worker picks it up
            u.assigned = wid;
            console.log("unit " + u.id + " assigned to " + wid)
            tmp = await fetch(u.url).then(r=>r.text())
            total += parseInt(tmp, 10);
        }
    }
}
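
For reference, here is a sketch of the same shared-task-list idea folded back into the question's task(url) shape (my own rewrite, not part of the answer above, with the worker count as a parameter); the key point is that a worker claims a task synchronously, before its first await, so no two workers can fetch the same URL:

async function task(url, numWorkers = 10) {
    const r1 = await fetch(url).then(resp => resp.text());
    const r2 = await fetch(url + "/" + r1).then(resp => resp.json());

    const tasks = r2.map(u => ({ url: u, assigned: false }));
    let total = 0;

    async function worker() {
        for (const t of tasks) {
            if (!t.assigned) {
                t.assigned = true; // claimed synchronously, before any await
                const text = await fetch(t.url).then(r => r.text());
                total += parseInt(text, 10);
            }
        }
    }

    await Promise.all(Array.from({ length: numWorkers }, () => worker()));
    return total;
}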


0

In short, ditch the await. By using await, you are literally telling it to wait here until it is done with this one thing.

If you want to parallelize them, make use of Promise.all(). Any async function returns a Promise, which can be used like any other Promise. Promise.all() accepts an array of Promises and resolves once all of them have resolved, giving you an array of the results from each.

You could do something like this:

const urls = [/* bunch of URLs */];

Promise.all(
 urls.map(url => 
   fetch(url).then(res => res.text())
 )
).then(results => /* do something with results */)

In this case, results will be an array of the results from your various requests, in the same order as they were passed in.

Now, if you want to be able to have a specific number of them running at a time, you'd want to change it up a bit and have some limits on what's going on.

I usually use a technique which just uses a simple counter to keep track of how many are active, and then fires off more when they are done.

You can do something like this:

// dummy fetch for example purposes, resolves between .2 and 3 seconds
const fakeFetch = url => new Promise(resolve => setTimeout(() => resolve(url), Math.random() * 2800 + 200));

const inputUrls = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
const limit = 2; // this sets the limit of how many can run at once, set to 10 to run 10 concurrently
const delay = 100; // delay in ms between each batch starting

function fetchAll(urls) {
  let active = 0;
  let queue = urls.slice(0); // clone urls
  
  // inner function so urls and results can be shared with all calls
  function fetchAllInner() {
    if (active < limit && queue.length) {
      const count = Math.min(limit - active, queue.length);
      const urlsThisBatch = queue.slice(0, count);
      queue = queue.slice(count); // remaining
      
      return Promise.all(
        urlsThisBatch.map(url => {
          active++; // increment active
          console.log('start', url);
          return fakeFetch(url)
            .then(r => {
              console.log('done', url);
              active--; // decrement active
              return new Promise(resolve => // new Promise to promisify setTimeout 
                setTimeout(() => 
                  resolve(fetchAllInner() // kicks off run again when one finishes
                    .then(fetchR => [].concat(r, fetchR)) // combine them
                  ), delay
                )
              );
            })
        })
      ).then(r => r.reduce((a, u) => [].concat(u, a), [])); // flatten from Promise.all()
    }
    
    return Promise.resolve([]); // final resolve
  }
  
  return fetchAllInner();
}

fetchAll(inputUrls)
  .then(results => console.log('all done', results));

In a nutshell, this creates a Promise.all() for a batch (however many requests we can start before hitting the limit). Then, when one finishes, it sets a timeout to start another batch by recursively calling the same function. It's wrapped in an inner function simply to avoid making some variables global.

This also has an optional delay, so you can throttle how quickly requests go out and not hammer the server too hard. If you don't want a delay, you can set it to 0 or drop the new Promise(resolve => setTimeout(...)) wrapper entirely, as shown below.
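
For instance, the then handler in the verbose version could shrink to something like this (a sketch of the no-delay variant, everything else unchanged):

return fakeFetch(url)
    .then(r => {
        console.log('done', url);
        active--;
        return fetchAllInner() // start the next request immediately
            .then(fetchR => [].concat(r, fetchR));
    });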

That first version is a bit verbose to make it easier to understand. Here is a more "production-ready" version (be sure to switch fakeFetch to fetch and handle calling res.text()).

const fakeFetch = url => new Promise(resolve => setTimeout(() => resolve(url), Math.random() * 2800 + 200));

function fetchAll(urls, limit = 10, delay = 200) {
  let active = 0;
  const queue = urls.slice(0); // copy, so the caller's array isn't mutated

  function fetchAllInner() {
    if (active >= limit || !queue.length) {
      return Promise.resolve([]);
    }

    const count = Math.min(limit - active, queue.length);
    active += count;
    
    return Promise.all(
      queue.splice(0, count)
        .map(url => fakeFetch(url)
          .then(r => {
            active--;
            return new Promise(resolve =>
              setTimeout(() => resolve(
                fetchAllInner().then(fetchR => [].concat(r, fetchR))
              ), delay)
            );
          })
        )
      ).then(r => 
        r.reduce((a, u) => [].concat(u, a), []));
  }
  
  return fetchAllInner();
}

console.log('give it a few seconds');
fetchAll(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
  .then(r => console.log('all done', r))

2 Comments

That is what my code does. Just changed the limit from 2 to 10. I used 2 for the demo since I didn't want to add 100 "urls". EDIT: Added a comment to make it clearer what to change for 10.
yeah, sorry, didn't see that because I was looking for 10 :p
