13

I am trying to show all records of an index type. By default, a match_all query in Elasticsearch returns only 10 results; all results can be retrieved with the scroll API. I am trying to implement the scroll API but can't get it to work: it still shows only 10 results. My code:

/**
 * Runs a `match_all` search against index 'test' / type 'records' with a
 * 10 second scroll timeout and hands the hits of the first page to `callback`.
 *
 * @param {*} searchData - not read anywhere in this function.
 * @param {Function} callback - called once with `resp.hits.hits` of the initial search.
 */
module.exports.searchAll = function (searchData, callback) {

client.search({
    index: 'test',
    type: 'records',
    scroll: '10s',
    //search_type: 'scan', //if I use search_type then it requires size otherwise it shows 0 result
    body: {
        query: {
            "match_all": {}
        }
    }
}, function (err, resp) {
    // `err` is not checked before `resp` is dereferenced below.
    // Only a single scroll request is issued; nothing here loops over further pages.
    client.scroll({
        scrollId: resp._scroll_id,
        scroll: '10s'
    // `callback(resp.hits.hits)` is evaluated right here with the FIRST page's
    // hits; its return value (not `callback` itself) becomes the second
    // argument of client.scroll, so the scrolled page never reaches `callback`.
    }, callback(resp.hits.hits));
});
}

Can anyone help, please?

7 Answers 7

24

You need to repeatedly call client.scroll until no more records are returned. There's a good example in the elasticsearch documentation. I've reproduced their example code below, slightly modified to match your question

// Every hit from every scroll page ends up in this array.
const allRecords = [];

// first we do a search, and specify a scroll timeout
client.search({
  index: 'test',
  type: 'records',
  scroll: '10s',
  body: {
     query: {
         "match_all": {}
     }
  }
}, function getMoreUntilDone(error, response) {
  // On failure `response` cannot be trusted, so stop instead of dereferencing it.
  if (error) {
    console.error('scroll search failed', error);
    return;
  }

  // collect all the records of this page
  const pageHits = response.body.hits.hits;
  for (const hit of pageHits) {
    allRecords.push(hit);
  }

  // An empty page means the scroll is exhausted. Checking it (and not only the
  // reported total) guarantees the recursion ends even if the total and the
  // number of collected hits never line up.
  const hasMore = pageHits.length > 0 &&
    response.body.hits.total.value !== allRecords.length;

  if (hasMore) {
    // now we can call scroll over and over
    client.scroll({
      scroll_id: response.body._scroll_id,
      scroll: '10s'
    }, getMoreUntilDone);
  } else {
    console.log('all done', allRecords);
  }
});
Sign up to request clarification or add additional context in comments.

6 Comments

I tried this before asking this question here, but it didn't work. I figured it out though. If I use search_type: 'scan' then nothing shows up in output, if you omit that then this works
Good investigation @Jane. I've updated the code so that future googlers can get some working code from the outset.
Which param is correct, scrollId or scroll_id in scroll function?
Very helpful !! Simple and running perfectly.
This query can only handle small datasets. If you know you're querying a large dataset, you should split the query according to the documentation: elastic.co/guide/en/elasticsearch/reference/6.7/…
|
14

Thanks @Ceilingfish. Here's a modified ES6 version of the above using await

// Accumulates the hits of every page returned by the scroll.
const allRecords = [];

// The initial search opens the scroll context; '10s' is the scroll timeout.
let page = await esclient.search({
    index: 'test',
    type: 'records',
    scroll: '10s',
    body: {
        query: { "match_all": {} },
        _source: false
    }
});

// Keep requesting pages until one comes back without any hits.
while (page.hits && page.hits.hits.length) {
    for (const hit of page.hits.hits) {
        allRecords.push(hit);
    }

    console.log(`${allRecords.length} of ${page.hits.total}`);

    // Ask for the next page using the scroll id of the page just processed.
    page = await esclient.scroll({
        scrollId: page._scroll_id,
        scroll: '10s'
    });
}

console.log(`Complete: ${allRecords.length} records retrieved`);

2 Comments

Which param is correct, scrollId or scroll_id in scroll function?
i checked and for me, scroll_id is working
4

Query for getting all data from elastic search using Node.js client using scroll with async/await.

const elasticsearch = require('@elastic/elasticsearch');
/**
 * Builds an Elasticsearch client for the hard-coded node URL.
 *
 * @returns {Promise<object>} resolves to the newly constructed client.
 */
async function esconnection() {
  const clientOptions = { node: "http://192.168.1.1:7200" };
  return new elasticsearch.Client(clientOptions);
}
/**
 * Collects the `_source` of every document matched by a `match_all` query,
 * paging through the results with the scroll API.
 *
 * @returns {Promise<Array<object>|undefined>} the `_source` of each hit in the
 *   order received; `undefined` when a request fails (the error is logged).
 */
async function getAllUserList(){
    let es;
    try{
        const userArray = [];
        const query = {
            "query":{
                "match_all": {}
            }
        };
        es = await esconnection();
        let {body} = await es.search({
            index: 'esIndex',
            type: "esIndexType",
            scroll: '2m', // how long a consistent view of the index is kept for the scrolled search
            size: 100,    // number of hits per page (default: 10)
            body: query
        });

        // Loop on the size of the current page rather than on hits.total:
        // the total is not always a plain number (it can be an object with a
        // `value` field), and comparing such an object with `> 0` is always
        // false, which would skip the loop and return an empty list.
        while (body['hits']['hits'].length > 0){
            for (const hit of body['hits']['hits']){
                userArray.push(hit['_source']);
            }
            // Each response carries the id to use for the next page.
            ({body} = await es.scroll({
                scrollId: body['_scroll_id'],
                scroll: '10s'
            }));
        }
        return userArray;
    } catch(error){
        console.log("Code not working properly: ",`${error}`)
    } finally {
        // Release the connection on the failure path too, not only on success.
        if (es) {
            es.close();
        }
    }
}

Comments

1

Node.js failed when Elasticsearch had over 10000 results. This is how I used scroll.

/**
 * Retrieves every document of index 'test' / type 'records' by scrolling in
 * pages of up to 10000 hits.
 *
 * @returns {Promise<{hits: {hits: Array<object>}}>} an object shaped like a
 *   search response whose `hits.hits` holds the hits of all pages, in order.
 */
async function getResultsFromElastic() {
    const responseAll = { hits: { hits: [] } };
    const collected = responseAll.hits.hits;

    // Declared locally: assigning to an undeclared name throws a
    // ReferenceError in strict mode / ES modules.
    const searchQuery = {
        index: 'test',
        type: 'records',
        scroll: '10s',
        size: 10000,
        body: {
            query: {
                "match_all": {}
            }
        }
    };

    let response = await esclient.search(searchQuery);

    // An empty page marks the end of the scroll, so the loop always ends even
    // when hits.total is not a number that can be compared with the count.
    while (response.hits.hits.length > 0) {
        for (const hit of response.hits.hits) {
            collected.push(hit);
        }

        // Shortcut: a plain numeric total that has been reached saves one
        // round trip for the final, empty page.
        const total = response.hits.total;
        if (typeof total === 'number' && total === collected.length) {
            break;
        }

        // get the next response if there are more to fetch
        response = await esclient.scroll({
            scrollId: response._scroll_id,
            scroll: '30s'
        });
    }

    return responseAll;
}

Comments

1

I guess we can use yield as well, E.g.

/**
 * Lazily yields every hit of a scrolled search, one page at a time.
 *
 * The first page comes from `search`; further pages are requested with
 * `scroll` only when the consumer keeps iterating.
 *
 * @param {object} elasticClient client exposing `search` and `scroll`, each resolving to `{ body }`
 * @param {{index: string, scroll: string, size: number, body: object}} searchQuery
 *   search parameters; `scroll` is reused as the timeout of every scroll request
 * @yields {object} each entry of `body.hits.hits`, in the order received
 */
async function* getRecords(elasticClient, searchQuery) {
  let { body } = await elasticClient.search(searchQuery);
  let counter = 0;

  // An empty page marks the end of the scroll. Looping on it guarantees
  // termination whatever shape `hits.total` has.
  while (body.hits.hits.length > 0) {
    counter += body.hits.hits.length;
    for (const hit of body.hits.hits) {
      yield hit;
    }

    // Shortcut that saves the final round trip, taken only when the total is
    // exact: either a plain number, or an object whose relation is not 'gte'
    // (with 'gte' the value is only a lower bound and stopping there would
    // drop records).
    const { total } = body.hits;
    const isLowerBound = typeof total === 'object' && total !== null && total.relation === 'gte';
    const knownTotal = typeof total === 'number' ? total : total && total.value;
    if (!isLowerBound && knownTotal === counter) {
      break;
    }

    ({ body } = await elasticClient.scroll({
      scrollId: body._scroll_id,
      scroll: searchQuery.scroll
    }));
  }
}

then your query

// Query body: match every document (the literal previously had one closing brace too many).
const body = { query: { "match_all": {} } };
// `for await` pulls records from the async generator one at a time.
for await (const record of getRecords(elasticClient, {index: 'test', scroll: '30s', size: 100, body})) {
    console.log(record);
}

1 Comment

this one rocks!
0

This is what i use with Promises

/**
 * Small helper bound to one index/type that can run a search and collect all
 * of its hits through the scroll API.
 */
var EsHelper = function() {
    this.esUrl = esUrl;
    this.indexName = "myIndex";
    this.type = "myIndexType";
    this.elasticClient = new elasticsearch.Client({
        host: esUrl
    });
};

/**
 * Appends the hits of `response` and of every following scroll page to `allHits`.
 *
 * @param {object} response a search or scroll response (`hits.hits`, `hits.total`, `_scroll_id`)
 * @param {Array<object>} allHits accumulator; mutated in place
 * @returns {Promise<Array<object>>} resolves to `allHits`; rejects if a scroll request fails
 */
EsHelper.prototype.scrollData = async function(response, allHits) {
    let page = response;
    // An empty page marks the end of the scroll, so the loop ends even when
    // hits.total never equals the number of collected hits (for example when
    // the total is an object rather than a number).
    while (page.hits.hits.length > 0) {
        for (const hit of page.hits.hits) {
            allHits.push(hit);
        }
        // Shortcut: the reported total has been reached, skip the extra request.
        if (page.hits.total === allHits.length) {
            break;
        }
        page = await this.elasticClient.scroll({
            scroll_id: page._scroll_id,
            scroll: '10s',
        });
    }
    return allHits;
};

/**
 * Runs `query` against the helper's index/type and gathers every hit.
 *
 * @param {object} query request body passed to the search as `body`
 * @returns {Promise<Array<object>>} resolves to the hits of all pages
 */
EsHelper.prototype.runSearchWithScroll = function(query) {
    var allHits = [];
    return this.elasticClient.search({
            index: this.indexName,
            type: this.type,
            scroll: '10s',
            body: query
        })
        .then((response) => this.scrollData(response, allHits));
};

Any better way ?

Comments

0

There are many well-written answers here that solve the problem. But if someone is looking for an out of the box solution, they can head over here and use this package - https://github.com/alcacoop/elasticsearch-scroll-stream

The usage is pretty simple and it just works beautifully. Below is an example I took from their official documentation.

const elasticsearch = require('elasticsearch');
const ElasticsearchScrollStream = require('elasticsearch-scroll-stream');

const client = new elasticsearch.Client();

// Search parameters handed to the scroll stream together with the client.
const scrollOptions = {
  index: 'your-index',
  type: 'your-type',
  scroll: '10s',
  size: '50',
  _source: ['name'],
  q: 'name:*'
};
const es_stream = new ElasticsearchScrollStream(client, scrollOptions);

// Forward everything the stream emits to standard output.
es_stream.pipe(process.stdout);

es_stream.on('data', (data) => {
  // Process your results here
});

es_stream.on('end', () => {
  console.log("End");
});

3 Comments

I tried, but no data arrives in the es_stream.on('data', function(data) { // Process your results here }) block.
What is the elasticsearch version you are using?
elasticsearch version = "^16.4.0"

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.