query for elasticsearch returning count

Question

I am struggling to create the query/rule that will help me build an alerting script. I want to query the Elasticsearch API for counts on a specific index so that I can be alerted when the count reaches a certain threshold. The following query is my attempt; I have no prior experience with this:

{
"query": {
 "filtered": {
  "query": {
    "query_string": {
      "analyze_wildcard": true,
      "query": "*"
    }
  },
  "filter": {
    "bool": {
      "must": [
        {
          "query": {
            "match": {
              "PStream": {
                "query": "*",
                "type": "phrase"
              }
            }
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": 1447789445320,
              "lte": 1447793045320
            }
          }
        }
      ],
      "must_not": []
     }
    }
   }
  },
   "highlight": {
   "pre_tags": [
   "@kibana-highlighted-field@"
  ],
   "post_tags": [
   "@/kibana-highlighted-field@"
  ],
  "fields": {
    "*": {}
   },
  "fragment_size": 2147483647
 },
  "size": 500,
   "sort": [
  {
  "@timestamp": {
    "order": "desc",
    "unmapped_type": "boolean"
  }
 }
],
"aggs": {
 "2": {
  "date_histogram": {
    "field": "@timestamp",
    "interval": "1m",
    "pre_zone": "-05:00",
    "pre_zone_adjust_large_interval": true,
    "min_doc_count": 0,
    "extended_bounds": {
      "min": 1447789445317,
      "max": 1447793045317
    }
  }
 }
},

PStream is the field that I am focused on.

EDIT:

An example of the data going to the index:

{
 "_index": "logstash-2015.11.17",
 "_type": "logs",
 "_id": "AVEXMKu2YVnF1NOjr9YT",
 "_score": null,
 "_source": {
 "authorUrl": "",
 "postUrl": "",
 "pubDate": "2015-11-17T15:18:24",
 "scrapeDate": "2015-11-17T15:44:03",
 "clientId": "136902834",
 "query": "Jenny Balatsinou",
 "PType": "post",
 "tLatency": 1539,
 "PLang": "en",
 "PStream": "864321",
 "PName": "xStackOverflow",
 "@version": "1",
 "@timestamp": "2015-11-17T20:44:03.400Z"
},
"fields": {
"@timestamp": [
  1447793043400
],
"pubDate": [
  1447773504000
],
"scrapeDate": [
  1447775043000
  ]
 },
"sort": [
 1447793043400
]

There are about 20 million of these messages being indexed into Elasticsearch daily. I have created a dashboard in Kibana where I view this data and its stats. I would like to write a proper query that I can use in a Java program that runs periodically and checks this index. It should return the hourly total count grouped by the PStream field, which has multiple values, so that any time a count is 0 it will send an alert.

Eg. Output:

"result": {
  "total": 74,
  "successful": 63,
  "failed": 11,
    {
         {
        "index": "logstash-2015.11.08",
        "PStream": "37647338933",
        "Count":   1234532
          },
          {
        "index": "logstash-2015.11.08",
        "PStream": "45345343566",
        "Count":   156532
          },

It's not clear to me what you are trying to do here. Could you post your mapping (or at least the relevant portions of it), some sample docs, and an example of the output you are hoping to achieve? — Sloan Ahrens
– Sloan Ahrens, Commented Nov 17, 2015 at 21:09
I added an example message and tried to explain my dilemma a little better — vbNewbie
– vbNewbie, Commented Nov 17, 2015 at 21:36
Hmm, I can't think of a way to get ES to tell you which combinations of values don't exist. I mean, you could do a "PStream" terms aggregation with a date histogram inside, or the other way around. Or you could filter by a date-time range and get a list of "PStream" values in that range, but it's only going to tell you which terms do exist, not which ones don't. — Sloan Ahrens
– Sloan Ahrens, Commented Nov 17, 2015 at 21:54
I guess I could do it that way: take an aggregation by date, and whenever the count = 0, that's what triggers the alert. Thanks for taking the time to look at this. — vbNewbie
– vbNewbie, Commented Nov 17, 2015 at 22:22
Yeah, it makes the client-side code more involved, for sure. — Sloan Ahrens
– Sloan Ahrens, Commented Nov 17, 2015 at 22:27

Sloan Ahrens · Accepted Answer · 2015-11-17 23:04:21Z

As a quick example (per comments above), I just set up a trivial index:

DELETE /test_index

PUT /test_index

added some (simplified) data:

PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":2}}
{"PStream": "864321","@timestamp": "2015-11-17T21:44:03.400Z"}
{"index":{"_id":3}}
{"PStream": "864321","@timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":4}}
{"PStream": "864322","@timestamp": "2015-11-17T21:44:03.400Z"}

And now I can get the "PStream" terms inside an hour histogram:

POST /test_index/_search
{
    "size": 0, 
     "aggs" : {
        "timestamp_histogram" : {
            "date_histogram" : {
                "field" : "@timestamp",
                "interval" : "hour"
            },
            "aggs": {
                "pstream_terms": {
                    "terms": {
                        "field": "PStream"
                    }
                }
            }
        }
    }
}
...
{
   "took": 6,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "timestamp_histogram": {
         "buckets": [
            {
               "key_as_string": "2015-11-17T20:00:00.000Z",
               "key": 1447790400000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 2
                     }
                  ]
               }
            },
            {
               "key_as_string": "2015-11-17T21:00:00.000Z",
               "key": 1447794000000,
               "doc_count": 2,
               "pstream_terms": {
                  "doc_count_error_upper_bound": 0,
                  "sum_other_doc_count": 0,
                  "buckets": [
                     {
                        "key": "864321",
                        "doc_count": 1
                     },
                     {
                        "key": "864322",
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

or the other way around:

POST /test_index/_search
{
   "size": 0,
   "aggs": {
      "pstream_terms": {
         "terms": {
            "field": "PStream"
         },
         "aggs": {
            "timestamp_histogram": {
               "date_histogram": {
                  "field": "@timestamp",
                  "interval": "hour"
               }
            }
         }
      }
   }
}
...
{
   "took": 5,
   "timed_out": false,
   "_shards": {
      "total": 5,
      "successful": 5,
      "failed": 0
   },
   "hits": {
      "total": 4,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "pstream_terms": {
         "doc_count_error_upper_bound": 0,
         "sum_other_doc_count": 0,
         "buckets": [
            {
               "key": "864321",
               "doc_count": 3,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T20:00:00.000Z",
                        "key": 1447790400000,
                        "doc_count": 2
                     },
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            },
            {
               "key": "864322",
               "doc_count": 1,
               "timestamp_histogram": {
                  "buckets": [
                     {
                        "key_as_string": "2015-11-17T21:00:00.000Z",
                        "key": 1447794000000,
                        "doc_count": 1
                     }
                  ]
               }
            }
         ]
      }
   }
}

Here's the code I used:

http://sense.qbox.io/gist/6c0c30db1cf0fb8529bcfec21c0ce5c02a5ae94c

Collectives™ on Stack Overflow

query for elasticsearch returning count

1 Answer 1

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related