0

I have following structure in elastic search index

      {
    "_index" : "hotel",
    "_type" : "_doc",
    "_id" : "13171",
    "_score" : 6.072218,
    "_source" : {
      "_class" : "hotel",
      "id" : 13171,
      "places" : [
        {
          "type" : "MAIN_LOCATION",
          "placeId" : 2032
        }
      ],
      "numberOfRecommendations" : 0
    }
  },
  {
    "_index" : "hotel",
    "_type" : "_doc",
    "_id" : "7146",
    "_score" : 6.072218,
    "_source" : {
      "_class" : "hotel",
      "id" : 7146,
      "places" : [
        {
          "type" : "MAIN_LOCATION",
          "placeId" : 2032
        }
      ],
      "numberOfRecommendations" : 1
    }  
  },
  {
    "_index" : "hotel",
    "_type" : "_doc",
    "_id" : "7146",
    "_score" : 6.072218,
    "_source" : {
      "_class" : "hotel",
      "id" : 7146,
      "places" : [
        {
          "type" : "AFFILIATE",
          "placeId" : 2032
        }
      ],
      "numberOfRecommendations" : 3
    }  
  }

Note the places is nested type and has two types "Main Location" and affiliate. I am creating an aggregation to count the hotels for a particular place and total number of recommendations for main location.

In the above example for main location i should get hotels 2 and numberOfRecommendations 1

I am using java and have created the following code

  public List<PlaceHotelStats> getHotelOfferStats() {

// Create aggregation filter for considering only places with PlaceType from filter(in current
// case main location)
String placeFilterAggregationName = "placeFilter";
BoolQueryBuilder nestedPlaceQuery = boolQuery();
nestedPlaceQuery.must(termQuery("places.type", "MAIN_LOCATION"));
nestedPlaceQuery.must(termsQuery("places.placeId", filter.getPlaceIds()));
AggregationBuilder placeAggregationFilter =
    AggregationBuilders.filters(placeFilterAggregationName, nestedPlaceQuery);

// Add Terms filter to group by field placeId and then add sub aggregation for
// totalRecommendations to have buckets
String aggregationGroupByPlaceId = "group_by_place_id";
var includedPlaceIds = filter.getPlaceIds().stream().mapToLong(l -> l).toArray();
TermsAggregationBuilder aggregationBuilders =
    AggregationBuilders.terms(aggregationGroupByPlaceId)
        .field("places.placeId")
        .size(filter.getPlaceIds().size())
        .includeExclude(new IncludeExclude(includedPlaceIds, null))
        .subAggregation(
            AggregationBuilders.sum("totalRecommendationsForPlace")
                .field("numberOfRecommendations"));

// Add place term aggregation along with recommendation to Filter aggregation
placeAggregationFilter.subAggregation(aggregationBuilders);

// The final aggregration which has filter first then subaggregation of place terms with buckets
// and review counts
var nestedPlacesAggregation =
    AggregationBuilders.nested(NESTED_PLACES_AGGREGATION_NAME, PLACES)
        .subAggregation(placeAggregationFilter);
var query =
    new NativeSearchQueryBuilder()
        .withQuery(builder.query())
        .addAggregation(nestedPlacesAggregation)
        .build();

var result = elasticsearchOperations.search(query, EsHotel.class, ALIAS_COORDS);

if (!result.hasAggregations()) {
  throw new IllegalStateException("No aggregations found after query with aggregations!");
}

ParsedFilters aggregationParsedFilters =
    ((ParsedNested) result.getAggregations().get(NESTED_PLACES_AGGREGATION_NAME))
        .getAggregations()
        .get(placeFilterAggregationName);
var buckets =
    ((ParsedTerms)
            aggregationParsedFilters
                .getBuckets()
                .get(0)
                .getAggregations()
                .get(aggregationGroupByPlaceId))
        .getBuckets();

List<PlaceHotelStats> placeHotelStats= new ArrayList<>();
buckets.forEach(
    bucket ->
        placeHotelStats.add(
            new PlaceHotelStats(
                bucket.getKeyAsNumber().longValue(),
                Math.toIntExact(bucket.getDocCount()),
                getTotalRecommendationsForPlace(bucket))));

return placeOfferStats;

}

  private int getTotalRecommendationsForPlace(Terms.Bucket bucket) {
    var aggregationTotalRecommendation =
        bucket.getAggregations().get("totalRecommendationsForPlace");
    if (aggregationTotalRecommendation != null) {
      return (int) ((ParsedSum) aggregationTotalRecommendation).getValue();
    }
    return 0;
  }

This gives me correct count of total places but not correct sum of all recommendations

I check elastic search query and it looks something like this

{
  "query": {
  "bool" : {
    "must" : [
      {
        "nested" : {
          "query" : {
            "bool" : {
              "must" : [
                {
                  "term" : {
                    "places.type" : {
                      "value" : "MAIN_LOCATION",
                      "boost" : 1.0
                    }
                  }
                },
                {
                  "terms" : {
                    "places.placeId" : [
                      7146
                    ],
                    "boost" : 1.0
                  }
                }
              ],
              "adjust_pure_negative" : true,
              "boost" : 1.0
            }
          },
          "path" : "places",
          "ignore_unmapped" : false,
          "score_mode" : "min",
          "boost" : 1.0
        }
      },
      
      
      {
        "nested" : {
          "query" : {
            "exists" : {
              "field" : "places",
              "boost" : 1.0
            }
          },
          "path" : "places",
          "ignore_unmapped" : false,
          "score_mode" : "none",
          "boost" : 1.0
        }
      }
    ],
    "adjust_pure_negative" : true,
    "boost" : 1.0
  }
},
"aggs": {
  "nestedPlaces":{
    "nested":{"path":"places"},
    "aggregations":{
      "placeFilter":{
        "filters":{
          "filters":[{
            "bool":{
              "must":[{
                "term":{"places.type":{"value":"MAIN_LOCATION","boost":1.0}}},
                {"terms":{"places.placeId":[7146],"boost":1.0}}],
                "adjust_pure_negative":true,
                "boost":1.0}
            
          }],
          "other_bucket":false,
          "other_bucket_key":"_other_"},
          "aggregations":{
            "group_by_place_id":{
              "terms":{
                "field":"places.placeId",
                "size":193,
                "min_doc_count":1,
                "shard_min_doc_count":0,
                "show_term_doc_count_error":false,
                "order":[
                  {"_count":"desc"},
                  {"_key":"asc"}],
                  "include":["7146"]},
                  "aggregations":{
                    "totalRecommendationsForPlace":{
                      "sum":{
                        "field":"numberOfRecommendations"
                        
                      }
                      
                    }
                    
                  }
              
            }
            
          }
        
      }
      
    }
    
  }
  
}
}

Current output of the query is where total hotels are corrent but totalrecommendations are wrong and is always 0 which means subaggregation is not working as expected

"aggregations" : {
    "nestedPlaces" : {
      "doc_count" : 7,
      "placeFilter" : {
        "buckets" : [
          {
            "doc_count" : 3,
            "group_by_place_id" : {
              "doc_count_error_upper_bound" : 0,
              "sum_other_doc_count" : 0,
              "buckets" : [
                {
                  "key" : 2032,
                  "doc_count" : 3,
                  "totalRecommendationsForPlace" : {
                    "value" : 0.0
                  }
                }
              ]
            }
          }
        ]
      }
    }
  }

Not sure where I went wrong

2
  • It's easier to help you out if you show the raw DSL query ;-) Commented May 16, 2023 at 11:02
  • @Val I added the query Commented May 16, 2023 at 11:06

1 Answer 1

1

Your query is basically correct up to the point where you try to get the sum of numberOfRecommendations. Since that field is at the root-level of the document and not inside the nested document itself, you need to first add a reverse_nested aggregation to get back to the top-level document, and then only you can have your sum aggregation, like this:

        "group_by_place_id": {
          "terms": {
            "field": "places.placeId",
            "size": 193,
            "min_doc_count": 1,
            "shard_min_doc_count": 0,
            "show_term_doc_count_error": false,
            "order": [
              {
                "_count": "desc"
              },
              {
                "_key": "asc"
              }
            ],
            "include": [
              "7146"
            ]
          },
          "aggregations": {
            "back_to_root": {               <----- add this
              "reverse_nested": {},         <----- add this
              "aggs": {
                "totalRecommendationsForPlace": {
                  "sum": {
                    "field": "numberOfRecommendations"
                  }
                }
              }
            }
          }
        }

PS: if you can have a different number of recommendations based on the type (main location or affiliate), then you should have that number at the nested level and your query would work as is.

Sign up to request clarification or add additional context in comments.

1 Comment

Ofcourse.. Makes sense now. Thank you for pointer.. Upvoted and marked as answer

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.