Boosting buckets in aggregation ElasticSearch

33 Views Asked by At

I have an Elasticsearch query that I am using to group results by a text field called category in my documents. Another field in my _docs is called id, of type integer. Now, there are two requirements: (1) my aggregated results (bucket response) must first contain the buckets/categories to which certain "id"s belong, and then the rest of the _docs can follow within the same category / other categories; and (2) the results must be sorted by the priority and itemPriority fields, which are also integers. Here is my query, which returns 3 categories at a time and 10 _docs within each category at a time:

{
    "size": 0,
    "sort": [
        {
            "itemPriority": "asc"
        },
        {
            "priority": "asc"
        }
    ],
    "query": {
        "bool": {
            "should": [
                {
                    "terms": {
                        "id": [
                            8061848,
                            8061847,
                            8061846,
                            8061845,
                            8061844,
                            8061843,
                            8061842
                        ]
                    }
                }
            ],
            "must": [
                {
                    "match_all": {}
                },
                {
                    "nested": {
                        "path": "zipData.zipDistribution",
                        "query": {
                            "bool": {
                                "must_not": [
                                    {
                                        "match_phrase_prefix": {
                                            "zipData.zipDistribution.flags.itemStatus": "Removed from catalog"
                                        }
                                    },
                                    {
                                        "match_phrase_prefix": {
                                            "zipData.zipDistribution.flags.itemStatus": "Out of Stock"
                                        }
                                    }
                                ],
                                "must": [
                                    {
                                        "match": {
                                            "zipData.zipDistribution.zip": "55311"
                                        }
                                    }
                                ]
                            }
                        }
                    }
                }
            ]
        }
    },
    "aggs": {
        "categories": {
            "terms": {
                "field": "category",
                "size": 100
            },
            "aggs": {
                "filtered_docs": {
                    "top_hits": {
                        "_source": {
                            "includes": [
                                "id",
                                "name"
                            ]
                        },
                        "from": 0,
                        "size": 10
                    }
                },
                "bucket_sort": {
                    "bucket_sort": {
                        "from": 0,
                        "size": 3
                    }
                }
            }
        }
    }
}

...the issue is that this does not re-arrange the results of the aggregation according to the array of ids I have added in the terms clause. Moreover, if instead of using should I push the terms clause into must, I get only the buckets with these ids and no other _docs are returned. To summarise, I need a solution where I first get the categories with these ids, and then the rest of the _docs/categories can follow. And the entire data needs to be sorted at the end based on the two fields mentioned above, priority and itemPriority. Please help!

1

There is 1 best solution below

0
G0l0s On

I satisfied your first requirement with a runtime field

Mapping simplified

PUT /categories
{
    "mappings": {
        "properties": {
            "id": {
                "type": "integer"
            },
            "category": {
                "type": "keyword"
            }
        }
    }
}

Documents

PUT /categories/_bulk
{"create":{"_id":1}}
{"id": 8001, "category": "1"}
{"create":{"_id":2}}
{"id": 8002, "category": "2"}
{"create":{"_id":3}}
{"id": 8003, "category": "3"}
{"create":{"_id":4}}
{"id": 8004, "category": "2"}
{"create":{"_id":5}}
{"id": 8005, "category": "1"}
{"create":{"_id":6}}
{"id": 8006, "category": "1"}

Aggregation query with a script

GET /categories/_search?filter_path=aggregations
{
    "runtime_mappings": {
        "category_filterable": {
            "type": "keyword",
            "script": {
                "source": """
                    // Emits the plain category for documents whose id is listed in
                    // params.certain_ids, and a renamed category (built from
                    // params.other_category_name_format) for every other document,
                    // so the two groups land in separate terms buckets.

                    // Reading .value on a field that has no value throws and fails the
                    // whole search, so documents missing either field emit nothing.
                    if (doc['id'].size() != 0 && doc['category'].size() != 0) {
                        // The doc value is read as a long here, while the ids passed in
                        // params are compared as Integer objects; convert so that
                        // List.contains() can match them.
                        long longDocumentId = doc['id'].value;
                        Integer documentId = Integer.valueOf((int) longDocumentId);
                        List certainIds = params.certain_ids;
                        String documentCategory = doc['category'].value;

                        if (certainIds.contains(documentId)) {
                            emit(documentCategory);
                        } else {
                            String categoryNameFormat = params.other_category_name_format;
                            // String.format takes its arguments as an array.
                            def[] args = [documentCategory].toArray();
                            emit(String.format(categoryNameFormat, args));
                        }
                    }
                """,
                "params": {
                    "certain_ids": [
                        8001,
                        8002,
                        8004,
                        8005
                    ],
                    "other_category_name_format": "%s_other_doc"
                }
            }
        }
    },
    "aggs": {
        "per_category": {
            "terms": {
                "field": "category_filterable"
            }
        }
    }
}

Response

{
    "aggregations" : {
        "per_category" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
                {
                    "key" : "1",
                    "doc_count" : 2
                },
                {
                    "key" : "2",
                    "doc_count" : 2
                },
                {
                    "key" : "1_other_doc",
                    "doc_count" : 1
                },
                {
                    "key" : "3_other_doc",
                    "doc_count" : 1
                }
            ]
        }
    }
}