I'm using Elasticsearch 6.6 and have an index (1 shard, 1 replica) with the geonames (https://www.geonames.org/) dataset indexed (indexsize =1.3 gb, 11.8 mio geopoints).
I was playing around a bit with the geo distance sorting query, sorting the whole index for some origin points. So after some testing I saw that sorting ascending is always faster than sorting descending. here is an example query:
POST /geonames/_search?request_cache=false
{
"size":1,
"sort" : [
{
"_geo_distance" : {
"location" : [8, 49],
"order" : "asc",
"unit" : "m",
"mode" : "min",
"distance_type" : "arc",
"ignore_unmapped": true
}
}
]
}
Here is the answer for ascending sorting (with explain and profile True):
{
"took" : 1374,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 11858060,
"max_score" : null,
"hits" : [
{
"_shard" : "[geonames][0]",
"_node" : "qXTymyB9QLmxhPtGEtA_mA",
"_index" : "geonames",
"_type" : "doc",
"_id" : "L781LmkBrQo0YN4qP48D",
"_score" : null,
"_source" : {
"id" : "3034701",
"name" : "Forêt de Wissembourg",
"location" : {
"lat" : "49.00924",
"lon" : "8.01542"
}
},
"sort" : [
1523.4121312414704
],
"_explanation" : {
"value" : 1.0,
"description" : "*:*",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[qXTymyB9QLmxhPtGEtA_mA][geonames][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time_in_nanos" : 265223567,
"breakdown" : {
"score" : 0,
"build_scorer_count" : 54,
"match_count" : 0,
"create_weight" : 10209,
"next_doc" : 253091268,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 11858087,
"score_count" : 0,
"build_scorer" : 263948,
"advance" : 0,
"advance_count" : 0
}
}
],
"rewrite_time" : 1097,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 1044167746,
"children" : [
{
"name" : "SimpleFieldCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 508296683
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
and here for descending, just switched the parameter from asc to desc (also with profile and explain):
{
"took" : 2226,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 11858060,
"max_score" : null,
"hits" : [
{
"_shard" : "[geonames][0]",
"_node" : "qXTymyB9QLmxhPtGEtA_mA",
"_index" : "geonames",
"_type" : "doc",
"_id" : "Mq80LmkBrQo0YN4q11bA",
"_score" : null,
"_source" : {
"id" : "4036351",
"name" : "Bollons Seamount",
"location" : {
"lat" : "-49.66667",
"lon" : "-176.16667"
}
},
"sort" : [
1.970427111052182E7
],
"_explanation" : {
"value" : 1.0,
"description" : "*:*",
"details" : [ ]
}
}
]
},
"profile" : {
"shards" : [
{
"id" : "[qXTymyB9QLmxhPtGEtA_mA][geonames][0]",
"searches" : [
{
"query" : [
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time_in_nanos" : 268521404,
"breakdown" : {
"score" : 0,
"build_scorer_count" : 54,
"match_count" : 0,
"create_weight" : 9333,
"next_doc" : 256458664,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 11858087,
"score_count" : 0,
"build_scorer" : 195265,
"advance" : 0,
"advance_count" : 0
}
}
],
"rewrite_time" : 1142,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time_in_nanos" : 1898324618,
"children" : [
{
"name" : "SimpleFieldCollector",
"reason" : "search_top_hits",
"time_in_nanos" : 1368306442
}
]
}
]
}
],
"aggregations" : [ ]
}
]
}
}
So my question is, why is it like this ? As I understood Es calculates the distance from the origin point to every other point and then sorts them. So why is the descending sorting so much slower ?