Elasticsearch sort returns incorrect results?

Why elasticsearch sort returns incorrect results?

Mappings

{
  "mappings": {
    "_doc": {
      "properties": {
        "name": {
          "type": "keyword",
          "fields": {
            "sort": {
              "type": "icu_collation_keyword",
              "index": false,
              "language": "tr",
              "country": "TR"
            }
          }
        }
      }
    }
  }
}

Settings

{
  "settings": {
    "index": {
      "routing": {
        "allocation": {
          "include": {
            "_tier_preference": "data_content"
          }
        }
      },
      "number_of_shards": "1",
      "provided_name": "users",
      "creation_date": "1676401943168",
      "number_of_replicas": "1",
      "uuid": "s7VK9J1FQNi1k6yChiTLlg",
      "version": {
        "created": "7170899"
      }
    }
  },
  "defaults": {
    "index": {
      "flush_after_merge": "512mb",
      "final_pipeline": "_none",
      "max_inner_result_window": "100",
      "unassigned": {
        "node_left": {
          "delayed_timeout": "1m"
        }
      },
      "max_terms_count": "65536",
      "rollup": {
        "source": {
          "name": "",
          "uuid": ""
        }
      },
      "lifecycle": {
        "name": "",
        "parse_origination_date": "false",
        "step": {
          "wait_time_threshold": "12h"
        },
        "indexing_complete": "false",
        "rollover_alias": "",
        "origination_date": "-1"
      },
      "routing_partition_size": "1",
      "force_memory_term_dictionary": "false",
      "max_docvalue_fields_search": "100",
      "merge": {
        "scheduler": {
          "max_thread_count": "2",
          "auto_throttle": "true",
          "max_merge_count": "7"
        },
        "policy": {
          "floor_segment": "2mb",
          "max_merge_at_once_explicit": "30",
          "max_merge_at_once": "10",
          "max_merged_segment": "5gb",
          "expunge_deletes_allowed": "10.0",
          "segments_per_tier": "10.0",
          "deletes_pct_allowed": "33.0"
        }
      },
      "max_refresh_listeners": "1000",
      "max_regex_length": "1000",
      "load_fixed_bitset_filters_eagerly": "true",
      "number_of_routing_shards": "1",
      "write": {
        "wait_for_active_shards": "1"
      },
      "verified_before_close": "false",
      "mapping": {
        "coerce": "false",
        "nested_fields": {
          "limit": "50"
        },
        "depth": {
          "limit": "20"
        },
        "field_name_length": {
          "limit": "9223372036854775807"
        },
        "total_fields": {
          "limit": "1000"
        },
        "nested_objects": {
          "limit": "10000"
        },
        "ignore_malformed": "false",
        "dimension_fields": {
          "limit": "16"
        }
      },
      "source_only": "false",
      "soft_deletes": {
        "enabled": "true",
        "retention": {
          "operations": "0"
        },
        "retention_lease": {
          "period": "12h"
        }
      },
      "max_script_fields": "32",
      "query": {
        "default_field": [
          "*"
        ],
        "parse": {
          "allow_unmapped_fields": "true"
        }
      },
      "format": "0",
      "frozen": "false",
      "sort": {
        "missing": [],
        "mode": [],
        "field": [],
        "order": []
      },
      "priority": "1",
      "codec": "default",
      "max_rescore_window": "10000",
      "max_adjacency_matrix_filters": "100",
      "analyze": {
        "max_token_count": "10000"
      },
      "gc_deletes": "60s",
      "top_metrics_max_size": "10",
      "optimize_auto_generated_id": "true",
      "max_ngram_diff": "1",
      "hidden": "false",
      "translog": {
        "generation_threshold_size": "64mb",
        "flush_threshold_size": "512mb",
        "sync_interval": "5s",
        "retention": {
          "size": "-1",
          "age": "-1"
        },
        "durability": "REQUEST"
      },
      "auto_expand_replicas": "false",
      "mapper": {
        "dynamic": "true"
      },
      "recovery": {
        "type": ""
      },
      "requests": {
        "cache": {
          "enable": "true"
        }
      },
      "data_path": "",
      "highlight": {
        "max_analyzed_offset": "1000000"
      },
      "routing": {
        "rebalance": {
          "enable": "all"
        },
        "allocation": {
          "include": {
            "_tier": ""
          },
          "disk": {
            "watermark": {
              "ignore": "false"
            }
          },
          "exclude": {
            "_tier": ""
          },
          "require": {
            "_tier": ""
          },
          "enable": "all",
          "total_shards_per_node": "-1"
        }
      },
      "search": {
        "slowlog": {
          "level": "TRACE",
          "threshold": {
            "fetch": {
              "warn": "-1",
              "trace": "-1",
              "debug": "-1",
              "info": "-1"
            },
            "query": {
              "warn": "-1",
              "trace": "-1",
              "debug": "-1",
              "info": "-1"
            }
          }
        },
        "idle": {
          "after": "30s"
        },
        "throttled": "false"
      },
      "fielddata": {
        "cache": "node"
      },
      "default_pipeline": "_none",
      "max_slices_per_scroll": "1024",
      "shard": {
        "check_on_startup": "false"
      },
      "xpack": {
        "watcher": {
          "template": {
            "version": ""
          }
        },
        "version": "",
        "ccr": {
          "following_index": "false"
        }
      },
      "percolator": {
        "map_unmapped_fields_as_text": "false"
      },
      "allocation": {
        "max_retries": "5",
        "existing_shards_allocator": "gateway_allocator"
      },
      "refresh_interval": "1s",
      "indexing": {
        "slowlog": {
          "reformat": "true",
          "threshold": {
            "index": {
              "warn": "-1",
              "trace": "-1",
              "debug": "-1",
              "info": "-1"
            }
          },
          "source": "1000",
          "level": "TRACE"
        }
      },
      "compound_format": "0.1",
      "blocks": {
        "metadata": "false",
        "read": "false",
        "read_only_allow_delete": "false",
        "read_only": "false",
        "write": "false"
      },
      "max_result_window": "10000",
      "store": {
        "stats_refresh_interval": "10s",
        "type": "",
        "fs": {
          "fs_lock": "native"
        },
        "preload": [],
        "snapshot": {
          "snapshot_name": "",
          "index_uuid": "",
          "cache": {
            "prewarm": {
              "enabled": "true"
            },
            "enabled": "true",
            "excluded_file_types": []
          },
          "repository_uuid": "",
          "uncached_chunk_size": "-1b",
          "index_name": "",
          "partial": "false",
          "blob_cache": {
            "metadata_files": {
              "max_length": "64kb"
            }
          },
          "repository_name": "",
          "snapshot_uuid": ""
        }
      },
      "queries": {
        "cache": {
          "enabled": "true"
        }
      },
      "shard_limit": {
        "group": "normal"
      },
      "warmer": {
        "enabled": "true"
      },
      "max_shingle_diff": "3",
      "query_string": {
        "lenient": "false"
      }
    }
  }
}

Query

GET users/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "name": {
        "order": "asc"
      }
    }
  ]
}

And Result

{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 5,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [
      {
        "_index" : "users",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : null,
        "_source" : {
          "name" : "Haydar"
        },
        "sort" : [
          "Haydar"
        ]
      },
      {
        "_index" : "users",
        "_type" : "_doc",
        "_id" : "3",
        "_score" : null,
        "_source" : {
          "name" : "Rasim"
        },
        "sort" : [
          "Rasim"
        ]
      },
      {
        "_index" : "users",
        "_type" : "_doc",
        "_id" : "4",
        "_score" : null,
        "_source" : {
          "name" : "zeynep"
        },
        "sort" : [
          "zeynep"
        ]
      },
      {
        "_index" : "users",
        "_type" : "_doc",
        "_id" : "2",
        "_score" : null,
        "_source" : {
          "name" : "Ümit"
        },
        "sort" : [
          "Ümit"
        ]
      },
      {
        "_index" : "users",
        "_type" : "_doc",
        "_id" : "5",
        "_score" : null,
        "_source" : {
          "name" : "İsmail"
        },
        "sort" : [
          "İsmail"
        ]
      }
    ]
  }
}

Hi @Fatih_Erol1 and welcome to the community!

Looking at the mappings and response, that sorting is happening alphabetically (ASCII) and correctly on the "name" field resulting in the sort order of

"Haydar, Rasim, zeynep, Ümit, İsmail"

In your mappings, you have created a second field called name.sort which has the Turkey language and country.

If you choose to sort by the name.sort field, it gives a different sort order - is this one more aligned to your expectations? I'm not entirely familiar with the turkey language and the character sets:

Query:

GET users/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "name.sort": {
        "order": "asc"
      }
    }
  ]
}

Results:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 5,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "users",
        "_id": "vqmzVYYBU8q3txBYWS3x",
        "_score": null,
        "_source": {
          "name": "Haydar"
        },
        "sort": [
          """ᰕᚌՉ䀐倎瀤"""
        ]
      },
      {
        "_index": "users",
        "_id": "wqmzVYYBU8q3txBYWS3x",
        "_score": null,
        "_source": {
          "name": "İsmail"
        },
        "sort": [
          """ᴧႊ䝈倎ᠤ"""
        ]
      },
      {
        "_index": "users",
        "_id": "v6mzVYYBU8q3txBYWS3x",
        "_score": null,
        "_source": {
          "name": "Rasim"
        },
        "sort": [
          """☕ᎎ䡀႐໠ """
        ]
      },
      {
        "_index": "users",
        "_id": "wamzVYYBU8q3txBYWS3x",
        "_score": null,
        "_source": {
          "name": "Ümit"
        },
        "sort": [
          """⦁ႎ䨀ႀะᰀ"""
        ]
      },
      {
        "_index": "users",
        "_id": "wKmzVYYBU8q3txBYWS3x",
        "_score": null,
        "_source": {
          "name": "zeynep"
        },
        "sort": [
          """⸙ᚑى倈⠀"""
        ]
      }
    ]
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.