Elasticsearch - how to make a shorter phrase more relevant in the results

Hi, can somebody please explain how to get a higher score in the search results for a shorter phrase compared with a longer one? For example, if I have two words, ABCXXX and ABCXXXXX, indexed with edge n-grams, and I search for the token ABC, the score is the same for both. I would like to get a higher score for ABCXXX and a lower score for ABCXXXXX. The mapping looks like:

    {
        "settings": {
            "index": {
                "refresh_interval": "1m",
                "number_of_shards": "1",
                "number_of_replicas": "1",
                "analysis": {
                    "filter": {
                        "autocomplete_filter": {
                            "type": "edge_ngram",
                            "min_gram": "1",
                            "max_gram": "20"
                        }
                    },
                    "analyzer": {
                        "autocomplete": {
                            "filter": [
                                "lowercase",
                                "asciifolding",
                                "autocomplete_filter"
                            ],
                            "type": "custom",
                            "tokenizer": "standard"
                        },
                        "default": {
                            "filter": [
                                "lowercase",
                                "asciifolding"
                            ],
                            "type": "custom",
                            "tokenizer": "standard"
                        }
                    }
                }
            }
        },
        "mappings": {
            "doc": {
                "dynamic": "strict",
                "_all": {
                    "enabled": false
                },
                "properties": {
                    "normalized2": {
                        "type": "text",
                        "analyzer": "autocomplete"
                    }
                }
            }
        }
    }

and the query looks like:

{
    "sort": {
        "_score": "desc"
    },
    "query": {
        "bool": {
            "should": [
                {
                    "term": {
                        "normalized2": {
                            "value": "abc",
                            "boost": 2
                        }
                    }
                }
            ]
        }
    }
}

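There is a good solution for this: instead of using the edge_ngram token filter, use the edge_ngram tokenizer. With the filter, all n-grams produced from one word are emitted at the same position, and such overlapping tokens are by default not counted towards the field length, so ABCXXX and ABCXXXXX end up with the same length norm and therefore the same score. With the tokenizer, every n-gram gets its own position, so the shorter word yields a shorter field and scores higher. Only the `settings` section changes (the mapping stays the same):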

  "settings": {
    "index": {
      "refresh_interval": "1m",
      "number_of_shards": "1",
      "number_of_replicas": "1",
      "analysis": {
        "tokenizer": {
          "autocomplete_tokenizer": {
            "type": "edge_ngram",
            "min_gram": "1",
            "max_gram": "20"
          }
        },
        "analyzer": {
          "autocomplete": {
            "filter": [
              "lowercase",
              "asciifolding"
            ],
            "type": "custom",
            "tokenizer": "autocomplete_tokenizer"
          },
          "default": {
            "filter": [
              "lowercase",
              "asciifolding"
            ],
            "type": "custom",
            "tokenizer": "standard"
          }
        }
      }
    }
  }

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.