Suggester: How to return the top weighted documents


(Dries Cleymans) #1

Our elastic version:

{
  "name" : "instance-0000000001",
  "cluster_name" : "d90d7063ab1ce029fff8ae497e45b933",
  "cluster_uuid" : "nPbWqX0bSzGMEh84Y793Qg",
  "version" : {
    "number" : "5.5.2",
    "build_hash" : "b2f0c09",
    "build_date" : "2017-08-14T12:33:14.154Z",
    "build_snapshot" : false,
    "lucene_version" : "6.6.0"
  },
  "tagline" : "You Know, for Search"
}

When using the context suggester with weighted results, the search suggest api does not return the top weighted documents for a search term.

I notice a difference in the top ranked documents when I request more documents.

We have an index with artist documents and song documents, all for autocompletion purposes. The mapping for the artist documents looks like this:

{
  "autocompletion": {
    "mappings": {
      "autocompletion_artist": {
        "properties": {
          "artist-autocompletion": {
            "type": "completion",
            "analyzer": "t3_autocompletion_index_analyzer",
            "search_analyzer": "t3_autocompletion_search_analyzer",
            "preserve_separators": false,
            "preserve_position_increments": false,
            "max_input_length": 50,
            "contexts": [
              {
                "name": "r",
                "type": "CATEGORY",
                "path": "region-code"
              }
            ]
          },
          "artist-autocompletion-output": {
            "type": "keyword"
          },
          "group-id": {
            "type": "integer"
          },
          "group_id": {
            "type": "keyword"
          },
          "region-code": {
            "type": "keyword"
          }
        }
      }
    }
  }
}

When I request the top 3 suggestions for artist documents:

{
	"_source": ["artist-autocompletion-output"],
	"suggest": {
		"artists": {
			"completion": {
				"field": "artist-autocompletion",
				"size": 3,
				"contexts": {
					"r": ["w", "vl"]
				}
			}
		},
		"text": "prince"
	}
}

I get these results:

{
	"took": 1,
	"timed_out": false,
	"_shards": {
		"total": 1,
		"successful": 1,
		"failed": 0
	},
	"hits": {
		"total": 0,
		"max_score": 0.0,
		"hits": []
	},
	"suggest": {
		"artists": [{
			"text": "prince",
			"offset": 0,
			"length": 6,
			"options": [{
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "510",
				"_score": 9659.0,
				"_source": {
					"artist-autocompletion-output": "Prince"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "551",
				"_score": 9477.0,
				"_source": {
					"artist-autocompletion-output": "Prince & The Revolution"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "7306",
				"_score": 6114.0,
				"_source": {
					"artist-autocompletion-output": "DJ Jazzy Jeff & The Fresh Prince"
				},
				"contexts": {
					"r": ["w"]
				}
			}]
		}]
	}
}

When I request 5 documents, the top 3 results are already different:

{
	"took": 2,
	"timed_out": false,
	"_shards": {
		"total": 1,
		"successful": 1,
		"failed": 0
	},
	"hits": {
		"total": 0,
		"max_score": 0.0,
		"hits": []
	},
	"suggest": {
		"artists": [{
			"text": "prince",
			"offset": 0,
			"length": 6,
			"options": [{
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "510",
				"_score": 9659.0,
				"_source": {
					"artist-autocompletion-output": "Prince"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "551",
				"_score": 9477.0,
				"_source": {
					"artist-autocompletion-output": "Prince & The Revolution"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince Royce",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "40078",
				"_score": 8032.0,
				"_source": {
					"artist-autocompletion-output": "Prince Royce"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "2415",
				"_score": 7913.0,
				"_source": {
					"artist-autocompletion-output": "Will Smith"
				},
				"contexts": {
					"r": ["w"]
				}
			}, {
				"text": "Prince",
				"_index": "autocompletion",
				"_type": "autocompletion_artist",
				"_id": "7306",
				"_score": 6114.0,
				"_source": {
					"artist-autocompletion-output": "DJ Jazzy Jeff & The Fresh Prince"
				},
				"contexts": {
					"r": ["w"]
				}
			}]
		}]
	}
}

As you can see, the results are correctly ordered by weight, but they are not the top-weighted documents. It looks like the suggester matches the first x items and then orders those by weight.

How can I make the suggester return the top-weighted documents?


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.