Hi everyone!
When I execute a More Like This (MLT) query with an artificial document, the "fields" parameter is ignored and all the fields from the artificial document are used for scoring.
For instance, I built an index with these settings:
PUT /myindex
{
"mappings": {
"properties": {
"body": {
"type": "text",
"analyzer": "rebuilt_italian",
"term_vector": "yes"
},
"url": {
"type": "keyword"
},
"title": {
"type": "text",
"analyzer": "rebuilt_italian",
"term_vector": "yes"
}
}
},
"settings": {
"index": {
"analysis": {
"filter": {
"italian_stemmer": {
"type": "stemmer",
"language": "italian"
},
"italian_elision": {
"type": "elision",
"articles": [
"c",
"l",
"all",
"dall",
"dell",
"nell",
"sull",
"coll",
"pell",
"gl",
"agl",
"dagl",
"degl",
"negl",
"sugl",
"un",
"m",
"t",
"s",
"v",
"d"
],
"articles_case": "true"
},
"italian_stop": {
"type": "stop",
"stopwords": "_italian_"
}
},
"analyzer": {
"rebuilt_italian": {
"filter": [
"italian_elision",
"lowercase",
"italian_stop",
"italian_stemmer"
],
"tokenizer": "standard"
}
}
}
}
}
}
And then I add this document:
PUT /myindex/_doc/1
{
"title": "New document title",
"body": "New document body",
"url": "http://example.com"
}
When I explain the query with these parameters:
POST /myindex/_explain/1
{
"query": {
"more_like_this": {
"fields": [
"title",
"url"
],
"like": [
{
"doc": {
"title": "Document title",
"body": "Document body",
"url": "https://example.com"
}
}
],
"min_term_freq": 1,
"min_doc_freq": 1,
"max_query_terms": 12
}
}
}
I can see that the "body" field is used for scoring even though I didn't include it in "fields".
Is this the correct behavior?
Cheers,
Mirco
Edit: I'm using Elasticsearch version 7.15.0.