I'm using Elasticsearch 5.4 for search. I tested it on a single-node cluster and warmed it up first to take advantage of the filesystem cache. After profiling the query, I found that the bottleneck is the time spent in the collector. However, I know very little about Lucene internals — could someone help me figure out the cause?
Here is some information that might help:
yellow open ag2 EcWa1psbQTGFQMXXEb7cxw 5 1 1858557 0 21.4gb 21.4gb
yellow open ag4 zYl27gx4SE6sxYWmKvhlbg 5 1 1858557 0 21.9gb 21.9gb
new_mappings = {
"properties": {
"title": {"type": "string", "index": "analyzed", "fielddata": {"loading": "eager"}},
"title_embed": {"type": "double", "index": "no"},
"original_title": {"type": "string", "index": "analyzed", "analyzer": "standard", "fielddata": {"loading": "eager"}},
"did3": {"type": "string", "index": "not_analyzed"},
}
}
"query" : [
{
"type" : "BooleanQuery",
"description" : "(title:布洛芬 title:怎么 title:吃) #*:*",
"time" : "92.79337100ms",
"time_in_nanos" : 92793371,
"breakdown" : {
"score" : 29564125,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 617567,
"next_doc" : 61540491,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 371324,
"score_count" : 371305,
"build_scorer" : 328539,
"advance" : 0,
"advance_count" : 0
},
"children" : [
{
"type" : "BooleanQuery",
"description" : "title:布洛芬 title:怎么 title:吃",
"time" : "1.002629000ms",
"time_in_nanos" : 1002629,
"breakdown" : {
"score" : 97239,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 584356,
"next_doc" : 0,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 0,
"score_count" : 274,
"build_scorer" : 223048,
"advance" : 97410,
"advance_count" : 282
},
"children" : [
{
"type" : "TermQuery",
"description" : "title:布洛芬",
"time" : "0.3505020000ms",
"time_in_nanos" : 350502,
"breakdown" : {
"score" : 39183,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 228549,
"next_doc" : 0,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 0,
"score_count" : 209,
"build_scorer" : 48728,
"advance" : 33598,
"advance_count" : 215
}
},
{
"type" : "TermQuery",
"description" : "title:怎么",
"time" : "0.2092470000ms",
"time_in_nanos" : 209247,
"breakdown" : {
"score" : 9412,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 173662,
"next_doc" : 0,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 0,
"score_count" : 38,
"build_scorer" : 19015,
"advance" : 7058,
"advance_count" : 42
}
},
{
"type" : "TermQuery",
"description" : "title:吃",
"time" : "0.1768110000ms",
"time_in_nanos" : 176811,
"breakdown" : {
"score" : 8493,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 150380,
"next_doc" : 0,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 0,
"score_count" : 28,
"build_scorer" : 12977,
"advance" : 4881,
"advance_count" : 32
}
}
]
},
{
"type" : "MatchAllDocsQuery",
"description" : "*:*",
"time" : "20.80319200ms",
"time_in_nanos" : 20803192,
"breakdown" : {
"score" : 0,
"build_scorer_count" : 19,
"match_count" : 0,
"create_weight" : 12729,
"next_doc" : 20354734,
"match" : 0,
"create_weight_count" : 1,
"next_doc_count" : 371324,
"score_count" : 0,
"build_scorer" : 64385,
"advance" : 0,
"advance_count" : 0
}
}
]
}
],
"rewrite_time" : 5252,
"collector" : [
{
"name" : "CancellableCollector",
"reason" : "search_cancelled",
"time" : "108.8903900ms",
"time_in_nanos" : 108890390,
"children" : [
{
"name" : "SimpleTopScoreDocCollector",
"reason" : "search_top_hits",
"time" : "72.30302100ms",
"time_in_nanos" : 72303021
}
]
}
]
}
]
I'm not sure whether the cause is that each document is too large, so collecting the matching documents from disk takes too long.