Hi! I'm having a hard time getting the shard request cache to work for my aggregations. I've spent hours trying to figure out the cause and I'm afraid I'm out of ideas. My setting is indices.requests.cache.size: 10%
and there are 4 machines dedicated to ES, each with a 32GB heap and 64GB of total memory available.
Is it possible that most of the searches are unique, and that this is what causes such a huge eviction rate? It's not as if there are a lot of them, though: barely 1.2M per day (Googlebot & co. crawl our product-aggregation website).
A typical request uses multi-search with an agg query plus a hits query (I expect the agg query to be cached/fetched from the cache – this works locally on a single machine but not in production).
(Poor) Cache usage stats:
Nodes stats:
Fields: https://www.elastic.co/guide/en/elasticsearch/reference/6.5/cat-nodes.html
$ curl -X GET "localhost:9200/_cat/nodes?v&h=id,v,m,dup,hp,hm,rp,rm,fm,fe,qcm,qce,rcm,rce,rchc,rcmc,ft,idc,idto,iic,iito,iif,mc,mt,mtd,rto,scrcc"
id v m dup hp hm rp rm fm fe qcm qce rcm rce rchc rcmc ft idc idto iic iito iif mc mt mtd rto scrcc
E-Tc 6.2.4 - 14.22 38 31.8gb 99 62.9gb 1.4mb 0 28.3mb 426320 3.1gb 55784 290 121309 40 0 0 0 57 0 0 3 239 271 4
8krG 6.2.4 - 14.30 40 31.8gb 99 62.9gb 1.4mb 0 30.4mb 537357 3.1gb 50737 302 121381 42 0 0 0 51 0 0 1 542 274 4
8jT2 6.2.4 * 14.36 40 31.8gb 99 62.9gb 1.4mb 0 29mb 535770 3.1gb 56134 322 121574 39 0 0 0 60 0 0 1 389 266 4
yDct 6.2.4 - 14.26 40 31.8gb 99 62.9gb 1.4mb 0 30.1mb 527464 3.1gb 51137 332 121294 37 0 0 0 42 0 0 3 1659 254 4
jUjg 6.2.4 - 13.99 40 31.8gb 97 62.9gb 1.4mb 0 31.1mb 436395 3.1gb 50837 273 121193 43 0 0 0 60 0 0 2 5140 283 4
Cluster stats:
$ curl -X GET "localhost:9200/_cluster/stats?human&pretty"
{
"_nodes" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"cluster_name" : "elasticsearch",
"timestamp" : 1547182159269,
"status" : "green",
"indices" : {
"count" : 10,
"shards" : {
"total" : 150,
"primaries" : 50,
"replication" : 2.0,
"index" : {
"shards" : {
"min" : 15,
"max" : 15,
"avg" : 15.0
},
"primaries" : {
"min" : 5,
"max" : 5,
"avg" : 5.0
},
"replication" : {
"min" : 2.0,
"max" : 2.0,
"avg" : 2.0
}
}
},
"docs" : {
"count" : 8247168,
"deleted" : 3755256
},
"store" : {
"size" : "171.4gb",
"size_in_bytes" : 184121016371
},
"fielddata" : {
"memory_size" : "7.3mb",
"memory_size_in_bytes" : 7691488,
"evictions" : 0
},
"query_cache" : {
"memory_size" : "146.8mb",
"memory_size_in_bytes" : 153961819,
"total_count" : 140669771,
"hit_count" : 57522292,
"miss_count" : 83147479,
"cache_size" : 24892,
"cache_count" : 2135173,
"evictions" : 2110281
},
"completion" : {
"size" : "0b",
"size_in_bytes" : 0
},
"segments" : {
"count" : 2798,
"memory" : "546.3mb",
"memory_in_bytes" : 572872596,
"terms_memory" : "424.1mb",
"terms_memory_in_bytes" : 444726409,
"stored_fields_memory" : "33mb",
"stored_fields_memory_in_bytes" : 34607912,
"term_vectors_memory" : "0b",
"term_vectors_memory_in_bytes" : 0,
"norms_memory" : "9.3mb",
"norms_memory_in_bytes" : 9805824,
"points_memory" : "11.7mb",
"points_memory_in_bytes" : 12332739,
"doc_values_memory" : "68mb",
"doc_values_memory_in_bytes" : 71399712,
"index_writer_memory" : "0b",
"index_writer_memory_in_bytes" : 0,
"version_map_memory" : "0b",
"version_map_memory_in_bytes" : 0,
"fixed_bit_set" : "0b",
"fixed_bit_set_memory_in_bytes" : 0,
"max_unsafe_auto_id_timestamp" : -1,
"file_sizes" : { }
}
},
"nodes" : {
"count" : {
"total" : 5,
"data" : 5,
"coordinating_only" : 0,
"master" : 5,
"ingest" : 5
},
"versions" : [
"6.2.4"
],
"os" : {
"available_processors" : 80,
"allocated_processors" : 80,
"names" : [
{
"name" : "Linux",
"count" : 5
}
],
"mem" : {
"total" : "314.5gb",
"total_in_bytes" : 337764552704,
"free" : "4.2gb",
"free_in_bytes" : 4548927488,
"used" : "310.3gb",
"used_in_bytes" : 333215625216,
"free_percent" : 1,
"used_percent" : 99
}
},
"process" : {
"cpu" : {
"percent" : 135
},
"open_file_descriptors" : {
"min" : 1356,
"max" : 1365,
"avg" : 1361
}
},
"jvm" : {
"max_uptime" : "42.3m",
"max_uptime_in_millis" : 2542560,
"versions" : [
{
"version" : "1.8.0_191",
"vm_name" : "OpenJDK 64-Bit Server VM",
"vm_version" : "25.191-b12",
"vm_vendor" : "Oracle Corporation",
"count" : 5
}
],
"mem" : {
"heap_used" : "55.9gb",
"heap_used_in_bytes" : 60060317832,
"heap_max" : "159.4gb",
"heap_max_in_bytes" : 171231805440
},
"threads" : 886
},
"fs" : {
"total" : "2.1tb",
"total_in_bytes" : 2342573998080,
"free" : "1.9tb",
"free_in_bytes" : 2128665124864,
"available" : "1.8tb",
"available_in_bytes" : 2009311944704
},
"plugins" : [
{
"name" : "analysis-icu",
"version" : "6.2.4",
"description" : "The ICU Analysis plugin integrates Lucene ICU module into elasticsearch, adding ICU relates analysis components.",
"classname" : "org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin",
"extended_plugins" : [ ],
"has_native_controller" : false,
"requires_keystore" : false
},
{
"name" : "analysis-stempel",
"version" : "6.2.4",
"description" : "The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.",
"classname" : "org.elasticsearch.plugin.analysis.stempel.AnalysisStempelPlugin",
"extended_plugins" : [ ],
"has_native_controller" : false,
"requires_keystore" : false
}
],
"network_types" : {
"transport_types" : {
"netty4" : 5
},
"http_types" : {
"netty4" : 5
}
}
}
}
And finally, the current tasks are (the index is not being updated at the moment):
Thanks for any suggestions!