I'm experiencing an issue on 5.6.4 during a bulk search composed of several match queries, over an index that is not being written to.
Almost all queries in the batch end up timing out (after 1h), and I can't grasp the cause of this, the cluster has enough heap and RAM, CPU usage is below 25% and path.data points to a RAID0 mount point that iotop reports reading at 600MB/s during the search.
The log has several entries for RemoteTransportException (...) Caused by: SearchContextMissingException: No search context found for id..._
Here's a log excerpt:
[2019-01-08T22:39:00,784][INFO ][o.e.n.Node ] [esnode1] started [2019-01-08T22:39:00,999][INFO ][o.e.g.GatewayService ] [esnode1] recovered [1] indices into cluster_state [2019-01-08T22:39:10,035][INFO ][o.e.c.r.a.AllocationService] [esnode1] Cluster health status changed from [RED] to [GREEN] (reason: [shards started [[wiki1][3]] ...]). [2019-01-08T22:40:32,824][INFO ][o.e.m.j.JvmGcMonitorService] [esnode1] [gc][95] overhead, spent [253ms] collecting in the last [1s] [2019-01-08T23:04:20,700][DEBUG][o.e.a.s.TransportSearchAction] [esnode1] [919] Failed to execute fetch phase org.elasticsearch.transport.RemoteTransportException: [esnode1][127.0.0.1:9300][indices:data/read/search[phase/fetch/id]] Caused by: org.elasticsearch.search.SearchContextMissingException: No search context found for id [919] at org.elasticsearch.search.SearchService.findContext(SearchService.java:447) ~[elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:414) ~[elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:407) ~[elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:404) ~[elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69) ~[elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:654) [elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:638) [elasticsearch-5.6.4.jar:5.6.4] at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) [elasticsearch-5.6.4.jar:5.6.4] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_191] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_191] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_191] [2019-01-08T23:06:06,766][DEBUG][o.e.a.s.TransportSearchAction] [esnode1] [775] Failed to execute fetch phase org.elasticsearch.transport.RemoteTransportException: [esnode1][127.0.0.1:9300][indices:data/read/search[phase/fetch/id]] Caused by: org.elasticsearch.search.SearchContextMissingException: No search context found for id [775] at org.elasticsearch.search.SearchService.findContext(SearchService.java:447) ...
Cluster stats:
{
"_nodes": {
"total": 1,
"successful": 1,
"failed": 0
},
"cluster_name": "escluster1",
"timestamp": 1546987424259,
"status": "green",
"indices": {
"count": 1,
"shards": {
"total": 10,
"primaries": 10,
"replication": 0.0,
"index": {
"shards": {
"min": 10,
"max": 10,
"avg": 10.0
},
"primaries": {
"min": 10,
"max": 10,
"avg": 10.0
},
"replication": {
"min": 0.0,
"max": 0.0,
"avg": 0.0
}
}
},
"docs": {
"count": 5746677,
"deleted": 0
},
"store": {
"size": "181.5gb",
"size_in_bytes": 194959969930,
"throttle_time": "0s",
"throttle_time_in_millis": 0
},
"fielddata": {
"memory_size": "0b",
"memory_size_in_bytes": 0,
"evictions": 0
},
"query_cache": {
"memory_size": "0b",
"memory_size_in_bytes": 0,
"total_count": 0,
"hit_count": 0,
"miss_count": 0,
"cache_size": 0,
"cache_count": 0,
"evictions": 0
},
"completion": {
"size": "0b",
"size_in_bytes": 0
},
"segments": {
"count": 293,
"memory": "897.2mb",
"memory_in_bytes": 940811637,
"terms_memory": "893.8mb",
"terms_memory_in_bytes": 937313273,
"stored_fields_memory": "3.2mb",
"stored_fields_memory_in_bytes": 3415152,
"term_vectors_memory": "0b",
"term_vectors_memory_in_bytes": 0,
"norms_memory": "54.9kb",
"norms_memory_in_bytes": 56256,
"points_memory": "0b",
"points_memory_in_bytes": 0,
"doc_values_memory": "26.3kb",
"doc_values_memory_in_bytes": 26956,
"index_writer_memory": "0b",
"index_writer_memory_in_bytes": 0,
"version_map_memory": "0b",
"version_map_memory_in_bytes": 0,
"fixed_bit_set": "0b",
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {
}
}
},
"nodes": {
"count": {
"total": 1,
"data": 1,
"coordinating_only": 0,
"master": 1,
"ingest": 1
},
"versions": ["5.6.4"],
"os": {
"available_processors": 4,
"allocated_processors": 4,
"names": [{
"name": "Linux",
"count": 1
}],
"mem": {
"total": "15.3gb",
"total_in_bytes": 16430301184,
"free": "171.7mb",
"free_in_bytes": 180113408,
"used": "15.1gb",
"used_in_bytes": 16250187776,
"free_percent": 1,
"used_percent": 99
}
},
"process": {
"cpu": {
"percent": 7
},
"open_file_descriptors": {
"min": 214,
"max": 214,
"avg": 214
}
},
"jvm": {
"max_uptime": "1h",
"max_uptime_in_millis": 3900907,
"versions": [{
"version": "1.8.0_191",
"vm_name": "OpenJDK 64-Bit Server VM",
"vm_version": "25.191-b12",
"vm_vendor": "Oracle Corporation",
"count": 1
}],
"mem": {
"heap_used": "1gb",
"heap_used_in_bytes": 1154171376,
"heap_max": "7.9gb",
"heap_max_in_bytes": 8555069440
},
"threads": 45
},
"fs": {
"total": "294.9gb",
"total_in_bytes": 316734971904,
"free": "113.3gb",
"free_in_bytes": 121704624128,
"available": "98.3gb",
"available_in_bytes": 105591787520,
"spins": "true"
},
"plugins": [],
"network_types": {
"transport_types": {
"netty4": 1
},
"http_types": {
"netty4": 1
}
}
}
}