ES 6.2.3 performance issues vs ES 2.3.4

We are facing severe performance issues with ES 6.2.3 in comparison to ES 2.3.4, and we are not sure what we are missing. We are planning to move to ES6 and have set up a new 6.2.3 cluster with 20 m5d.4xlarge data, 9 m5.4xlarge client & 5 c5.xlarge master nodes. Indexes from the current ES2 cluster were copied, and a delta-update pipeline is in place. We moved 5% of prod search traffic to this new ES6 cluster and found continuous spikes in the p95 latency graphs. Also, the latencies are very high in comparison to ES2.

ES6 client and data nodes have 26GB of max jvm allocation.

BTW, ES 2.3.4 has 60 data c5.4xlarge and 9 master c5.large nodes.

ES 6.2.3 /_cluster/stats:
{

"_nodes" : {

	"total" : 34,

	"successful" : 34,

	"failed" : 0

},

"cluster_name" : "se_cluster",

"timestamp" : 1539674434541,

"status" : "green",

"indices" : {

	"count" : 5,

	"shards" : {

		"total" : 85,

		"primaries" : 30,

		"replication" : 1.8333333333333333,

		"index" : {

			"shards" : {

				"min" : 10,

				"max" : 30,

				"avg" : 17.0

			},

			"primaries" : {

				"min" : 5,

				"max" : 10,

				"avg" : 6.0

			},

			"replication" : {

				"min" : 1.0,

				"max" : 4.0,

				"avg" : 1.8

			}

		}

	},

	"docs" : {

		"count" : 112875644,

		"deleted" : 51467647

	},

	"store" : {

		"size_in_bytes" : 281022181476

	},

	"fielddata" : {

		"memory_size_in_bytes" : 24608071400,

		"evictions" : 0

	},

	"query_cache" : {

		"memory_size_in_bytes" : 3484131424,

		"total_count" : 902027508,

		"hit_count" : 714578426,

		"miss_count" : 187449082,

		"cache_size" : 100056,

		"cache_count" : 29192312,

		"evictions" : 29092256

	},

	"completion" : {

		"size_in_bytes" : 0

	},

	"segments" : {

		"count" : 1188,

		"memory_in_bytes" : 703497198,

		"terms_memory_in_bytes" : 528131307,

		"stored_fields_memory_in_bytes" : 107156920,

		"term_vectors_memory_in_bytes" : 0,

		"norms_memory_in_bytes" : 9819136,

		"points_memory_in_bytes" : 32723979,

		"doc_values_memory_in_bytes" : 25665856,

		"index_writer_memory_in_bytes" : 1347092314,

		"version_map_memory_in_bytes" : 13714760,

		"fixed_bit_set_memory_in_bytes" : 62525048,

		"max_unsafe_auto_id_timestamp" : -1,

		"file_sizes" : { }

	}

},

"nodes" : {

	"count" : {

		"total" : 34,

		"data" : 20,

		"coordinating_only" : 9,

		"master" : 5,

		"ingest" : 20

	},

	"versions" : [

		"6.2.3"

	],

	"os" : {

		"available_processors" : 484,

		"allocated_processors" : 484,

		"names" : [

			{

				"name" : "Linux",

				"count" : 34

			}

		],

		"mem" : {

			"total_in_bytes" : 1954498506752,

			"free_in_bytes" : 690253582336,

			"used_in_bytes" : 1264244924416,

			"free_percent" : 35,

			"used_percent" : 65

		}

	},

	"process" : {

		"cpu" : {

			"percent" : 48

		},

		"open_file_descriptors" : {

			"min" : 927,

			"max" : 1229,

			"avg" : 1148

		}

	},

	"jvm" : {

		"max_uptime_in_millis" : 1367385507,

		"versions" : [

			{

				"version" : "1.8.0_131",

				"vm_name" : "Java HotSpot(TM) 64-Bit Server VM",

				"vm_version" : "25.131-b11",

				"vm_vendor" : "Oracle Corporation",

				"count" : 29

			},

			{

				"version" : "1.8.0_181",

				"vm_name" : "Java HotSpot(TM) 64-Bit Server VM",

				"vm_version" : "25.181-b13",

				"vm_vendor" : "Oracle Corporation",

				"count" : 5

			}

		],

		"mem" : {

			"heap_used_in_bytes" : 318745528344,

			"heap_max_in_bytes" : 827613904896

		},

		"threads" : 5068

	},

	"fs" : {

		"total_in_bytes" : 7380778016768,

		"free_in_bytes" : 7022425059328,

		"available_in_bytes" : 6660544962560

	},

	"plugins" : [

		{

			"name" : "repository-s3",

			"version" : "6.2.3",

			"description" : "The S3 repository plugin adds S3 repositories",

			"classname" : "org.elasticsearch.repositories.s3.S3RepositoryPlugin",

			"extended_plugins" : [ ],

			"has_native_controller" : false,

			"requires_keystore" : false

		}

	],

	"network_types" : {

		"transport_types" : {

			"netty4" : 34

		},

		"http_types" : {

			"netty4" : 34

		}

	}

}

}

ES6 sample data:
{
	"took": 75,
	"timed_out": false,
	"_shards": {
		"total": 10,
		"successful": 10,
		"skipped": 0,
		"failed": 0
	},
	"hits": {
		"total": 43986609,
		"max_score": 1,
		"hits": [
			{
				"_index": "catalog",
				"_type": "dd",
				"_id": "1718708443456",
				"_score": 1,
				"_source": {
					"id": 1718708443456,
					"name": "New Women Long sheet",
					"bd": "Default",
					"bid": 43973023,
					"sku": "XYI443-4",
					"v_id": 212,
					"cid": 742366,
					"promo_text": null,
					"url_key": "nwdwdw",
					"tag": null,
					"pvc": 2,
					"oc": 0,
					"p_updated_at": "2018-09-08T18:30:00.000Z",
					"description": " some text",
					"price": 603,
					"mrp": 1270,
					"is_in_stock": 1,
					"search_weight": 0.05542687223333736,
					"brw": 0,
					"cwt": 2,
					"cname": "LA",
					"cfp": " Women->LS-> LA",
					"child_product_names": [
						"some name"
					],
					"child_product_ids": [
						11111111
					],
					"child_instock_product_ids": [
						22222222
					],
					"cats": [
						555555,
						52404444
					],
					"child_merchants": [
						777777
					],
					"authorisation_level": [
						3
					],
					"meta_keyword": [
						
					],
					"attributes_list": [
						"Black"
					],
					"attributes": [
						
					],
					"cccs": [
						0
					],
					"created_at": "2018-04-10T06:56:52.000Z",
					"index_created_at": "2018-10-04T01:31:50.547Z",
					"variants": [
						{
							"id": 11111111,
							"merchant_id": 77777,
							"is_in_stock": 1,
							"price": 603
						}
					],
					"filter_color": [
						"Black"
					],
					"filter_sz": [
						"Free"
					]
				}
			}
		]
	}
}

Let me know if you need any more details.

Why do you have 5 masters?
Do you have Monitoring installed and enabled? If not then that'd be the first recommendation as it'll give you better insights.

Thanks @warkolm for getting back to me. I forgot to add my Datadog graphs. I have now added them; they show latency & traffic in comparison to ES2.

We have 5 masters for better availability. Our master nodes are non-coordinating nodes for better cluster stability.

What is your use-case? What types of queries are you using? What does the workload look like? What does your data and mappings look like?

@Christian_Dahlqvist : pls find info below

What is your use-case?
We store product catalog
On ES6 : 10 shards & RF2.
size: 86.7Gi (265Gi)
docs: 112,350,368 (515,716,624)

On ES2: 20 shards & RF2.
size: 183Gi (563Gi)
docs: 112,355,838 (529,896,167)

What types of queries are you using?
We are mainly running search queries, which consist of a boolean query with filters + function_score + some aggregations.

What does the workload look like?
Search traffic : 1K/min ES6 & 18K/min on ES2
Indexing traffic : 8K/min on both ES2 & ES6

How are you indexing? Is it new documents or updates? Are you using bulk requests?

One thing that has changed between Elasticsearch 2.x and 6.x is that the transaction log is synced more frequently in order to improve resiliency. This means that you might experience higher I/O if you are using small batches or indexing individual documents, which I assume could affect performance.

Are you monitoring disk I/O on the clusters?

If I am reading this right it also seems like the 6.x cluster has more data per node than the 2.x cluster. Is this correct?

Our indexing contains full updates, partial updates & deletes as well. We are using bulk for indexing.
We are using m5d series which has instance storage.

Write IOPS is no more than 200 considering the peak as 60K IO in 5m.


Strange that read IOPS is zero mostly.

"If I am reading this right it also seems like the 6.x cluster has more data per node than the 2.x cluster. Is this correct?"

You are right. We have a third as many data nodes as on 2.x, but search traffic on 6.x is only ~5%: 1K/min on ES6 vs. 18K/min on ES2.

Also, ES6 write performance is slightly better than ES2 (bulk write traffic is the same in both clusters). This could be because ES6 uses instance storage, whereas ES2 uses EBS.

What's causing the ES6 read performance to be so bad is a mystery as of now.

What kind of queries are you running? Another thing that changed between Elasticsearch 2.x and 6.x was the removal of the _all field, which is now replaced by an all_fields query, which can be slower if you have a large number of fields.

This is our query

{
  "from": 0,
  "size": 30,
  "sort": [
    {
      "ins": {
        "order": "desc"
      }
    },
    "_score"
  ],
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "must": [
            
          ],
          "should": [
            {
              "bool": {
                "must_not": {
                  "exists": {
                    "field": "child_d"
                  }
                }
              }
            },
            {
              "term": {
                "child_d": "1"
              }
            }
          ]
        }
      },
      "must": {
        "function_score": {
          "query": {
            "bool": {
              "filter": {
                "bool": {
                  "must": [
                    {
                      "exists": {
                        "field": "pr"
                      }
                    },
                    {
                      "exists": {
                        "field": "bid"
                      }
                    },
                    {
                      "exists": {
                        "field": "sw"
                      }
                    }
                  ],
                  "must_not": [
                    {
                      "ids": {
                        "values": [
                          21,
                          22,
                          23
                        ]
                      }
                    },
                    {
                      "terms": {
                        "me": [
                          1,
                          2
                        ]
                      }
                    },
                    {
                      "terms": {
                        "v_id": [
                          
                        ]
                      }
                    }
                  ]
                }
              },
              "must": {
                "bool": {
                  "should": [
                    {
                      "multi_match": {
                        "query": "samsung",
                        "analyzer": "word_delimiter_analyzer",
                        "fields": [
                          "c_name^2",
                          "br^3",
                          "cfp^1.1",
                          "na^1.2",
                          "chil",
                          "desc",
                          "attr",
                          "attri_l^1.2"
                        ],
                        "type": "cross_fields",
                        "tie_breaker": 0.3,
                        "minimum_should_match": "100%"
                      }
                    }
                  ]
                }
              }
            }
          },
          "functions": [
            {
              "script_score": {
                "script": {
                  "lang": "expression",
                  "inline": "0 + (log10((doc['pvc'].value * pow(0.9, doc['pm_updated_at'].value ? ( 1537813800000 - doc['pm_updated_at'].value)/(24*60*60*1000): 0))+1)*(2))"
                }
              }
            }
          ],
          "boost_mode": "sum",
          "score_mode": "sum"
        }
      }
    }
  },
  "post_filter": {
    "bool": {
      "must": [
        {
          "terms": {
            "bid": [
              "222222"
            ]
          }
        }
      ]
    }
  },
  "aggs": {
    "categories": {
      "filter": {
        "bool": {
          "must": [
            {
              "exists": {
                "field": "pri"
              }
            },
            {
              "terms": {
                "bid": [
                  "2222"
                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "cats": {
          "terms": {
            "field": "cid",
            "order": {
              "_count": "desc"
            },
            "size": 50
          }
        }
      }
    },
    "bra": {
      "filter": {
        "bool": {
          "must": [
            {
              "exists": {
                "field": "pr"
              }
            },
            {
              "terms": {
                "bid": [
                  "2222"
                ]
              }
            }
          ],
          "must_not": [
            {
              "terms": {
                "bid": [
                  3333                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "brds": {
          "terms": {
            "field": "bid",
            "order": {
              "_count": "desc"
            },
            "size": 50
          }
        }
      }
    },
    "min_pr": {
      "filter": {
        "bool": {
          "must": [
            {
              "exists": {
                "field": "pr"
              }
            },
            {
              "terms": {
                "bid": [
                  "3333"
                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "min_pr": {
          "min": {
            "field": "pr"
          }
        }
      }
    },
    "max_pr": {
      "filter": {
        "bool": {
          "must": [
            {
              "exists": {
                "field": "pr"
              }
            },
            {
              "terms": {
                "bid": [
                  "333"
                ]
              }
            }
          ]
        }
      },
      "aggs": {
        "max_pr": {
          "max": {
            "field": "pr"
          }
        }
      }
    }
  }
}

Also find ES6 index mappings:

{
"cat" : {
	"mappings" : {
		"re" : {
			"_all" : {
				"enabled" : false
			},
			"dynamic_templates" : [
				{
					"filter" : {
						"match" : "filter_*",
						"match_mapping_type" : "string",
						"mapping" : {
							"analyzer" : "analyzer_keyword",
							"eager_global_ordinals" : true,
							"fielddata" : true,
							"fields" : {
								"raw" : {
									"type" : "keyword"
								}
							},
							"type" : "text"
						}
					}
				}
			],
			"properties" : {
				"attributes" : {
					"type" : "text",
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"attributes_list" : {
					"type" : "text",
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"authorisation_level" : {
					"type" : "long"
				},
				"bd" : {
					"type" : "text",
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"bid" : {
					"type" : "long"
				},
				"bwt" : {
					"type" : "integer"
				},
				"cfp" : {
					"type" : "text",
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"cid" : {
					"type" : "long"
				},
				"cnm" : {
					"type" : "text",
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"cwt" : {
					"type" : "integer"
				},
				"cats" : {
					"type" : "long"
				},
				"cpids" : {
					"type" : "long"
				},
				"cpnames" : {
					"type" : "text",
					"fields" : {
						"singular" : {
							"type" : "text",
							"analyzer" : "custom_english_indexer",
							"search_analyzer" : "custom_english"
						}
					},
					"analyzer" : "word_delimiter_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"csite" : {
					"type" : "long"
				},
				"conids" : {
					"type" : "long"
				},
				"created_at" : {
					"type" : "date",
					"format" : "dateOptionalTime"
				},
				"curr" : {
					"type" : "long"
				},
				"description" : {
					"type" : "text",
					"analyzer" : "custom_english_indexer",
					"search_analyzer" : "custom_english"
				},
				"model_0" : {
					"type" : "float"
				},
				"filter_3D" : {
					"type" : "text",
					"eager_global_ordinals" : true,
					"fields" : {
						"raw" : {
							"type" : "keyword"
						}
					},
					"analyzer" : "analyzer_keyword",
					"fielddata" : true
				},
				"filter_np" : {
					"type" : "text",
					"eager_global_ordinals" : true,
					"fields" : {
						"raw" : {
							"type" : "keyword"
						}
					},
					"analyzer" : "analyzer_keyword",
					"fielddata" : true
				},
				"filter_z" : {
					"type" : "text",
					"eager_global_ordinals" : true,
					"fields" : {
						"raw" : {
							"type" : "keyword"
						}
					},
					"analyzer" : "analyzer_keyword",
					"fielddata" : true
				},
				"id" : {
					"type" : "long"
				},
				"index_created_at" : {
					"type" : "date",
					"format" : "dateOptionalTime"
				},
				"ins" : {
					"type" : "long"
				},
				"last_update_timestamp" : {
					"type" : "date",
					"format" : "dateOptionalTime"
				},
				"meta_keyword" : {
					"type" : "text",
					"analyzer" : "custom_english_indexer",
					"search_analyzer" : "custom_english"
				},
				"mrp" : {
					"type" : "long",
					"index" : false
				},
				"name" : {
					"type" : "text",
					"fields" : {
						"singular" : {
							"type" : "text",
							"analyzer" : "custom_english_indexer",
							"search_analyzer" : "custom_english"
						}
					},
					"analyzer" : "word_delimiter_with_number_analyzer_indexer",
					"search_analyzer" : "word_delimiter_analyzer"
				},
				"newurl" : {
					"type" : "text"
				},
				"oc" : {
					"type" : "long"
				},
				"psk" : {
					"type" : "keyword"
				},
				"pdp" : {
					"type" : "long"
				},
				"price" : {
					"type" : "long"
				},
				"psm" : {
					"properties" : {
						"1" : {
							"type" : "long"
						},
						"100" : {
							"type" : "long"
						},
						"98" : {
							"type" : "long"
						}
					}
				},
				"product_metrics_updated_at" : {
					"type" : "date"
				},
				"pt" : {
					"type" : "text",
					"index" : false
				},
				"rid" : {
					"type" : "text",
					"fields" : {
						"keyword" : {
							"type" : "keyword",
							"ignore_above" : 256
						}
					}
				},
				"script" : {
					"properties" : {
						"inline" : {
							"type" : "text",
							"fields" : {
								"keyword" : {
									"type" : "keyword",
									"ignore_above" : 256
								}
							}
						},
						"params" : {
							"properties" : {
								"doc" : {
									"properties" : {
										"cats" : {
											"type" : "long"
										}
									}
								}
							}
						}
					}
				},
				"sw" : {
					"type" : "double"
				},
				"spid" : {
					"type" : "long"
				},
				"sppku" : {
					"type" : "keyword"
				},
				"tag" : {
					"type" : "text",
					"index" : false
				},
				"thumbnail" : {
					"type" : "text",
					"index" : false
				},
				"updated_at" : {
					"type" : "date",
					"format" : "dateOptionalTime"
				},
				"url_key" : {
					"type" : "keyword"
				},
				"var" : {
					"type" : "nested",
					"properties" : {
						"dis" : {
							"type" : "long"
						},
						"id" : {
							"type" : "long"
						},
						"ins" : {
							"type" : "integer"
						},
						"mar" : {
							"type" : "long"
						},
						"price" : {
							"type" : "long"
						},
						"str" : {
							"type" : "long"
						}
					}
				},
				"vid" : {
					"type" : "long"
				}
			}
		}
	}
}

}

@Christian_Dahlqvist / ES support team - Any other suggestions on this?

1 Like

I experienced a similar problem before and managed to figure out the root cause and a solution. I believe it has something to do with numbers being indexed differently since ES 5.0.

Prior to ES 5.0, numeric fields were still stored in an inverted index, and query speed was typically very fast for term/terms filters. Since ES 5.0, the Block K-D tree has been the index structure for numeric fields, which is optimized for range queries (searching-numb3rs-in-5.0). There is a negative effect, however: if you do exact matches such as term/terms filters on numeric fields, performance can degrade severely, depending on the cardinality of the fields.

Looking at your case, filters like the example below are the main culprits hurting query performance:

"terms": {
     "me": [
               1,
               2
         ]
 }

To solve the issue, you'd need to define these fields as "keyword" in the mapping and reindex the data. Please also refer to the documentation at this link: consider_mapping_identifiers_as_literal_keyword_literal

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.