Elastic query takes over 1 minute due to time spent in "HighlightPhase"

Some elastic queries are slow, taking more than a minute to execute.

  • The query input is simple: just a single, numeric account identifier (i.e. "123456789").
  • The query takes 68 seconds to execute and returns 6 hits.
  • The total size of the JSON query output is 583KB.
  • The Elastic query profiler says that 67 seconds is spent in the "HighlightPhase".
  • Elastic is using the default (Unified) highlighter.
  • One of the documents returned as a hit is very large. If we retrieve its entire JSON data, its size is 191MB.
  • This document is large due to one particular field it possesses.
  • However, we are neither querying this field nor performing highlighting on it.
  • We are using Elastic 7.17 under Linux.
  • If I disable highlighting entirely, the query is fast (about 1 second).

Isn't this mysterious that Elastic seems to spend a lot of time "highligting" a field that is neither highlighted nor returned as part of the result?

I can provide more details as needed.

Thanks!

Hi @David_Avant,

Welcome to the community! Thanks for sharing the details you are seeing from the profiler. Can you share your query and which highlighter you are using?

Without knowing much about the query and potential optimisations there, it might be worth taking a look at alternative highlighters, as covered in the documentation.

What is the mapping for the field being highlighted? As outlined in the documentation it needs to be parsed from source, where your huge field will impact performance, if it is not explicitly stored at the field level.

Thanks Carly/Christian,

Here is the query:

{
	"from": 0,
	"size": 20,
	"profile": true,
	"query": {
		"bool": {
			"must": [
				{
					"query_string": {
						"query": "147490666442",
						"fields": [
							"all_text^1.0",
							"identifier^1.0"
						],
						"type": "best_fields",
						"default_operator": "or",
						"max_determinized_states": 10000,
						"enable_position_increments": true,
						"fuzziness": "AUTO",
						"fuzzy_prefix_length": 0,
						"fuzzy_max_expansions": 50,
						"phrase_slop": 0,
						"escape": false,
						"auto_generate_synonyms_phrase_query": true,
						"fuzzy_transpositions": true,
						"boost": 1.0
					}
				}
			],
			"filter": [
				{
					"query_string": {
						"query": "module:\"Alerts\"",
						"fields": [
						],
						"type": "best_fields",
						"default_operator": "or",
						"max_determinized_states": 10000,
						"enable_position_increments": true,
						"fuzziness": "AUTO",
						"fuzzy_prefix_length": 0,
						"fuzzy_max_expansions": 50,
						"phrase_slop": 0,
						"escape": false,
						"auto_generate_synonyms_phrase_query": true,
						"fuzzy_transpositions": true,
						"boost": 1.0
					}
				}
			],
			"adjust_pure_negative": true,
			"boost": 1.0
		}
	},
	"_source": {
		"includes": [
		],
		"excludes": [
			"attachment",
			"audit",
			"audits",
			"note",
			"notes",
			"children",
			"alert_xml"
		]
	},
	"aggregations": {
		"state": {
			"terms": {
				"field": "state.keyword",
				"size": 10,
				"min_doc_count": 1,
				"shard_min_doc_count": 0,
				"show_term_doc_count_error": false,
				"order": [
					{
						"_count": "desc"
					},
					{
						"_key": "asc"
					}
				]
			}
		},
		"category": {
			"terms": {
				"field": "category.keyword",
				"size": 10,
				"min_doc_count": 1,
				"shard_min_doc_count": 0,
				"show_term_doc_count_error": false,
				"order": [
					{
						"_count": "desc"
					},
					{
						"_key": "asc"
					}
				]
			},
			"aggregations": {
				"type": {
					"terms": {
						"field": "type_name.keyword",
						"size": 10,
						"min_doc_count": 1,
						"shard_min_doc_count": 0,
						"show_term_doc_count_error": false,
						"order": [
							{
								"_count": "desc"
							},
							{
								"_key": "asc"
							}
						]
					}
				}
			}
		},
		"business_date_week": {
			"date_range": {
				"field": "business_date",
				"format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
				"ranges": [
					{
						"from": "now/d-1w",
						"to": "now"
					}
				],
				"keyed": false
			}
		},
		"business_date_month": {
			"date_range": {
				"field": "business_date",
				"format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
				"ranges": [
					{
						"from": "now/d-1M",
						"to": "now"
					}
				],
				"keyed": false
			}
		},
		"business_date_year": {
			"date_range": {
				"field": "business_date",
				"format": "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
				"ranges": [
					{
						"from": "now/d-1y",
						"to": "now"
					}
				],
				"keyed": false
			}
		}
	},
	"highlight": {
		"max_analyzed_offset": 51200,
		"fragment_size": 75,
		"highlight_query": {
			"bool": {
				"must": [
					{
						"query_string": {
							"query": "147490666442",
							"fields": [
								"all_text^1.0",
								"identifier^1.0"
							],
							"type": "best_fields",
							"default_operator": "or",
							"max_determinized_states": 10000,
							"enable_position_increments": true,
							"fuzziness": "AUTO",
							"fuzzy_prefix_length": 0,
							"fuzzy_max_expansions": 50,
							"phrase_slop": 0,
							"escape": false,
							"auto_generate_synonyms_phrase_query": true,
							"fuzzy_transpositions": true,
							"boost": 1.0
						}
					}
				],
				"adjust_pure_negative": true,
				"boost": 1.0
			}
		},
		"require_field_match": false,
		"fields": {
			"*_custom_tl2": {
			},
			"bu_name": {
			},
			"reason": {
			},
			"identifier": {
			},
			"type_name": {
			},
			"owner_name": {
			},
			"*_custom_ti2": {
			},
			"rfi_attachment": {
			},
			"*_custom_td2": {
			},
			"description": {
			},
			"display_name": {
			},
			"type": {
			},
			"type_abbr_name": {
			},
			"priority_identifier": {
			},
			"rfi": {
			},
			"*_custom_txt": {
			},
			"score_field2": {
			},
			"step": {
			},
			"state": {
			},
			"alert_name": {
			}
		}
	}
}

And the mapping...

{
	"david_index": {
		"mappings": {
			"dynamic_templates": [
				{
					"custom_txt": {
						"match": "*_custom_txt",
						"mapping": {
							"analyzer": "index_text_en",
							"copy_to": "all_text",
							"search_analyzer": "search_text",
							"type": "text"
						}
					}
				},
				{
					"custom_ti": {
						"match": "*_custom_ti",
						"mapping": {
							"copy_to": [
								"all_text",
								"{name}2"
							],
							"type": "integer"
						}
					}
				},
				{
					"_custom_tl": {
						"match": "*_custom_tl",
						"mapping": {
							"copy_to": [
								"all_text",
								"{name}2"
							],
							"type": "long"
						}
					}
				},
				{
					"_custom_td": {
						"match": "*_custom_td",
						"mapping": {
							"copy_to": [
								"all_text",
								"{name}2"
							],
							"type": "double"
						}
					}
				},
				{
					"_custom_tdt": {
						"match": "*_custom_tdt",
						"mapping": {
							"copy_to": [
								"all_text",
								"{name}2"
							],
							"type": "date"
						}
					}
				},
				{
					"custom_ti2": {
						"match": "*_custom_ti2",
						"mapping": {
							"analyzer": "index_text_en",
							"copy_to": "all_text",
							"search_analyzer": "search_text",
							"store": true,
							"type": "text"
						}
					}
				},
				{
					"custom_tl2": {
						"match": "*_custom_tl2",
						"mapping": {
							"analyzer": "index_text_en",
							"copy_to": "all_text",
							"search_analyzer": "search_text",
							"store": true,
							"type": "text"
						}
					}
				},
				{
					"custom_td2": {
						"match": "*_custom_td2",
						"mapping": {
							"analyzer": "index_text_en",
							"copy_to": "all_text",
							"search_analyzer": "search_text",
							"store": true,
							"type": "text"
						}
					}
				}
			],
			"properties": {
				"alert_name": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"alert_xml": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_html",
					"search_analyzer": "search_text"
				},
				"all_text": {
					"type": "text",
					"analyzer": "index_text_html",
					"search_analyzer": "search_text"
				},
				"attachment": {
					"type": "text",
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"audit": {
					"type": "text",
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"audits": {
					"properties": {
						"comment": {
							"type": "text",
							"fields": {
								"keyword": {
									"type": "keyword",
									"ignore_above": 256
								}
							}
						},
						"date": {
							"type": "date"
						},
						"event": {
							"type": "text",
							"fields": {
								"keyword": {
									"type": "keyword",
									"ignore_above": 256
								}
							}
						},
						"user": {
							"type": "text",
							"fields": {
								"keyword": {
									"type": "keyword",
									"ignore_above": 256
								}
							}
						}
					}
				},
				"bu": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"bu_exact": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"bu_name": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"business_date": {
					"type": "date",
					"copy_to": [
						"all_text"
					]
				},
				"category": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"consolidation_key": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"create_date": {
					"type": "date",
					"copy_to": [
						"all_text"
					]
				},
				"description": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"display_name": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"has": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"id": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"identifier": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "identifier_analyzer",
					"search_analyzer": "search_text"
				},
				"is": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"last_update_date": {
					"type": "date"
				},
				"manual": {
					"type": "boolean"
				},
				"module": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"note": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"owner": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"owner_name": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"priority_identifier": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"read": {
					"type": "boolean"
				},
				"reason": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"rfi": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"rfi_attachment": {
					"type": "text",
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"score_field": {
					"type": "long",
					"copy_to": [
						"all_text",
						"score_field2"
					]
				},
				"score_field2": {
					"type": "text",
					"store": true,
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"state": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword"
						}
					},
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"step": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"tenant": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"type": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"type_abbr_name": {
					"type": "text",
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"type_facet": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword"
						}
					},
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				},
				"type_name": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword"
						}
					},
					"copy_to": [
						"all_text"
					],
					"analyzer": "index_text_en",
					"search_analyzer": "search_text"
				}
			}
		}
	}
}

To summarize the query:

  • It searches two fields: (identifier, all_text).
  • The "identifier" field is just a short/unique value given to each document.
  • The "all_text" field is an aggregate of the values of about 20 field... using the "copy_to" property in the mapping.
  • The "attachment" field is the likely troublemaker, because the value can be very large (over 100MB) for some rare documents.
  • However, the "attachment" field value is not added to "all_text" in the mapping.
  • In the query, the "attachment" field is also excluded from the list of source fields to return.
  • The query also omits the "attachment" field from the list of fields to highlight.

I know that having a potentially huge field like this was a poor design decision. But despite this, Elastic seems to behave strangely. In particular, the "HighlightPhase" seems to be processing a field that doesn't needs to be highlighted.

As you do not seem to have mapped any fields explicitly as stored, Elasticsearch must parse the full source document in order to get the data to highlight. This means that the large field will impact performance as it will take a lot longer to parse the document compared to if this large field was not present.

Elasticsearch is not designed or optimised to handle very large documents, so I would recommend not storing this type of huge data in Elasticsearch.

So in theory, if I changed my mapping so that all of the normal/smaller "highlighted fields" were also stored fields, then this would avoid having to parse the entire source containing the huge field, and the highlighting would be fast :thinking:

I have two other observations that might be worth noting:

  • The "_doc" API fetches the entire source (all 180MB) in 3 seconds. But I suppose this call does not require any parsing to occur.

  • This very slow query only seems to occur in a production environment with ~700GB of data and 66 million documents. If I create an empty index (using the same mapping) on my laptop and create a single document with a ~180MB field, the same query runs in just a few seconds. The production environment has 5 nodes, each having 8 cores and 32GB of heap. If the slowness in the production environment is due entirely to parsing the huge source data, I don't understand why I am unable to reproduce the same slowness on my wimpy laptop.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.