Hi,
I have a query that does an aggregation using a Groovy script. When the query matches a large number of documents, it fails with an array_index_out_of_bounds_exception. Here's the query:
GET logstash-java-megatron-hrp-*/_search
{
  "query": {
    "bool": {
      "must_not": [
        {
          "wildcard": {
            "class": "*dataquality*"
          }
        },
        {
          "wildcard": {
            "class": "*DataQuality*"
          }
        }
      ],
      "filter": {
        "and": [
          {
            "match": {
              "loglevel": "ERROR"
            }
          },
          {
            "prefix": {
              "beat.hostname.raw": "eip-a1-p"
            }
          },
          {
            "range": {
              "@timestamp": {
                "gte": "now-16h",
                "lte": "now"
              }
            }
          }
        ]
      }
    }
  },
  "aggs": {
    "classes": {
      "terms": {
        "field": "class.raw"
      },
      "aggs": {
        "messages": {
          "terms": {
            "script": {
              "lang": "groovy",
              "inline": "String[] msgs = _source.logmessage.split(\"[^\\\\S ]+\"); msgs[0] + \"\\n\" + msgs[1]"
            }
          }
        }
      }
    }
  }
}
Here's the error I get back when there's a large number of documents:
{
  "took": 84,
  "timed_out": false,
  "_shards": {
    "total": 150,
    "successful": 144,
    "failed": 6,
    "failures": [
      {
        "shard": 0,
        "index": "logstash-java-megatron-hrp-2017.10.16",
        "node": "R2SE6NTCQq6nrrSPHXytWQ",
        "reason": {
          "type": "script_exception",
          "reason": "failed to run inline script [String[] msgs = _source.logmessage.split(\"[^\\\\S ]+\"); msgs[0] + \"\\n\" + msgs[1]] using lang [groovy]",
          "caused_by": {
            "type": "array_index_out_of_bounds_exception",
            "reason": "1"
          }
        }
      }
    ]
  },
  "hits": {
    "total": 0,
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "classes": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": []
    }
  }
}
The intention is to aggregate by the first line of the log message, since some messages are multi-line (stack traces). I didn't write the query, but I did help the person who did. Is there a better way to do this? I'm currently running ES v2.4.1.
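My guess at the cause: for single-line messages, split() returns a one-element array, so msgs[1] is out of bounds (the "reason": "1" in the failure looks like the offending index). A minimal guarded version of the script, untested, that falls back to just the first line when there is no second one:

  // Split on any whitespace other than a space (line breaks, tabs).
  String[] msgs = _source.logmessage.split("[^\\S ]+");
  // Only reference msgs[1] when a second line actually exists.
  msgs.length > 1 ? msgs[0] + "\n" + msgs[1] : msgs[0]

Even with the guard, though, running a Groovy script over _source for every matching document seems expensive, so I wonder if extracting the first line into its own field at index time (e.g. in Logstash) and doing a plain terms aggregation on that field would be the cleaner option.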