Average aggregation seems to be wrong

Hi,
I was running a derivative-of-average query on a metric of a child element and more or less got what I wanted, but noticed that some of the calculations do not add up precisely. So I decided to build the query step by step to see where it goes wrong. Already at the first step, when calculating the average of five values, I can see that the result is inaccurate.
I am running Elasticsearch 6.0.

I first run this query to get the raw data for a five-minute interval. Data is collected every minute, so I get five docs with these values:

{
	"size": 10000,
	"_source": [
		"tag.agent_host",
		"tag.ifDescr",
		"interface.ifHCInOctets"
	],

	"query": {
		"bool": {
			"filter": [{
					"range": {
						"@timestamp": {
							"gte": "1522330307503",
							"lte": "1522330607503",
							"format": "epoch_millis"
						}
					}
				}, {
					"query_string": {
						"analyze_wildcard": false,
						"query": "tag.agent_host:labrouter1.testdomain.net AND tag.ifDescr:GigabitEthernet*0 AND _exists_:interface.ifHCInOctets"
					}
				}
			]
		}
	}
}

Output:

{
    "took": 54,
    "timed_out": false,
    "_shards": {
        "total": 768,
        "successful": 768,
        "skipped": 744,
        "failed": 0
    },
    "hits": {
        "total": 5,
        "max_score": 0,
        "hits": [
            {
                "_index": "test-index-2018.03.29",
                "_type": "metrics",
                "_id": "QrH1cWIBrVLc4RYS48TM",
                "_score": 0,
                "_source": {
                    "tag": {
                        "agent_host": "labrouter1.testdomain.net",
                        "ifDescr": "GigabitEthernet0/0/0"
                    },
                    "interface": {
                        "ifHCInOctets": 402423415108
                    }
                }
            },
            {
                "_index": "test-index-2018.03.29",
                "_type": "metrics",
                "_id": "BsP4cWIBrVLc4RYSomgl",
                "_score": 0,
                "_source": {
                    "tag": {
                        "agent_host": "labrouter1.testdomain.net",
                        "ifDescr": "GigabitEthernet0/0/0"
                    },
                    "interface": {
                        "ifHCInOctets": 402849967058
                    }
                }
            },
            {
                "_index": "test-index-2018.03.29",
                "_type": "metrics",
                "_id": "x7f2cWIBrVLc4RYSzXP8",
                "_score": 0,
                "_source": {
                    "tag": {
                        "agent_host": "labrouter1.testdomain.net",
                        "ifDescr": "GigabitEthernet0/0/0"
                    },
                    "interface": {
                        "ifHCInOctets": 402533814871
                    }
                }
            },
            {
                "_index": "test-index-2018.03.29",
                "_type": "metrics",
                "_id": "e733cWIBrVLc4RYSt7HE",
                "_score": 0,
                "_source": {
                    "tag": {
                        "agent_host": "labrouter1.testdomain.net",
                        "ifDescr": "GigabitEthernet0/0/0"
                    },
                    "interface": {
                        "ifHCInOctets": 402662297513
                    }
                }
            },
            {
                "_index": "test-index-2018.03.29",
                "_type": "metrics",
                "_id": "wsn5cWIBrVLc4RYSjcrG",
                "_score": 0,
                "_source": {
                    "tag": {
                        "agent_host": "labrouter1.testdomain.net",
                        "ifDescr": "GigabitEthernet0/0/0"
                    },
                    "interface": {
                        "ifHCInOctets": 402989750842
                    }
                }
            }
        ]
    }
}

I calculated the average of these five values myself, and it should be 402691849078.4 (sum 2013459245392 divided by 5).

1 Like

Next I run a query for the same time period, but this time I ask Elasticsearch to return the average:

{
	"size": 0,
	"query": {
		"bool": {
			"filter": [{
					"range": {
						"@timestamp": {
							"gte": "1522330307503",
							"lte": "1522330607503",
							"format": "epoch_millis"
						}
					}
				}, {
					"query_string": {
						"analyze_wildcard": true,
						"query": "tag.agent_host:labrouter1.testdomain.net AND tag.ifDescr:GigabitEthernet*0 AND _exists_:interface.ifHCInOctets"
					}
				}
			]
		}
	},
	"aggs": {
		"DBF_Device": {
			"terms": {
				"field": "tag.agent_host",
				"size": 10,
				"order": {
					"_term": "desc"
				},
				"min_doc_count": 1
			},
			"aggs": {
				"DBF_Interface": {
					"terms": {
						"field": "tag.ifDescr",
						"size": 10,
						"order": {
							"_term": "desc"
						},
						"min_doc_count": 1
					},
					"aggs": {
						"DBF_Metric_AVG": {
							"avg": {
								"field": "interface.ifHCInOctets"
							}
						}
					}
				}
			}
		}
	}
}

And I get this output:

{
    "took": 119,
    "timed_out": false,
    "_shards": {
        "total": 1023,
        "successful": 1023,
        "skipped": 991,
        "failed": 0
    },
    "hits": {
        "total": 5,
        "max_score": 0,
        "hits": []
    },
    "aggregations": {
        "DBF_Device": {
            "doc_count_error_upper_bound": 0,
            "sum_other_doc_count": 0,
            "buckets": [
                {
                    "key": "labrouter1.testdomain.net",
                    "doc_count": 5,
                    "DBF_Interface": {
                        "doc_count_error_upper_bound": 0,
                        "sum_other_doc_count": 0,
                        "buckets": [
                            {
                                "key": "GigabitEthernet0/0/0",
                                "doc_count": 5,
                                "DBF_Metric_AVG": {
                                    "value": 402691843686.4
                                }
                            }
                        ]
                    }
                }
            ]
        }
    }
}

So Elasticsearch returns a slightly different value of 402691843686.4, even though the time period is the same and the doc count is the same.

Why is this happening? Or is my query perhaps wrong?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.