Date Histogram empty buckets and number of buckets


(Ilya Anisimov) #1

Hi Guys,

I'm trying to use Elasticsearch 1.4.4 and its aggregation framework to generate data for graphs. I have a strange problem: when I specify the interval "day" (with both the query range and extended_bounds set to Aug 3rd 2015 through Sep 14th 2015), it does generate empty buckets, but the first bucket is Sep 1st 2015. Why is that? Can you help me understand this and find a workaround to generate empty buckets for the rest of the date range?

Please find an example request/response and the mapping below.

Thanks a lot!
Ilya

Request:
{
"size" : 0,
"query" : {
"filtered" : {
"query" : {
"match" : {
"application" : {
"query" : "MGU",
"type" : "boolean"
}
}
},
"filter" : {
"range" : {
"timestamp" : {
"from" : 1438585200000,
"to" : 1442214000000,
"include_lower" : true,
"include_upper" : true
}
}
}
}
},
"aggregations" : {
"by_interval" : {
"date_histogram" : {
"field" : "timestamp",
"interval" : "1d",
"min_doc_count" : 0,
"order" : {
"_key" : "asc"
},
"extended_bounds" : {
"min" : 1438585200000,
"max" : 1442214000000
}
}
}
}
}

Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 10228,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"by_interval" : {
"buckets" : [ {
"key_as_string" : "2015-09-01T00:00:00.000Z",
"key" : 1441065600000,
"doc_count" : 388
}, {
"key_as_string" : "2015-09-02T00:00:00.000Z",
"key" : 1441152000000,
"doc_count" : 105
}, {
"key_as_string" : "2015-09-03T00:00:00.000Z",
"key" : 1441238400000,
"doc_count" : 1766
}, {
"key_as_string" : "2015-09-04T00:00:00.000Z",
"key" : 1441324800000,
"doc_count" : 7653
}, {
"key_as_string" : "2015-09-05T00:00:00.000Z",
"key" : 1441411200000,
"doc_count" : 0
}, {
"key_as_string" : "2015-09-06T00:00:00.000Z",
"key" : 1441497600000,
"doc_count" : 0
}, {
"key_as_string" : "2015-09-07T00:00:00.000Z",
"key" : 1441584000000,
"doc_count" : 0
}, {
"key_as_string" : "2015-09-08T00:00:00.000Z",
"key" : 1441670400000,
"doc_count" : 9
}, {
"key_as_string" : "2015-09-09T00:00:00.000Z",
"key" : 1441756800000,
"doc_count" : 307
} ]
}
}
}


(Ilya Anisimov) #2

Mapping
{
"graylog2_0" : {
"aliases" : {
"graylog2_deflector" : { }
},
"mappings" : {
"message" : {
"dynamic_templates" : [ {
"store_generic" : {
"mapping" : {
"index" : "not_analyzed"
},
"match" : "*"
}
} ],
"_source" : {
"compress" : true
},
"properties" : {
"api" : {
"type" : "string",
"index" : "not_analyzed"
},
"appVersion" : {
"type" : "string",
"index" : "not_analyzed"
},
"application" : {
"type" : "string",
"index" : "not_analyzed"
},
"data" : {
"type" : "string",
"index" : "not_analyzed"
},
"dbname" : {
"type" : "string",
"index" : "not_analyzed"
},
"deviceModel" : {
"type" : "string",
"index" : "not_analyzed"
},
"environment" : {
"type" : "string",
"index" : "not_analyzed"
},
"errorMessage" : {
"type" : "string",
"index" : "not_analyzed"
},
"full_message" : {
"type" : "string",
"analyzer" : "standard"
},
"gl2_remote_ip" : {
"type" : "string",
"index" : "not_analyzed"
},
"gl2_remote_port" : {
"type" : "long"
},
"gl2_source_input" : {
"type" : "string",
"index" : "not_analyzed"
},
"gl2_source_node" : {
"type" : "string",
"index" : "not_analyzed"
},
"level" : {
"type" : "long"
},
"message" : {
"type" : "string",
"analyzer" : "standard"
},
"osName" : {
"type" : "string",
"index" : "not_analyzed"
},
"osVersion" : {
"type" : "string",
"index" : "not_analyzed"
},
"responseCode" : {
"type" : "long"
},
"size" : {
"type" : "long",
"store" : true
},
"source" : {
"type" : "string",
"analyzer" : "analyzer_keyword"
},
"sourceCountry" : {
"type" : "string",
"index" : "not_analyzed"
},
"sourceIp" : {
"type" : "string",
"index" : "not_analyzed"
},
"table" : {
"type" : "string",
"index" : "not_analyzed"
},
"timestamp" : {
"type" : "date",
"format" : "yyyy-MM-dd HH:mm:ss.SSS"
},
"userid" : {
"type" : "string",
"index" : "not_analyzed"
},
"version" : {
"type" : "string",
"index" : "not_analyzed"
}
}
}
},
"settings" : {
"index" : {
"uuid" : "vp_op6-CSoumPI8jEWTpjQ",
"analysis" : {
"analyzer" : {
"analyzer_keyword" : {
"filter" : "lowercase",
"tokenizer" : "keyword"
}
}
},
"number_of_replicas" : "0",
"number_of_shards" : "4",
"version" : {
"created" : "901399"
}
}
},
"warmers" : { }
}
}


(Ramy) #3

Hi,
I've never used the date histogram aggregation, but I have some questions about your use case:

Why are you not using the same date format in your query as in your mapping?
Please take a look at:

  1. https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html#date-format-pattern
  2. http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html

Why are you using order here? Does it make sense for you? I think the date_histogram will return the data in the proper order anyway!


(Ilya Anisimov) #4

Thank you for your reply, remram. I'm using UNIX epoch milliseconds because they work well and are easy to handle on the front end (no string formatting, etc.). Anyway, using a different date-time format doesn't solve my particular problem (just tested) :slight_smile: Do you have any other suggestions?

I'm especially looking for help from Elasticsearch development team :slight_smile:


(Ilya Anisimov) #5

Any ideas?


(system) #6