Date histogram with different Timezones with Elasticsearch v.7.13.3

I have to acquire datas from different part of Timezones (example New York -6.00 and Rome +2.00). In the document I have a field 'timestamp' defined as "data" and I have for example create a "date_histogram" for example from 8.00 AM to 9.00 AM. How can I match the USA 8.00-9.00 and the ITA 8.00-9.00 datas in order to compare the two data from the same period?

This is my datas with two different fuse. 2 from USA and 2 from ITA:

"hits" : [
  {
    "_index" : "test-data-2021-8-4",
    "_type" : "_doc",
    "_id" : "9tS4EHsB4Ke8qtFfYqbg",
    "_score" : 1.0,
    "_source" : {
      "id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
      "value" : 87.2,
      "timestamp" : "2021-08-04T12:32:04+02:00"
    }
  },
  {
    "_index" : "test-data-2021-8-4",
    "_type" : "_doc",
    "_id" : "99S4EHsB4Ke8qtFfYqbg",
    "_score" : 1.0,
    "_source" : {
      "id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
      "value" : 31.0025,
      "timestamp" : "2021-08-04T12:32:04+02:00"
    }
  },
  {
    "_index" : "test-data-2021-8-4",
    "_type" : "_doc",
    "_id" : "wdOREHsB4Ke8qtFfuZAf",
    "_score" : 1.0,
    "_source" : {
      "id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
      "value" : 15.1,
      "timestamp" : "2021-08-04T05:49:50-04:00"
    }
  },
  {
    "_index" : "test-data-2021-8-4",
    "_type" : "_doc",
    "_id" : "wtOREHsB4Ke8qtFfuZAg",
    "_score" : 1.0,
    "_source" : {
      "id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
      "value" : 27.9457,
      "timestamp" : "2021-08-04T05:49:50-04:00"
    }
  }
]

This is my date_histogram query:

 GET /test-data-*/_search?size=10000
{
  "aggs": {
    "agg_sum": {
      "date_histogram": {
        "field": "timestamp",
        "fixed_interval": "1h"
      },
      "aggs": {
        "aggregazione": {
          "sum": {
            "field": "value"
          }
        }
      }
    }
  },
  "size": 0,
  "fields": [
    {
      "field": "timestamp",
      "format": "date_time"
    }
  ],
  "stored_fields": [
    "*"
  ],
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "range": {
            "timestamp": {
              "gte": "2021-08-04T08:00:00.000",
              "lte": "2021-08-04T09:00:00.000",
              "format": "strict_date_optional_time"
            }
          }
        }
      ],
      "should": [],
      "must_not": []
    }
  }
}

Welcome to our community! :smiley:

You will need to write two queries to handle this.

Thanks for your replay. But if I have to do some aggregations on the data (for example sum) how can I do if I have two queries? Do I have to do them "by hand" to the query result?
My data are:

"hits" : [
      {
        "_index" : "myindex-000001",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "my_time" : "2021-08-09T08:00:00.000+02:00",
          "value" : 10.0
        }
      },
      {
        "_index" : "myindex-000002",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "my_time" : "2021-08-09T08:00:00.000-06:00",
          "value" : 10.0
        }
      }
    ]

and I have try this query for sum but the result is not correct :

GET /myindex-00000*/_search?size=10000
{
  "aggs": {
    "agg_sum": {
      "date_histogram": {
        "field": "my_time",
        "fixed_interval": "1h"
      },
      "aggs": {
        "aggregazione": {
          "sum": {
            "field": "value"
          }
        }
      }
    }
  },
  "size": 0,
  "fields": [
    {
      "field": "my_time",
      "format": "date_time"
    }
  ],
  "stored_fields": [
    "*"
  ],
  "query": {
    "bool": {
      "filter": [
        {
                "bool": {
                  "should": [
                    {
                      "range": {
                        "my_time": {
                          "gte": "2021-08-09T07:00:00.000+02:00",
                          "lte": "2021-08-09T09:00:00.000+02:00",
                          "format": "strict_date_optional_time"
                          }
                          }
                          
                    },
                    { 
                      "range": {
                        "my_time": {
                          "gte": "2021-08-09T07:00:00.000-06:00",
                          "lte": "2021-08-09T09:00:00.000-06:00",
                          "format": "strict_date_optional_time"
                          }
                          }
                          }
                          ]
                        }
              }
      ]
    }
  }
}

The result is:

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 2,
      "relation" : "eq"
    },
    "max_score" : 0.0,
    "hits" : [
      {
        "_index" : "myindex-000001",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.0,
        "fields" : {
          "my_time" : [
            "2021-08-09T06:00:00.000Z"
          ]
        }
      },
      {
        "_index" : "myindex-000002",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 0.0,
        "fields" : {
          "my_time" : [
            "2021-08-09T14:00:00.000Z"
          ]
        }
      }
    ]
  },
  "aggregations" : {
    "agg_sum" : {
      "buckets" : [
        {
          "key_as_string" : "2021-08-09T06:00:00.000Z",
          "key" : 1628488800000,
          "doc_count" : 1,
          "aggregazione" : {
            "value" : 10.0
          }
        },
        {
          "key_as_string" : "2021-08-09T07:00:00.000Z",
          "key" : 1628492400000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T08:00:00.000Z",
          "key" : 1628496000000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T09:00:00.000Z",
          "key" : 1628499600000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T10:00:00.000Z",
          "key" : 1628503200000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T11:00:00.000Z",
          "key" : 1628506800000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T12:00:00.000Z",
          "key" : 1628510400000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T13:00:00.000Z",
          "key" : 1628514000000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-08-09T14:00:00.000Z",
          "key" : 1628517600000,
          "doc_count" : 1,
          "aggregazione" : {
            "value" : 10.0
          }
        }
      ]
    }
  }
}


Thanks for the support

Here in easy example :

PUT pippo-000001/_doc/1?refresh
{
  "date": "2021-09-09T08:00:00+02:00",
  "value": 1
}
PUT pippo-000001/_doc/2?refresh
{
  "date": "2021-09-09T08:00:00+00:00",
  "value": 2
}
PUT pippo-000001/_doc/3?refresh
{
  "date": "2021-09-09T08:00:00-06:00",
  "value": 3
}

And my query for the aggregation is

GET pippo-000001/_search?size=0
{
  "aggs": {
    "agg_sum": {
      "date_histogram": {
        "field": "date",
        "fixed_interval": "1h",
        "time_zone": "+2"
      },
      "aggs": {
        "aggregazione": {
          "sum": {
            "field": "value"
          }
        }
      }
    }
  },
  "size": 0,
  "fields": [
    {
      "field": "date",
      "format": "date_time"
    }
  ],
  "stored_fields": [
    "*"
  ],
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "range": {
            "date": {
                "gte": "2021-09-09T00:00:00.000+02:00",
                "lte": "2021-09-09T23:00:00.000-06:00",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  }
}

But the result is not correct.

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "agg_sum" : {
      "buckets" : [
        {
          "key_as_string" : "2021-09-09T08:00:00.000+02:00",
          "key" : 1631167200000,
          "doc_count" : 1,
          "aggregazione" : {
            "value" : 1.0
          }
        },
        {
          "key_as_string" : "2021-09-09T09:00:00.000+02:00",
          "key" : 1631170800000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T10:00:00.000+02:00",
          "key" : 1631174400000,
          "doc_count" : 1,
          "aggregazione" : {
            "value" : 2.0
          }
        },
        {
          "key_as_string" : "2021-09-09T11:00:00.000+02:00",
          "key" : 1631178000000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T12:00:00.000+02:00",
          "key" : 1631181600000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T13:00:00.000+02:00",
          "key" : 1631185200000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T14:00:00.000+02:00",
          "key" : 1631188800000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T15:00:00.000+02:00",
          "key" : 1631192400000,
          "doc_count" : 0,
          "aggregazione" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2021-09-09T16:00:00.000+02:00",
          "key" : 1631196000000,
          "doc_count" : 1,
          "aggregazione" : {
            "value" : 3.0
          }
        }
      ]
    }
  }
}

I want that at 8:00 the aggregations are the sum of 1,2 and 3.. because the data is added at 8:00 also if in different fuse

Anyone who can help me?

I don't believe Elasticsearch takes timezones into account when it does aggregations like that sorry to say.

2 Likes

I suspect you may need to extract and store the time in the local timezone in a separate field to achieve that.

1 Like

Is it possible ignore the timezone doing the aggregation? I meankeep only 08:00:00.000 ignoring +02:00?

If you don't pass that in, yes.

Timestamps are not stored as strings so you can not simply ignore the timezone and use the time.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.