How to normalize for time in Kibana

In my index, I am tracking the performance of widgets. I have the install date of each widget. I would like to determine which widgets have had the most number of parts replaced on them per time, accounting for the fact that not all widgets were installed at the same time.

How would I enter the formula: (total number of parts replaced on a widget)/(number of days that widget has been in service) in Kibana?

Thanks!

It's a really interesting question. I think we can do this with Timelion, but I would like to ask how many widgets you have and if the installation date is on each document or if it is stored in a single document.

Do you have few simple documents to play with?

Hi Luca, there are about 50 widgets. Each document contains that widget's install date. Is this something that can be done in Kibana? I was looking at the Average Bucket Aggregation - but I'm having a hard time understanding.

If not directly feasible on Kibana I'll propose to use transform Jobs.

I can't right now but I'll come back on this post.

What version are you on?

I am on version 6.8. Thanks!

Demo data

PUT widget?include_type_name=true
{
  "mappings": {
    "_doc": {
      "properties": {
        "widget_id": {
          "type": "long"
        },
        "installation_date": {
          "type": "date"
        },
        "@timestamp": {
          "type": "date"
        },
        "replaced_parts": {
          "type": "short"
        }
      }
    }
  }
}

PUT widget/_doc/1
{
  "widget_id": 1,
  "installation_date": "2020-03-27T07:47:16.316Z",
  "@timestamp": "2020-04-27T07:47:16.316Z",
  "replaced_parts": 0
}

PUT widget/_doc/2
{
  "widget_id": 2,
  "installation_date": "2019-08-07T07:47:16.316Z",
  "@timestamp": "2020-04-27T07:47:16.316Z",
  "replaced_parts": 0
}

PUT widget/_doc/3
{
  "widget_id": 3,
  "installation_date": "2018-08-07T07:47:16.316Z",
  "@timestamp": "2020-04-27T07:47:16.316Z",
  "replaced_parts": 0
}

PUT widget/_doc/4
{
  "widget_id": 1,
  "installation_date": "2020-04-27T07:47:16.316Z",
  "@timestamp": "2020-04-27T07:47:16.316Z",
  "replaced_parts": 1
}

PUT widget/_doc/5
{
  "widget_id": 2,
  "installation_date": "2019-08-07T07:47:16.316Z",
  "@timestamp": "2019-12-27T07:47:16.316Z",
  "replaced_parts": 1
}

PUT widget/_doc/6
{
  "widget_id": 3,
  "installation_date": "2018-08-07T07:47:16.316Z",
  "@timestamp": "2019-04-27T07:47:16.316Z",
  "replaced_parts": 2
}

PUT widget/_doc/7
{
  "widget_id": 1,
  "installation_date": "2020-04-27T07:47:16.316Z",
  "@timestamp": "2020-04-27T07:47:16.316Z",
  "replaced_parts": 0
}

PUT widget/_doc/8
{
  "widget_id": 2,
  "installation_date": "2019-08-07T07:47:16.316Z",
  "@timestamp": "2020-01-27T07:47:16.316Z",
  "replaced_parts": 1
}

PUT widget/_doc/9
{
  "widget_id": 3,
  "installation_date": "2018-08-07T07:47:16.316Z",
  "@timestamp": "2019-04-27T07:47:16.316Z",
  "replaced_parts": 3
}

Elasticsearch DSL

GET widget/_search
{
  "size": 0,
  "aggs": {
    "widget": {
      "terms": {
        "field": "widget_id",
        "size": 100
      },
      "aggs": {
        "installation_date": {
          "min": {
            "field": "installation_date"
          }
        },
        "repaired_parts": {
          "sum": {
            "field": "replaced_parts"
          }
        },
        "ratio": {
          "bucket_script": {
            "buckets_path": {
              "installation_date": "installation_date",
              "repaired_parts": "repaired_parts"
            },
            "script": """
ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(new Date().getTime()), ZoneId.of('Z'));
ZonedDateTime installation_date = ZonedDateTime.ofInstant(Instant.ofEpochMilli(params.installation_date.longValue()), ZoneId.of('Z'));
return params.repaired_parts / Duration.between(installation_date, now).toDays();
            """
          }
        }
      }
    }
  }
}

Result

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 9,
    "max_score" : 0.0,
    "hits" : [ ]
  },
  "aggregations" : {
    "widget" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : 1,
          "doc_count" : 3,
          "installation_date" : {
            "value" : 1.585295236316E12,
            "value_as_string" : "2020-03-27T07:47:16.316Z"
          },
          "repaired_parts" : {
            "value" : 1.0
          },
          "ratio" : {
            "value" : 0.029411764705882353
          }
        },
        {
          "key" : 2,
          "doc_count" : 3,
          "installation_date" : {
            "value" : 1.565164036316E12,
            "value_as_string" : "2019-08-07T07:47:16.316Z"
          },
          "repaired_parts" : {
            "value" : 2.0
          },
          "ratio" : {
            "value" : 0.00749063670411985
          }
        },
        {
          "key" : 3,
          "doc_count" : 3,
          "installation_date" : {
            "value" : 1.533628036316E12,
            "value_as_string" : "2018-08-07T07:47:16.316Z"
          },
          "repaired_parts" : {
            "value" : 5.0
          },
          "ratio" : {
            "value" : 0.007911392405063292
          }
        }
      ]
    }
  }
}

Kibana 6.8

Table

If you have to display a table, the closest result I got is the following, which requires defining a scripted field I named installation_since_days in the index pattern:

ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(new Date().getTime()), ZoneId.of('Z'));
ZonedDateTime installation_date = ZonedDateTime.ofInstant(Instant.ofEpochMilli(doc['installation_date'][0].getMillis()), ZoneId.of('Z'));
return Duration.between(installation_date, now).toDays();

You will not get the ratio, but the raw numbers.

The enhanced table plugin by @fbaligand should help you compute the ratio.

Vega

{
  "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
  "data": {
    "url": {
      "%context%": true,
      "index": "widget",
      "body": {
        "size": 0,
        "aggs": {
          "widget": {
            "terms": {"field": "widget_id", "size": 100},
            "aggs": {
              "installation_date": {"min": {"field": "installation_date"}},
              "repaired_parts": {"sum": {"field": "replaced_parts"}},
              "ratio": {
                "bucket_script": {
                  "buckets_path": {
                    "installation_date": "installation_date",
                    "repaired_parts": "repaired_parts"
                  },
                  "script": "ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(new Date().getTime()), ZoneId.of('Z'));ZonedDateTime installation_date = ZonedDateTime.ofInstant(Instant.ofEpochMilli(params.installation_date.longValue()), ZoneId.of('Z')); return params.repaired_parts / Duration.between(installation_date, now).toDays();"
                }
              }
            }
          }
        }
      }
    },
    "format": {"property": "aggregations.widget.buckets"}
  },
  "mark": "bar",
  "encoding": {
    "x": {"field": "key", "type": "ordinal", "axis": {"title": false}},
    "y": {"field": "ratio.value", "type": "quantitative", "axis": {"title": "Ratio"}}
  }
}

Visual result:

Elasticsearch SQL Variant

Elasticsearch SQL requires a Basic license.

This works only with 7.5+ because the DATE_DIFF was introduced on 7.5.

GET _sql?format=txt
{
  "query": "SELECT widget_id, SUM(replaced_parts)/DATE_DIFF('days', MIN(installation_date), TODAY()) AS ratio FROM widget GROUP BY widget_id"
}

Result

   widget_id   |       ratio        
---------------+--------------------
1              |0.030303030303030304
2              |0.007518796992481203
3              |0.00792393026941363 

This enables you to use Canvas!

Canvas

As a Table:

image

Expression code:

filters
| essql 
  query="SELECT widget_id, SUM(replaced_parts)/DATE_DIFF('days', MIN(installation_date), TODAY()) AS ratio FROM widget GROUP BY widget_id"
| table
| render

Or as a histogram:

image

Expression code:

filters
| essql 
  query="SELECT CAST(widget_id AS INTEGER), SUM(replaced_parts)/DATE_DIFF('days', MIN(installation_date), TODAY()) AS ratio FROM widget GROUP BY widget_id"
| alterColumn "widget_id" type="string"
| pointseries x="widget_id" y="ratio" color="widget_id"
| plot defaultStyle={seriesStyle bars=0.5} legend=false
| render

Other solutions

TSVB (with installation_date as long)

Another solution might exist with TSVB, but it requires to index the installation_date as long, expressed in epoch as min/max aggregations work only on numeric values.

This is an Ingest pipeline to index the installation_date converting it into a long into the field installation_date_long while indexing (you have to pass ?pipeline=convert_to_long):

PUT _ingest/pipeline/convert_to_long
{
  "description": "converts the date to epochmilli (long)",
  "processors": [
    {
      "script": {
        "lang": "painless",
        "source": "ctx.installation_date_long = ZonedDateTime.parse(ctx.installation_date).toInstant().toEpochMilli();"
      }
    }
  ]
}

It would work both for 6.8 and 7.x.

Transform Job

On 7.2+ we introduced Dataframes / Transform Jobs.

It is a way to define a continuous job which turns out time-based data into entity centric indices.

In your case, the entity is the widget.

POST _transform/_preview
{
  "source": {
    "index": [
      "widget"
    ]
  },
  "pivot": {
    "group_by": {
      "widget_id": {
        "terms": {
          "field": "widget_id"
        }
      }
    },
    "aggregations": {
      "installation_date": {
        "min": {
          "field": "installation_date"
        }
      },
      "repaired_parts": {
        "sum": {
          "field": "replaced_parts"
        }
      },
      "ratio" : {
         "bucket_script": {
            "buckets_path": {
              "installation_date": "installation_date",
              "repaired_parts": "repaired_parts"
            },
            "script": """
ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(new Date().getTime()), ZoneId.of('Z'));
ZonedDateTime installation_date = ZonedDateTime.ofInstant(Instant.ofEpochMilli(params.installation_date.longValue()), ZoneId.of('Z'));
return params.repaired_parts / Duration.between(installation_date, now).toDays();
            """
          }
      }
    }
  }
}

Full transform:

PUT _transform/widget_process
{
  "source": {
    "index": [
      "widget"
    ]
  },
  "pivot": {
    "group_by": {
      "widget_id": {
        "terms": {
          "field": "widget_id"
        }
      }
    },
    "aggregations": {
      "installation_date": {
        "min": {
          "field": "installation_date"
        }
      },
      "repaired_parts": {
        "sum": {
          "field": "replaced_parts"
        }
      },
      "ratio": {
        "bucket_script": {
          "buckets_path": {
            "installation_date": "installation_date",
            "repaired_parts": "repaired_parts"
          },
          "script": "ZonedDateTime now = ZonedDateTime.ofInstant(Instant.ofEpochMilli(new Date().getTime()), ZoneId.of('Z')); ZonedDateTime installation_date = ZonedDateTime.ofInstant(Instant.ofEpochMilli(params.installation_date.longValue()), ZoneId.of('Z')); return params.repaired_parts / Duration.between(installation_date, now).toDays();"
        }
      }
    }
  },
  "dest": {
    "index": "widget_entity"
  },
  "sync": {
    "time": {
      "field": "@timestamp",
      "delay": "1h"
    }
  }
}

Thank you Luca, this looks super cool!

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.