What is the meaning of time_in_millis in _node stats

sample output

GET _nodes/dev-iad-cluster-data-4/stats/ingest
      "ingest" : {
        "total" : {
          "count" : 0,
          "time_in_millis" : 0,
          "current" : 0,
          "failed" : 0
        },

what is meaning of time_in_millis
we record at t1, value of time_in_millis, and count
we record at t2, value of time_in_millis and count
and then we compute latency as the diff of time_in_millis devivided by the diff of count
and we get back about 1400

this does not make sense as our event ingestion latency is in the order of 2-3ms, so we can only guess that time_in_millis is in microseconds.

Is this interpretation correct? Can someone who is familiar with apache lucene check if time_in_millis in _nodes/stats is actually in microseconds instead?

Thanks

time_in_millis is the total time spent in ingest pipelines for all the documents that went through it on that node, since the last JVM restart.

Does that make sense? Can you share output of t1 and t2 and we can take a look (and also ensure you did not restart that node in between).

we run the code here in a loop, and the code calculates the latency correctly in unit tests

def create_rates(curr, prev):
    diff = curr[NOW_KEY_NAME] - prev[NOW_KEY_NAME]
    count_diff = curr[COUNT_KEY_NAME] - prev[COUNT_KEY_NAME]
    time_in_millis = curr[TIME_KEY_NAME] - prev[TIME_KEY_NAME]
    rate = count_diff / diff
    load = time_in_millis / diff
    d = {RATE_KEY_NAME: rate,
         LOAD_KEY_NAME: load,
         LATENCY_KEY_NAME: load / rate if rate else 0
         }
    curr.update(d)

    curr_dict = {}
    for i in range(len(curr[PROCESSORS_KEY_NAME])):
        key = list(curr[PROCESSORS_KEY_NAME][i])[0]
        curr_dict[key] = curr[PROCESSORS_KEY_NAME][i][key]

    prev_dict = {}
    for i in range(len(prev[PROCESSORS_KEY_NAME])):
        key = list(prev[PROCESSORS_KEY_NAME][i])[0]
        prev_dict[key] = prev[PROCESSORS_KEY_NAME][i][key]

    now = curr
    for tag in curr_dict:
        if tag not in prev_dict:
            continue
        curr, prev = curr_dict[tag][STATS_KEY_NAME], prev_dict[tag][STATS_KEY_NAME]
        count_diff = curr[COUNT_KEY_NAME] - prev[COUNT_KEY_NAME]
        time_in_millis = curr[TIME_KEY_NAME] - prev[TIME_KEY_NAME]
        rate = count_diff / diff
        load = time_in_millis / diff
        d = {RATE_KEY_NAME: rate,
             LOAD_KEY_NAME: load,
             LATENCY_KEY_NAME: load / rate if rate else 0
             }
        if ":" in tag:
            now.update({tag: d})
            if LAG_KEY_NAME not in now:
                now[LAG_KEY_NAME] = {}
            now[LAG_KEY_NAME][tag] = d[LATENCY_KEY_NAME]


then
                file_path = f"/data/adp_metrics_{curr}_{pipeline_name}.json"
                curr_stats = {NOW_KEY_NAME: time.mktime(now.timetuple()),
                              **d['nodes'][curr]['ingest'][PIPELINE_KEY_NAME][pipeline_name]}
                if os.path.isfile(file_path):
                    with open(file_path) as file:
                        prev_stats = json.load(file)
                        create_rates(curr_stats, prev_stats)
                with open(file_path, "w+") as file:
                    json.dump(curr_stats, file, indent=2)

here are two data points, t1 and t2 are the @timestamp in each records

{
  "_index": "adp-metrics-2020.10.19-000006",
  "_type": "_doc",
  "_id": "65NhQXUBQfB9ex7MdjQy",
  "_version": 1,
  "_score": null,
  "_source": {
    "kubernetes": {
      "container": {
        "image": "iad.ocir.io/internalcloudopssecurity/scse/pylite:1.0.15",
        "name": "adp-tf-metrics"
      },
      "node": {
        "name": "10.128.1.133"
      },
      "pod": {
        "uid": "9a13e04e-86a7-45c7-9325-6d6f1f93626d",
        "name": "metrics-cron-1603119540-vrxz7"
      },
      "namespace": "tf",
      "labels": {
        "job-name": "metrics-cron-1603119540",
        "controller-uid": "0b6283a9-b8e3-4297-af84-8971c1358538"
      }
    },
    "agent": {
      "hostname": "monitoring-filebeat-filebeat-jmtj6",
      "id": "c84ea6ca-454b-4c39-b44b-8c076e9e3ee6",
      "ephemeral_id": "7d460056-f4f2-4e67-a87e-833b26a75336",
      "type": "filebeat",
      "version": "7.7.1"
    },
    "log": {
      "file": {
        "path": "/var/log/containers/metrics-cron-1603119540-vrxz7_tf_adp-tf-metrics-baf63a27fa7bdc4bb1b89a2ab7411569f2573789884b0a38c8e56410e3be788b.log"
      },
      "offset": 59951
    },
    "@metadata": {
      "beat": "filebeat",
      "type": "_doc",
      "version": "7.7.1"
    },
    "oke": {
      "name": "iad-c-prod"
    },
    "logstashhost": "10.244.2.131",
    "type": "clusterlog",
    "adp": {
      "set:event.invesgested_load": 3.156862745098039,
      "set:agent.type_latency": 0.00043089255734732784,
      "set:process.exit_code,EINPROGRESS_load": 0,
      "enrich:dns_ip_enrich_policy_load": 487122.70588235295,
      "enrich:scanners_host_enrich_policy_latency": 297.80717170570296,
      "set:url.siem_latency": 0.0016666666666666666,
      "enrich:scanners_host_enrich_policy_rate": 1611.8169934640523,
      "enrich:scanners_host_enrich_policy_load": 480010.66013071896,
      "enrich:dns_ip_enrich_policy_rate": 1615.3986928104575,
      "set:process.exit_code,EINPROGRESS_latency": 0,
      "lag_set:agent.type": 0.00043089255734732784,
      "lag_set:url.siem": 0.0016666666666666666,
      "set:url.auditd_latency": 0.0024390243902439024,
      "set:url.cronjob_latency": 0.00167479674796748,
      "lag_set:url.auditd": 0.0024390243902439024,
      "set:url.osquery_latency": 0.001475609756097561,
      "set:event.invesgested_rate": 1607.8496732026144,
      "lag_set:event.invesgested": 0.001963406652818484,
      "lag_set:process.exit_code,ENOENT": 0.0004414361389052384,
      "enrich:dns_ip_enrich_policy_latency": 301.54952337794754,
      "set:url.osquery_load": 2.372549019607843,
      "set:url.baseinfo_rate": 1607.8431372549019,
      "count": 2190647057,
      "set:url.osquery_rate": 1607.8431372549019,
      "set:url.baseinfo_load": 2.241830065359477,
      "failed": 0,
      "lag_set:url.network": 0.0034390243902439025,
      "node": "scs-c-prod-iad-adp-elk-hot-14",
      "set:process.exit_code,ENOENT_load": 0.0392156862745098,
      "time_in_millis": 1257311973991,
      "set:process.exit_code,ENOENT_rate": 88.83660130718954,
      "set:event.created_rate": 1607.8496732026144,
      "lag_set:event.created": 0.0013536530339307562,
      "set:url.siem_rate": 1607.8431372549019,
      "rename:source.ip_rate": 1607.8496732026144,
      "rename:source.ip_load": 2.281045751633987,
      "set:process.exit_code,ENOENT_latency": 0.0004414361389052384,
      "set:url.siem_load": 2.679738562091503,
      "set:event.created_load": 2.176470588235294,
      "lag_set:url.osquery": 0.001475609756097561,
      "enrich:scanners_ip_enrich_policy_load": 471912.7124183007,
      "latency": 1199.862326037742,
      "enrich:scanners_ip_enrich_policy_rate": 1616.9738562091504,
      "set:event.invesgested_latency": 0.001963406652818484,
      "set:datastudio.url_load": 0.0718954248366013,
      "set:datastudio.url_rate": 1607.8431372549019,
      "set:agent.type_load": 0.6928104575163399,
      "set:agent.type_rate": 1607.8496732026144,
      "set:url.auditd_rate": 1607.8431372549019,
      "current": 448,
      "load": 1950278.183006536,
      "rate": 1625.4183006535948,
      "set:process.exit_code_EPERM_rate": 1.196078431372549,
      "lag_set:url.baseinfo": 0.001394308943089431,
      "lag_set:process.exit_code_EPERM": 0,
      "lag_set:datastudio.url": 0.000044715447154471546,
      "set:process.exit_code_EPERM_load": 0,
      "set:url.network_rate": 1607.8431372549019,
      "set:url.network_load": 5.529411764705882,
      "set:event.created_latency": 0.0013536530339307562,
      "lag_enrich:dns_ip_enrich_policy": 301.54952337794754,
      "set:url.network_latency": 0.0034390243902439025,
      "set:url.cronjob_rate": 1607.8431372549019,
      "enrich:failed_root_logins_enrich_policy_rate": 1611.2091503267975,
      "enrich:failed_root_logins_enrich_policy_load": 496451.4052287582,
      "enrich:failed_root_logins_enrich_policy_latency": 308.1235016124779,
      "lag_rename:source.ip": 0.0014186934199454473,
      "set:url.baseinfo_latency": 0.001394308943089431,
      "lag_set:url.cronjob": 0.00167479674796748,
      "lag_enrich:scanners_ip_enrich_policy": 291.8493150684931,
      "lag_enrich:scanners_host_enrich_policy": 297.80717170570296,
      "set:url.auditd_load": 3.9215686274509802,
      "set:url.cronjob_load": 2.69281045751634,
      "enrich:scanners_ip_enrich_policy_latency": 291.8493150684931,
      "pipeline": "adp_audit",
      "@timestamp": 1603119705,
      "lag_set:process.exit_code,EINPROGRESS": 0,
      "set:datastudio.url_latency": 0.000044715447154471546,
      "rename:source.ip_latency": 0.0014186934199454473,
      "set:process.exit_code,EINPROGRESS_rate": 3.6013071895424837,
      "lag_enrich:failed_root_logins_enrich_policy": 308.1235016124779,
      "set:process.exit_code_EPERM_latency": 0
    },
    "cloud": {
      "instance": {
        "name": "iad-c-prod-2-cluster"
      }
    },
    "input": {
      "type": "container"
    },
    "lag": {
      "total": 0.21000003814697266
    },
    "@timestamp": "2020-10-19T15:01:46.712Z",
    "ecs": {
      "version": "1.5.0"
    },
    "stream": "stdout",
    "@version": "1",
    "host": {
      "hostname": "monitoring-filebeat-filebeat-jmtj6",
      "os": {
        "kernel": "4.14.35-1902.7.3.1.el7uek.x86_64",
        "codename": "Core",
        "name": "CentOS Linux",
        "family": "redhat",
        "version": "7 (Core)",
        "platform": "centos"
      },
      "containerized": false,
      "ip": [
        "10.241.1.53"
      ],
      "name": "monitoring-filebeat-filebeat-jmtj6",
      "id": "8cc8b83c2903418e8e1fa9d45d4fc3e5",
      "mac": [
        "5e:1b:54:02:6c:98"
      ],
      "architecture": "x86_64"
    },
    "event": {
      "ingested": "2020-10-19T15:01:47.697Z",
      "kind": "metric",
      "created": "2020-10-19T15:01:45.588482",
      "category": "elasticsearch",
      "type": "info",
      "dataset": "pipeline",
      "outcome": "success"
    }
  },
  "fields": {
    "event.ingested": [
      "2020-10-19T15:01:47.697Z"
    ],
    "@timestamp": [
      "2020-10-19T15:01:46.712Z"
    ],
    "event.created": [
      "2020-10-19T15:01:45.588Z"
    ]
  },
  "highlight": {
    "adp.pipeline": [
      "@kibana-highlighted-field@adp_audit@/kibana-highlighted-field@"
    ],
    "adp.node": [
      "@kibana-highlighted-field@scs-c-prod-iad-adp-elk-hot-14@/kibana-highlighted-field@"
    ],
    "event.dataset": [
      "@kibana-highlighted-field@pipeline@/kibana-highlighted-field@"
    ]
  },
  "sort": [
    1603119707697
  ]
}
{
  "_index": "adp-metrics-2020.10.19-000006",
  "_type": "_doc",
  "_id": "7JNiQXUBQfB9ex7M1px_",
  "_version": 1,
  "_score": null,
  "_source": {
    "kubernetes": {
      "container": {
        "image": "iad.ocir.io/internalcloudopssecurity/scse/pylite:1.0.15",
        "name": "adp-tf-metrics"
      },
      "node": {
        "name": "10.128.1.133"
      },
      "pod": {
        "uid": "501a0523-b84b-483a-9f8d-8c5a8a405d81",
        "name": "metrics-cron-1603119660-jgv5x"
      },
      "namespace": "tf",
      "labels": {
        "job-name": "metrics-cron-1603119660",
        "controller-uid": "9af48389-b5b5-4ff0-8924-417107d28e20"
      }
    },
    "agent": {
      "hostname": "monitoring-filebeat-filebeat-jmtj6",
      "id": "c84ea6ca-454b-4c39-b44b-8c076e9e3ee6",
      "type": "filebeat",
      "ephemeral_id": "7d460056-f4f2-4e67-a87e-833b26a75336",
      "version": "7.7.1"
    },
    "log": {
      "file": {
        "path": "/var/log/containers/metrics-cron-1603119660-jgv5x_tf_adp-tf-metrics-a8585a1b5a125050de45d9e48aad190f46d3ef066ce087f13cea1c3aa851cd9d.log"
      },
      "offset": 8168
    },
    "@metadata": {
      "beat": "filebeat",
      "type": "_doc",
      "version": "7.7.1"
    },
    "oke": {
      "name": "iad-c-prod"
    },
    "logstashhost": "10.244.2.131",
    "type": "clusterlog",
    "adp": {

      "set:url.auditd_latency": 0.0010150140330799206,
      "set:url.cronjob_latency": 0.0006744190799288795,
      "lag_set:url.auditd": 0.0010150140330799206,
      "set:url.osquery_latency": 0.0034675150383879118,
      "set:event.invesgested_rate": 1649.426966292135,
      "lag_set:event.invesgested": 0.0010694895741796607,
      "lag_set:process.exit_code,ENOENT": 0,
      "enrich:dns_ip_enrich_policy_latency": 287.27606594105686,
      "set:url.osquery_load": 5.719101123595506,
      "set:url.baseinfo_rate": 1649.3370786516855,
      "count": 2190789529,
      "set:url.osquery_rate": 1649.3370786516855,
      "set:url.baseinfo_load": 1.1123595505617978,
      "failed": 0,
      "lag_set:url.network": 0.0019687050825289345,
      "node": "scs-c-prod-iad-adp-elk-hot-14",
      "set:process.exit_code,ENOENT_load": 0,
      "time_in_millis": 1257475481745,
      "latency": 1147.6483379190297,
      "@timestamp": 1603119794,
    },
    "cloud": {
      "instance": {
        "name": "iad-c-prod-2-cluster"
      }
    },
    "input": {
      "type": "container"
    },
    "@timestamp": "2020-10-19T15:03:15.942Z",
    "lag": {
      "total": 0.32399988174438477
    },
    "ecs": {
      "version": "1.5.0"
    },
    "stream": "stdout",
    "@version": "1",
    "host": {
      "hostname": "monitoring-filebeat-filebeat-jmtj6",
      "os": {
        "kernel": "4.14.35-1902.7.3.1.el7uek.x86_64",
        "codename": "Core",
        "name": "CentOS Linux",
        "family": "redhat",
        "version": "7 (Core)",
        "platform": "centos"
      },
      "containerized": false,
      "ip": [
        "10.241.1.53"
      ],
      "name": "monitoring-filebeat-filebeat-jmtj6",
      "id": "8cc8b83c2903418e8e1fa9d45d4fc3e5",
      "mac": [
        "5e:1b:54:02:6c:98"
      ],
      "architecture": "x86_64"
    },
    "event": {
      "ingested": "2020-10-19T15:03:17.885Z",
      "kind": "metric",
      "created": "2020-10-19T15:03:14.529177",
      "category": "elasticsearch",
      "type": "info",
      "dataset": "pipeline",
      "outcome": "success"
    }
  },
  "fields": {
    "event.ingested": [
      "2020-10-19T15:03:17.885Z"
    ],
    "@timestamp": [
      "2020-10-19T15:03:15.942Z"
    ],
    "event.created": [
      "2020-10-19T15:03:14.529Z"
    ]
  },

}

just look into these two from the above
t1 is 1603119705

      "count": 2190647057,
      "time_in_millis": 1257311973991,
      "latency": 1199.862326037742,
      "@timestamp": 1603119705,

t2 is 1603119794

     "count": 2190789529,
      "node": "scs-c-prod-iad-adp-elk-hot-14",
      "time_in_millis": 1257475481745,
      "latency": 1147.6483379190297,
      "@timestamp": 1603119794,
>>> (1257475481745-1257311973991)/(2190789529-2190647057)

1147.6483379190297

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.