Fast way to find recent unique fields

(Ivan) #1

I'm using Jaeger for tracing with Elasticsearch 6.5.2 as the backend. Mapping looks like this:

mapping
{
  "jaeger-span-2019-04-16" : {
    "mappings" : {
      "span" : {
        "_all" : {
          "enabled" : false
        },
        "dynamic_templates" : [
          {
            "span_tags_map" : {
              "path_match" : "tag.*",
              "mapping" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          },
          {
            "process_tags_map" : {
              "path_match" : "process.tag.*",
              "mapping" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          }
        ],
        "properties" : {
          "duration" : {
            "type" : "long"
          },
          "flags" : {
            "type" : "integer"
          },
          "logs" : {
            "properties" : {
              "fields" : {
                "type" : "nested",
                "dynamic" : "false",
                "properties" : {
                  "key" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  },
                  "tagType" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  },
                  "value" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              },
              "timestamp" : {
                "type" : "long"
              }
            }
          },
          "operationName" : {
            "type" : "keyword",
            "ignore_above" : 256
          },
          "parentSpanID" : {
            "type" : "keyword",
            "ignore_above" : 256
          },
          "process" : {
            "properties" : {
              "serviceName" : {
                "type" : "keyword",
                "ignore_above" : 256
              },
              "tag" : {
                "type" : "object"
              },
              "tags" : {
                "type" : "nested",
                "dynamic" : "false",
                "properties" : {
                  "key" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  },
                  "tagType" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  },
                  "value" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              }
            }
          },
          "references" : {
            "type" : "nested",
            "dynamic" : "false",
            "properties" : {
              "refType" : {
                "type" : "keyword",
                "ignore_above" : 256
              },
              "spanID" : {
                "type" : "keyword",
                "ignore_above" : 256
              },
              "traceID" : {
                "type" : "keyword",
                "ignore_above" : 256
              }
            }
          },
          "spanID" : {
            "type" : "keyword",
            "ignore_above" : 256
          },
          "startTime" : {
            "type" : "long"
          },
          "startTimeMillis" : {
            "type" : "date",
            "format" : "epoch_millis"
          },
          "tag" : {
            "type" : "object"
          },
          "tags" : {
            "..." : "..."
          },
          "traceID" : {
            "type" : "keyword",
            "ignore_above" : 256
          }
        }
      },
      "_default_" : {
        "_all" : {
          "enabled" : false
        },
        "dynamic_templates" : [
          {
            "span_tags_map" : {
              "path_match" : "tag.*",
              "mapping" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          },
          {
            "process_tags_map" : {
              "path_match" : "process.tag.*",
              "mapping" : {
                "ignore_above" : 256,
                "type" : "keyword"
              }
            }
          }
        ]
      }
    }
  }
}

Individual documents are "spans" that have spanID and traceID. Multiple spans with the same traceID are considered a single "trace". UI has the need to find the latest X traces in some time span (let's say 1h) that match some criteria.

The current way of doing this is an aggregation like this:

{
  "aggregations": {
    "traceIDs": {
      "aggregations": {
        "startTime": {
          "max": {
            "field": "startTime"
          }
        }
      },
      "terms": {
        "field": "traceID",
        "order": [
          {
            "startTime": "desc"
          }
        ],
        "size": 20
      }
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "startTime": {
              "from": 1555437540000000,
              "include_lower": true,
              "include_upper": true,
              "to": 1555437600000000
            }
          }
        },
        {
          "match": {
            "process.serviceName": {
              "query": "nginx-ssl"
            }
          }
        }
      ]
    }
  },
  "size": 0
}

This seems reasonable, but it's unbearably slow: 20-40s to complete for 1h window. We have 4k spans/s for this service and each trace has two spans, amounting to ~330k hits (~165k buckets then?).

{
  "took": 38372,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 331761,
    "max_score": 0,
    "hits": []
  },
  "other": "stuff"
}

If instead I query for the last X documents and count unique traceID myself, the time drops down to a minuscule 26ms (instead of 38372ms):

{
  "took": 26,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 331761,
    "max_score": null,
    "hits": [
      "100 hits here"
    ]
  },
  "other": "stuff"
}

This is marvelous in terms of performance, but a bit lossy, since some large traces with many spans may dominate the search results (think 10k spans in one trace).

My question is how to do this sort of operation properly, so that it's both faster than the existing aggregation and more complete than the naive search with manual deduplication on the client.

Related PR for Jaeger: https://github.com/jaegertracing/jaeger/pull/1475

(system) closed #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.