Kibana performance tuning

We are seeing pretty bad performance in the new environment we set up with one Kibana server and two Elasticsearch nodes. We followed the best practices and adjusted the heap size to 31GB for both values, disabled swapping, and have each node set to 64GB (DDR4) of memory and 12 vCPUs (Gen9 HPE with hyperthreading disabled). Our Hyper-V nodes are not oversubscribed on vCPU to CPU cores. The storage is a 3PAR 8200 with enterprise SSDs and two 16Gbps fiber paths per Hyper-V node. Kibana is configured with 16GB of memory and 6 vCPUs.

Running htop on each node, I see that none of the CPU cores are maxed out, but memory usage is at 35.3GB/62.8GB. We have about a week's worth of data from around 238 hosts, totaling 1,953,491,523 documents. I have both nodes configured for hot data and machine learning, and have configured each index template for two shards. Right now the index that takes up the most space is the one for IIS access and error logs, which I put in their own index, copied from the legacy Filebeat template; next up are the Packetbeat indices. Each index rolls over when it gets close to 50GB.

It is so bad that Kibana sometimes won't even load the Index Management page and times out at 30000ms. Since we are new to Kibana/ES, I suspect the performance issue has to do with our shard configuration or something related. Before switching to Kibana/ES, we ran a two-node Graylog setup with a two-node ES cluster that performed a LOT better than this. Unfortunately, I do not have a document count from the Graylog system.

What would be the next thing to start checking to achieve better performance?

What is the output from the _cluster/stats?pretty&human API?

What version are you on?

Thank you for responding, Mark. I am on version 7.12. Below is the output:

{
  "_nodes" : {
    "total" : 3,
    "successful" : 3,
    "failed" : 0
  },
  "cluster_name" : "######-lr-siem",
  "cluster_uuid" : "RlGV2pvsQ2-MGWrRvrI_FA",
  "timestamp" : 1619489888669,
  "status" : "green",
  "indices" : {
    "count" : 95,
    "shards" : {
      "total" : 182,
      "primaries" : 136,
      "replication" : 0.3382352941176471,
      "index" : {
        "shards" : {
          "min" : 1,
          "max" : 2,
          "avg" : 1.9157894736842105
        },
        "primaries" : {
          "min" : 1,
          "max" : 2,
          "avg" : 1.431578947368421
        },
        "replication" : {
          "min" : 0.0,
          "max" : 1.0,
          "avg" : 0.4842105263157895
        }
      }
    },
    "docs" : {
      "count" : 4199853626,
      "deleted" : 2294479
    },
    "store" : {
      "size" : "2tb",
      "size_in_bytes" : 2270941654262,
      "reserved" : "0b",
      "reserved_in_bytes" : 0
    },
    "fielddata" : {
      "memory_size" : "24.8mb",
      "memory_size_in_bytes" : 26055488,
      "evictions" : 0
    },
    "query_cache" : {
      "memory_size" : "1009.3mb",
      "memory_size_in_bytes" : 1058340419,
      "total_count" : 164654364,
      "hit_count" : 94400151,
      "miss_count" : 70254213,
      "cache_size" : 44709,
      "cache_count" : 174087,
      "evictions" : 129378
    },
    "completion" : {
      "size" : "0b",
      "size_in_bytes" : 0
    },
    "segments" : {
      "count" : 3579,
      "memory" : "141.5mb",
      "memory_in_bytes" : 148418280,
      "terms_memory" : "100.3mb",
      "terms_memory_in_bytes" : 105223096,
      "stored_fields_memory" : "3.7mb",
      "stored_fields_memory_in_bytes" : 3978536,
      "term_vectors_memory" : "0b",
      "term_vectors_memory_in_bytes" : 0,
      "norms_memory" : "48.3kb",
      "norms_memory_in_bytes" : 49472,
      "points_memory" : "0b",
      "points_memory_in_bytes" : 0,
      "doc_values_memory" : "37.3mb",
      "doc_values_memory_in_bytes" : 39167176,
      "index_writer_memory" : "227.3mb",
      "index_writer_memory_in_bytes" : 238416424,
      "version_map_memory" : "185.7kb",
      "version_map_memory_in_bytes" : 190203,
      "fixed_bit_set" : "356.4mb",
      "fixed_bit_set_memory_in_bytes" : 373786120,
      "max_unsafe_auto_id_timestamp" : 1619481608460,
      "file_sizes" : { }
    },
    "mappings" : {
      "field_types" : [
        {
          "name" : "alias",
          "count" : 1506,
          "index_count" : 38
        },
        {
          "name" : "binary",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "boolean",
          "count" : 4512,
          "index_count" : 74
        },
        {
          "name" : "byte",
          "count" : 4,
          "index_count" : 4
        },
        {
          "name" : "constant_keyword",
          "count" : 2,
          "index_count" : 1
        },
        {
          "name" : "date",
          "count" : 5194,
          "index_count" : 80
        },
        {
          "name" : "date_nanos",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "date_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "double",
          "count" : 1422,
          "index_count" : 32
        },
        {
          "name" : "double_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "flattened",
          "count" : 567,
          "index_count" : 27
        },
        {
          "name" : "float",
          "count" : 1699,
          "index_count" : 56
        },
        {
          "name" : "float_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "geo_point",
          "count" : 397,
          "index_count" : 50
        },
        {
          "name" : "geo_shape",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "half_float",
          "count" : 87,
          "index_count" : 21
        },
        {
          "name" : "integer",
          "count" : 294,
          "index_count" : 18
        },
        {
          "name" : "integer_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "ip",
          "count" : 3888,
          "index_count" : 50
        },
        {
          "name" : "ip_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "keyword",
          "count" : 129230,
          "index_count" : 80
        },
        {
          "name" : "long",
          "count" : 39202,
          "index_count" : 74
        },
        {
          "name" : "long_range",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "nested",
          "count" : 130,
          "index_count" : 44
        },
        {
          "name" : "object",
          "count" : 34983,
          "index_count" : 77
        },
        {
          "name" : "scaled_float",
          "count" : 405,
          "index_count" : 3
        },
        {
          "name" : "shape",
          "count" : 1,
          "index_count" : 1
        },
        {
          "name" : "short",
          "count" : 2729,
          "index_count" : 29
        },
        {
          "name" : "text",
          "count" : 4159,
          "index_count" : 72
        },
        {
          "name" : "wildcard",
          "count" : 1,
          "index_count" : 1
        }
      ]
    },
    "analysis" : {
      "char_filter_types" : [ ],
      "tokenizer_types" : [ ],
      "filter_types" : [ ],
      "analyzer_types" : [ ],
      "built_in_char_filters" : [ ],
      "built_in_tokenizers" : [ ],
      "built_in_filters" : [ ],
      "built_in_analyzers" : [
        {
          "name" : "whitespace",
          "count" : 1,
          "index_count" : 1
        }
      ]
    },
    "versions" : [
      {
        "version" : "7.12.0",
        "index_count" : 95,
        "primary_shard_count" : 136,
        "total_primary_size" : "2tb",
        "total_primary_bytes" : 2224941831072
      }
    ]
  },
  "nodes" : {
    "count" : {
      "total" : 3,
      "coordinating_only" : 0,
      "data" : 2,
      "data_cold" : 0,
      "data_content" : 2,
      "data_frozen" : 0,
      "data_hot" : 2,
      "data_warm" : 0,
      "ingest" : 2,
      "master" : 3,
      "ml" : 2,
      "remote_cluster_client" : 2,
      "transform" : 2,
      "voting_only" : 1
    },
    "versions" : [
      "7.12.0"
    ],
    "os" : {
      "available_processors" : 30,
      "allocated_processors" : 30,
      "names" : [
        {
          "name" : "Linux",
          "count" : 3
        }
      ],
      "pretty_names" : [
        {
          "pretty_name" : "Ubuntu 20.04.1 LTS",
          "count" : 3
        }
      ],
      "architectures" : [
        {
          "arch" : "amd64",
          "count" : 3
        }
      ],
      "mem" : {
        "total" : "141.1gb",
        "total_in_bytes" : 151524380672,
        "free" : "4.8gb",
        "free_in_bytes" : 5253730304,
        "used" : "136.2gb",
        "used_in_bytes" : 146270650368,
        "free_percent" : 3,
        "used_percent" : 97
      }
    },
    "process" : {
      "cpu" : {
        "percent" : 25
      },
      "open_file_descriptors" : {
        "min" : 355,
        "max" : 2275,
        "avg" : 1584
      }
    },
    "jvm" : {
      "max_uptime" : "4.9d",
      "max_uptime_in_millis" : 430621878,
      "versions" : [
        {
          "version" : "15.0.1",
          "vm_name" : "OpenJDK 64-Bit Server VM",
          "vm_version" : "15.0.1+9",
          "vm_vendor" : "AdoptOpenJDK",
          "bundled_jdk" : true,
          "using_bundled_jdk" : true,
          "count" : 3
        }
      ],
      "mem" : {
        "heap_used" : "31.8gb",
        "heap_used_in_bytes" : 34199977608,
        "heap_max" : "71.7gb",
        "heap_max_in_bytes" : 77087113216
      },
      "threads" : 665
    },
    "fs" : {
      "total" : "15.1tb",
      "total_in_bytes" : 16670732271616,
      "free" : "7.1tb",
      "free_in_bytes" : 7912097628160,
      "available" : "6.7tb",
      "available_in_bytes" : 7432241364992
    },
    "plugins" : [ ],
    "network_types" : {
      "transport_types" : {
        "security4" : 3
      },
      "http_types" : {
        "security4" : 3
      }
    },
    "discovery_types" : {
      "zen" : 3
    },
    "packaging_types" : [
      {
        "flavor" : "default",
        "type" : "deb",
        "count" : 3
      }
    ],
    "ingest" : {
      "number_of_pipelines" : 50,
      "processor_stats" : {
        "append" : {
          "count" : 2345768538,
          "failed" : 0,
          "current" : 0,
          "time" : "1.7h",
          "time_in_millis" : 6154981
        },
        "conditional" : {
          "count" : 16366964935,
          "failed" : 0,
          "current" : 1,
          "time" : "3.4d",
          "time_in_millis" : 294778638
        },
        "convert" : {
          "count" : 318817569,
          "failed" : 0,
          "current" : 0,
          "time" : "35.5m",
          "time_in_millis" : 2133701
        },
        "date" : {
          "count" : 2330224709,
          "failed" : 0,
          "current" : 0,
          "time" : "16.8h",
          "time_in_millis" : 60823880
        },
        "dot_expander" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "geoip" : {
          "count" : 13119001816,
          "failed" : 0,
          "current" : 1,
          "time" : "4.6d",
          "time_in_millis" : 403242271
        },
        "grok" : {
          "count" : 7111019976,
          "failed" : 111767639,
          "current" : 0,
          "time" : "3.5d",
          "time_in_millis" : 309660369
        },
        "gsub" : {
          "count" : 7115954,
          "failed" : 0,
          "current" : 0,
          "time" : "20.5s",
          "time_in_millis" : 20531
        },
        "json" : {
          "count" : 0,
          "failed" : 0,
          "current" : 0,
          "time" : "0s",
          "time_in_millis" : 0
        },
        "kv" : {
          "count" : 8578232,
          "failed" : 0,
          "current" : 0,
          "time" : "14.6m",
          "time_in_millis" : 877528
        },
        "lowercase" : {
          "count" : 8578232,
          "failed" : 0,
          "current" : 0,
          "time" : "17.3s",
          "time_in_millis" : 17399
        },
        "remove" : {
          "count" : 7153662373,
          "failed" : 0,
          "current" : 0,
          "time" : "7.3h",
          "time_in_millis" : 26315941
        },
        "rename" : {
          "count" : 8796738310,
          "failed" : 0,
          "current" : 0,
          "time" : "10.8h",
          "time_in_millis" : 39111785
        },
        "script" : {
          "count" : 18618742,
          "failed" : 0,
          "current" : 0,
          "time" : "9.9m",
          "time_in_millis" : 599101
        },
        "set" : {
          "count" : 5903691390,
          "failed" : 0,
          "current" : 0,
          "time" : "18.1h",
          "time_in_millis" : 65433534
        },
        "split" : {
          "count" : 1311932,
          "failed" : 0,
          "current" : 0,
          "time" : "53.4s",
          "time_in_millis" : 53488
        },
        "uri_parts" : {
          "count" : 111362667,
          "failed" : 0,
          "current" : 0,
          "time" : "12.6m",
          "time_in_millis" : 760138
        },
        "urldecode" : {
          "count" : 2247362986,
          "failed" : 11,
          "current" : 0,
          "time" : "1.8h",
          "time_in_millis" : 6723509
        },
        "user_agent" : {
          "count" : 2247362975,
          "failed" : 0,
          "current" : 0,
          "time" : "14.9h",
          "time_in_millis" : 53860049
        }
      }
    }
  }
}

By the way, it took 13367 ms to run this query.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.