First time query operations are pretty slow

For the first time elasticsearch query time is too long on a collection with 400+ million documents.The problem basically is "first time query operations are pretty slow".
However, we want to understand the ROOT CAUSE and unable to identify the same.

My setup consists of:

2 nodes with 8 cores and 16 GB RAM each and 5 GB allocated to Elasticsearch.
500 GB hard disk
doc size : 9kb

Top Command screenshots

ES

I'm using this kind of mapping

{
  "event": {
    "mappings": {
      "doc": {
        "properties": {
          "@timestamp": {
            "type": "date"
          },
          "@version": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "event": {
            "properties": {
              "contextProperties": {
                "properties": {
                  "carId": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              },
              "eventName": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "projectId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "time": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "ts": {
                "type": "long"
              },
              "userId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "value": {
                "type": "long"
              }
            }
          },
          "id": {
            "type": "keyword",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "timestamp": {
            "type": "date"
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

And the type of query I usually make:

{
  "aggs": {
    "catView": {
      "date_histogram": {
        "field": "timestamp",
        "interval": "1d",
        "time_zone": "Asia/Kolkata",
        "min_doc_count": 1
      },
      "aggs": {
        "sub_Data": {
          "terms": {
            "field": "event.contextProperties.carId.keyword",
            "size": 13
          },
          "aggs": {
            "mou_Count": {
              "sum": {
                "field": "event.value"
              }
            }
          }
        }
      }
    }
  },
  "size": 0,
  "_source": {
    "excludes": []
  },
  "stored_fields": [
    "*"
  ],
  "script_fields": {},
  "docvalue_fields": [
    {
      "field": "timestamp",
      "format": "date_time"
    }
  ],
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "12345"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "123456"
            }
          }
        },
        {
          "range": {
            "timestamp": {
              "gte": 1569349800000,
              "lte": 1569868200000,
              "format": "epoch_millis"
            }
          }
        },
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "45678"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "8765"
            }
          }
        }
      ],
      "filter": [
        {
          "match_all": {}
        },
        {
          "match_all": {}
        }
      ],
      "should": []
    }
  },
  "timeout": "30000ms"
}

On this query it has to search on the fields carouselId, projectId, eventName and timestamp and doing the aggregation on value.

Any recommendation for boosting/caching the first query is welcome. Thanks in advance

I’m using Elasticsearch 6.8.3.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.