First time query operations are pretty slow

The first execution of an Elasticsearch query takes too long on a collection with 400+ million documents. The problem, basically, is that "first time query operations are pretty slow".
However, we want to understand the ROOT CAUSE and have been unable to identify it.

My setup consists of:

2 nodes, each with 8 cores and 16 GB RAM, with 5 GB allocated to Elasticsearch.
500 GB hard disk
Doc size: 9 KB

Top Command screenshots

ES

I'm using this kind of mapping:

{
  "event": {
    "mappings": {
      "doc": {
        "properties": {
          "@timestamp": {
            "type": "date"
          },
          "@version": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "event": {
            "properties": {
              "contextProperties": {
                "properties": {
                  "carId": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              },
              "eventName": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "projectId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "time": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "ts": {
                "type": "long"
              },
              "userId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "value": {
                "type": "long"
              }
            }
          },
          "id": {
            "type": "keyword",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "timestamp": {
            "type": "date"
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

And the type of query I usually make:

{
  "aggs": {
    "catView": {
      "date_histogram": {
        "field": "timestamp",
        "interval": "1d",
        "time_zone": "Asia/Kolkata",
        "min_doc_count": 1
      },
      "aggs": {
        "sub_Data": {
          "terms": {
            "field": "event.contextProperties.carId.keyword",
            "size": 13
          },
          "aggs": {
            "mou_Count": {
              "sum": {
                "field": "event.value"
              }
            }
          }
        }
      }
    }
  },
  "size": 0,
  "_source": {
    "excludes": []
  },
  "stored_fields": [
    "*"
  ],
  "script_fields": {},
  "docvalue_fields": [
    {
      "field": "timestamp",
      "format": "date_time"
    }
  ],
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "12345"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "123456"
            }
          }
        },
        {
          "range": {
            "timestamp": {
              "gte": 1569349800000,
              "lte": 1569868200000,
              "format": "epoch_millis"
            }
          }
        },
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "45678"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "8765"
            }
          }
        }
      ],
      "filter": [
        {
          "match_all": {}
        },
        {
          "match_all": {}
        }
      ],
      "should": []
    }
  },
  "timeout": "30000ms"
}

This query has to search on the fields carId, projectId, eventName and timestamp, and performs the aggregation on value.

Any recommendations for speeding up or caching the first query are welcome. Thanks in advance.

I’m using Elasticsearch 6.8.3.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.