First time query operations are pretty slow

sudheervarmam · October 1, 2019, 9:06am

The first time a query runs, Elasticsearch takes too long on a collection with 400+ million documents. Basically, the problem is that "first time query operations are pretty slow".
We want to understand the ROOT CAUSE, but have been unable to identify it.

My setup consists of:

2 nodes, each with 8 cores and 16 GB RAM, of which 5 GB is allocated to Elasticsearch.
500 GB hard disk
Document size: 9 KB

Top Command screenshots

I'm using this kind of mapping:

{
  "event": {
    "mappings": {
      "doc": {
        "properties": {
          "@timestamp": {
            "type": "date"
          },
          "@version": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "event": {
            "properties": {
              "contextProperties": {
                "properties": {
                  "carId": {
                    "type": "text",
                    "fields": {
                      "keyword": {
                        "type": "keyword",
                        "ignore_above": 256
                      }
                    }
                  }
                }
              },
              "eventName": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "projectId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "time": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "ts": {
                "type": "long"
              },
              "userId": {
                "type": "keyword",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "value": {
                "type": "long"
              }
            }
          },
          "id": {
            "type": "keyword",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "timestamp": {
            "type": "date"
          },
          "type": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    }
  }
}

And the type of query I usually make:

{
  "aggs": {
    "catView": {
      "date_histogram": {
        "field": "timestamp",
        "interval": "1d",
        "time_zone": "Asia/Kolkata",
        "min_doc_count": 1
      },
      "aggs": {
        "sub_Data": {
          "terms": {
            "field": "event.contextProperties.carId.keyword",
            "size": 13
          },
          "aggs": {
            "mou_Count": {
              "sum": {
                "field": "event.value"
              }
            }
          }
        }
      }
    }
  },
  "size": 0,
  "_source": {
    "excludes": []
  },
  "stored_fields": [
    "*"
  ],
  "script_fields": {},
  "docvalue_fields": [
    {
      "field": "timestamp",
      "format": "date_time"
    }
  ],
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "12345"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "123456"
            }
          }
        },
        {
          "range": {
            "timestamp": {
              "gte": 1569349800000,
              "lte": 1569868200000,
              "format": "epoch_millis"
            }
          }
        },
        {
          "bool": {
            "minimum_should_match": 1,
            "should": [
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "45678"
                }
              },
              {
                "match_phrase": {
                  "event.contextProperties.carId.keyword": "NA"
                }
              }
            ]
          }
        },
        {
          "match_phrase": {
            "event.projectId.keyword": {
              "query": "8765"
            }
          }
        }
      ],
      "filter": [
        {
          "match_all": {}
        },
        {
          "match_all": {}
        }
      ],
      "should": []
    }
  },
  "timeout": "30000ms"
}

This query has to search on the fields carId, projectId, eventName and timestamp, and then aggregate on value.

Any recommendations for speeding up or caching the first query are welcome. Thanks in advance.

I’m using Elasticsearch 6.8.3.

system · October 29, 2019, 9:06am

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.

Topic		Replies	Views
First time queries (not filters) to Elasticsearch takes lot of time Elasticsearch	7	2654	February 1, 2017
First time query with different params is taking a lot of time Elasticsearch	18	996	January 24, 2019
Painfully slow queries Elasticsearch	11	3025	July 5, 2017
Regarding elasticsearch insert operations Elasticsearch	2	273	July 6, 2017
Query takes to much time in elastic compare to sphinx Elasticsearch	5	600	August 27, 2020

First time query operations are pretty slow

Related topics