Why is my query slow?

Hi,

The following query is taking around 2s to complete. It seems very slow knowing that I will need to search for far more documents.

The query:

POST /mail_logs/_search 
{
    "fields": [
        "MAIL_ID",
        "SENDER",
        "RECEIVER",
        "STATUS",
        "INSTANCE",
        "SERVER"
    ],
    "size":10000,
    "query": {
        "filtered": {
            "filter": {
               "bool": {
                    "must": [
                        {
                            "range" : {
                                "CREATEDTIME" : {
                                    "gte":"2016-01-29 00:00:00",
                                    "lt" :"2016-01-30 00:00:00"
                                }
                            }
                        },
                        {
                            "query": {
                                "query_string" : {
                                    "default_field" : "STATUS.raw",
                                    "query" : "sent OR bounced OR deferred OR \"Email moved into the appropriate queue\""
                    			}
                            }
                        }
                    ]
                }
            } 
        }
    } 
}

The response:

{
    "took": 1968,
    "timed_out": false,
    "_shards": {
        "total": 3,
        "successful": 3,
        "failed": 0
    },
    "hits": {
        "total": 190090,
        "max_score": 1,
        "hits": [
            ...
        ]
    }
}

Others information:
Elasticsearch 2.3.3
There is 10M documents in the index.
3 shards.
16GB RAM.

How can I improve the performance of my query?
Thank you.

The mapping:

{
        "mail_logs": {
            "mappings": {
                "jdbc": {
                    "properties": {
                        "CREATEDTIME": {
                            "type": "date",
                            "format": "yyyy-MM-dd HH:mm:ss"
                        },
                        "ERROR": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "FLAG_ALERT": {
                            "type": "long"
                        },
                        "INSTANCE": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "MAIL_ID": {
                            "type": "string"
                        },
                        "MESSAGE_ID": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "RECEIVER": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "RELAY": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "SENDER": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "SERVER": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        },
                        "STATUS": {
                            "type": "string",
                            "fields": {
                                "raw": {
                                    "type": "string",
                                    "index": "not_analyzed"
                                }
                            }
                        }
                   }
               }
    	    }
        }
    }

Elasticsearch.yml:

script.engine.groovy.inline.aggs: on
script.engine.groovy.inline.update: on
script.engine.groovy.inline.search: on
script.groovy.sandbox.enabled: true
index.codec: best_compression
indices.fielddata.cache.size: 20%
node.master: true
node.data: true
bootstrap.mlockall: true
discovery.zen.ping.unicast.hosts: ["127.0.0.1"]
marvel.agent.exporters:
id1:
type: http
host: ["http://localhost:9200"]
marvel.agent.interval: 120s
index.merge.scheduler.max_thread_count: 1

Take a look at the Profile API

Do you need 10,000 hits in one response? Could you paginate or use the scroll API here?

I actually use the scoll api. But i need to scroll through 190 000 docs. And it takes 25 seconds to finish.

As @Glen_Smith noted, the Profile API with Elasticsearch 2.2+ should provide more details over which parts are taking the most time.

You're using a filtered query filter in the query; I'd update this to use bool filter as recommended in the docs. Additionally, since you're doing OR operations on a not_analyzed string field in the query string query, could you use a terms query here? Are you looking for exact matches to the provided terms? And should scores be computed for matching documents based on this query, or should they simply match or not match i.e. should this simply be a filter? I suspect you don't want scores computed.

Moving to a terms query in the filter context and converting to a bool filter, the query would look like

{
  "size": 10000,
  "fields": [
    "MAIL_ID",
    "SENDER",
    "RECEIVER",
    "STATUS",
    "INSTANCE",
    "SERVER"
  ],
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "CREATEDTIME": {
              "gte": "2016-01-29 00:00:00",
              "lt": "2016-01-30 00:00:00"
            }
          }
        },
        {
          "terms": {
            "STATUS.raw": [
              "sent",
              "bounced",
              "deferred",
              "Email moved into the appropriate queue"
            ]
          }
        }
      ]
    }
  }
}

No, I don't need the score. I tried your version but I didn't see a huge amelioration.

I used the profile API but I'm not sure how to interpret it: (I didn't put the data for the 2 others shards but they are pretty similar to the first one)

   "profile": {
	"shards": [
		{
			"id": "[RgUxGA4YRzm3nXrjwLe5zQ][mail_logs][0]",
			"searches": [
				{
					"query": [
						{
							"query_type": "BooleanQuery",
							"lucene": "#CREATEDTIME:[1454025600000 TO 1454112000000} #ConstantScore(STATUS.raw:Email moved into the appropriate queue STATUS.raw:bounced STATUS.raw:deferred STATUS.raw:sent)",
							"time": "48.12477700ms",
							"breakdown": {
								"match": 0,
								"score": 2815607,
								"create_weight": 945792,
								"build_scorer": 1327916,
								"next_doc": 27191608,
								"advance": 0
							},
							"children": [
								{
									"query_type": "MultiTermQueryConstantScoreWrapper",
									"lucene": "CREATEDTIME:[1454025600000 TO 1454112000000}",
									"time": "9.296138000ms",
									"breakdown": {
										"match": 0,
										"score": 0,
										"create_weight": 2026,
										"build_scorer": 1291446,
										"next_doc": 5322958,
										"advance": 2679708
									}
								},
								{
									"query_type": "ConstantScoreQuery",
									"lucene": "ConstantScore(STATUS.raw:Email moved into the appropriate queue STATUS.raw:bounced STATUS.raw:deferred STATUS.raw:sent)",
									"time": "6.547716000ms",
									"breakdown": {
										"match": 0,
										"score": 0,
										"create_weight": 885008,
										"build_scorer": 10536,
										"next_doc": 0,
										"advance": 4026820
									},
									"children": [
										{
											"query_type": "BooleanQuery",
											"lucene": "STATUS.raw:Email moved into the appropriate queue STATUS.raw:bounced STATUS.raw:deferred STATUS.raw:sent",
											"time": "1.625352000ms",
											"breakdown": {
												"match": 0,
												"score": 0,
												"create_weight": 857048,
												"build_scorer": 0,
												"next_doc": 0,
												"advance": 0
											},
											"children": [
												{
													"query_type": "TermQuery",
													"lucene": "STATUS.raw:Email moved into the appropriate queue",
													"time": "0.3468720000ms",
													"breakdown": {
														"match": 0,
														"score": 0,
														"create_weight": 346872,
														"build_scorer": 0,
														"next_doc": 0,
														"advance": 0
													}
												},
												{
													"query_type": "TermQuery",
													"lucene": "STATUS.raw:bounced",
													"time": "0.1316970000ms",
													"breakdown": {
														"match": 0,
														"score": 0,
														"create_weight": 131697,
														"build_scorer": 0,
														"next_doc": 0,
														"advance": 0
													}
												},
												{
													"query_type": "TermQuery",
													"lucene": "STATUS.raw:deferred",
													"time": "0.1430440000ms",
													"breakdown": {
														"match": 0,
														"score": 0,
														"create_weight": 143044,
														"build_scorer": 0,
														"next_doc": 0,
														"advance": 0
													}
												},
												{
													"query_type": "TermQuery",
													"lucene": "STATUS.raw:sent",
													"time": "0.1466910000ms",
													"breakdown": {
														"match": 0,
														"score": 0,
														"create_weight": 146691,
														"build_scorer": 0,
														"next_doc": 0,
														"advance": 0
													}
												}
											]
										}
									]
								}
							]
						},
						{
							"query_type": "BooleanQuery",
							"lucene": "",
							"time": "0.05389400000ms",
							"breakdown": {
								"match": 0,
								"score": 0,
								"create_weight": 3647,
								"build_scorer": 50247,
								"next_doc": 0,
								"advance": 0
							}
						},
						{
							and 18 others BooleanQuery...
						}
					],
					"rewrite_time": 164923,
					"collector": [
						{
							"name": "SimpleTopScoreDocCollector",
							"reason": "search_top_hits",
							"time": "18.13502800ms"
						}
					]
				}
			]
		}
	]
}

I started again from a fresh install of ElasticSearch. The time got down to 200ms. So i guess the problem was from some config.
Thanks for the help.

No worries, glad you got it sorted :slight_smile: