Inner_hits query extremly slow

Hello!
I have query:

GET route/_search
{
  "stored_fields": [
    "userId"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "userId": {
              "query": 30
            }
          }
        },
        {
          "nested": {
            "query": {
              "bool": {
                "must": [
                  {
                    "nested": {
                      "query": {
                        "match_all": {}
                      },
                      "path": "settings.customers",
                      "inner_hits": {
                        "from": 0,
                        "size": 10
                      }
                    }
                  },
                  {
                    "nested": {
                      "query": {
                        "match_all": {}
                      },
                      "path": "settings.regions",
                      "inner_hits": {
                        "from": 0,
                        "size": 300
                      }
                    }
                  }
          
                ]
              }
            },
            "path": "settings",
            "inner_hits": {
              "from": 0,
              "size": 10
            }
          }
        }
      ]
    }
  }
}

as you see, there are 2 nested query. I need implement pagination for second level entities (settings.customers). But this query is exteremly slow on index with 200к entries (~ 1.5 min for request) when threre are a lot on settings.customers length.

Mapping:

 "mappings": {
    "route": {
      "_source": {
        "enabled": true
      },
      "properties": {
        "userId": {
          "type": "integer"
        },
        "userName": {
          "type": "text",
          "analyzer": "rt_text",
          "fields": {
            "keyword": {
              "type": "keyword"
            }
          }
        },
        "structDepartment": {
          "type": "text",
          "analyzer": "rt_text",
          "fields": {
            "keyword": {
              "type": "keyword"
            }
          }
        },
        "structDepartmentId": {
          "type": "integer"
        },
        "isSpecial": {
          "type": "boolean"
        },
        "hasInn": {
          "type": "boolean"
        },
        "noInn": {
          "type": "boolean"
        },
        "settings": {
          "type": "nested",
          "properties": {
            "id": {
              "type": "integer"
            },
            "customers": {
              "type": "nested",
              "properties": {
                "id": {
                  "type": "integer"
                },
                "name": {
                  "type": "text",
                  "analyzer": "rt_text"
                },
                "inn": {
                  "type": "text"
                },
                "isKeyClient": {
                  "type": "boolean"
                },
                "userId": {
                  "type": "integer"
                },
                "settingId": {
                  "type": "integer"
                }
              }
            },
            "regions": {
              "type": "nested",
              "properties": {
                "id": {
                  "type": "integer"
                }
              }
            },
            "startPrice": {
              "type": "float"
            },
            "endPrice": {
              "type": "float"
            },
            "isFederal": {
              "type": "boolean"
            },
            "hasInn": {
              "type": "boolean"
            },
            "userId": {
              "type": "integer"
            },
            "regionCount": {
              "type": "integer"
            },
            "customersCount": {
              "type": "integer"
            }
          }
        }
      }
    }
  }

After some experiments, I noticed that the more the settings[] in the object, the longer the request is processed.

ES v 5.6.4

Hi @2849ab64d121e26de3a2, can you attach several hot threads dumps while you run the query?

Hi @mvg !
::: {TD10C37B943A72}{2Fa00TWSQbacxLa-0r7ohA}{oUfwX4BrSNKOqCKw9VqOOw}{127.0.0.1}{127.0.0.1:9300}
Hot threads at 2017-11-29T11:01:37.166Z, interval=500ms, busiestThreads=3, ignoreIdleThreads=true:

   87.5% (437.5ms out of 500ms) cpu usage by thread 'elasticsearch[TD10C37B943A72][search][T#3]'
     10/10 snapshots sharing following 15 elements
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:153)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:426)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:407)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:404)
       org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69)
       org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:654)
       org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:638)
       org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
       java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
       java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
       java.lang.Thread.run(Unknown Source)
   100.0% (500ms out of 500ms) cpu usage by thread 'elasticsearch[TD10C37B943A72][search][T#4]'
     5/10 snapshots sharing following 35 elements
       com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishString2(UTF8StreamJsonParser.java:2552)
       com.fasterxml.jackson.core.json.UTF8StreamJsonParser._finishAndReturnString(UTF8StreamJsonParser.java:2469)
       com.fasterxml.jackson.core.json.UTF8StreamJsonParser.getText(UTF8StreamJsonParser.java:315)
       org.elasticsearch.common.xcontent.json.JsonXContentParser.text(JsonXContentParser.java:86)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:385)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:333)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:391)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readList(AbstractXContentParser.java:376)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:393)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:333)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:391)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readList(AbstractXContentParser.java:376)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:393)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:333)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readOrderedMap(AbstractXContentParser.java:300)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.mapOrdered(AbstractXContentParser.java:256)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:141)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:114)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:92)
       org.elasticsearch.search.fetch.FetchPhase.createNestedSearchHit(FetchPhase.java:253)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:153)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:426)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:407)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:404)
       org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69)
       org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:654)
       org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:638)
       org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
       java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
       java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
       java.lang.Thread.run(Unknown Source)
     5/10 snapshots sharing following 29 elements
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:391)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readList(AbstractXContentParser.java:376)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:393)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:333)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:391)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readList(AbstractXContentParser.java:376)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readValue(AbstractXContentParser.java:393)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readMap(AbstractXContentParser.java:333)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.readOrderedMap(AbstractXContentParser.java:300)
       org.elasticsearch.common.xcontent.support.AbstractXContentParser.mapOrdered(AbstractXContentParser.java:256)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:141)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:114)
       org.elasticsearch.common.xcontent.XContentHelper.convertToMap(XContentHelper.java:92)
       org.elasticsearch.search.fetch.FetchPhase.createNestedSearchHit(FetchPhase.java:253)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:153)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.fetch.subphase.InnerHitsFetchSubPhase.hitsExecute(InnerHitsFetchSubPhase.java:75)
       org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:170)
       org.elasticsearch.search.SearchService.executeFetchPhase(SearchService.java:426)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:407)
       org.elasticsearch.action.search.SearchTransportService$12.messageReceived(SearchTransportService.java:404)
       org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:69)
       org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:654)
       org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:638)
       org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37)
       java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
       java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
       java.lang.Thread.run(Unknown Source)

From your second hot thread I can see that a lot of time spent on source manipulation for the nested documents. Fetching _source from nested document has an overhead that is noticeable when fetching a lot of inner hits.

To avoid this overhead, you can also disable source feting for nested documents and return doc value fields instead by setting the following parameters to your inner_hits snippets:

"inner_hits": {
   ...
   "_source" : false,
   "docvalue_fields" : ["my_field1", "my_field2", ...]
}

@mvg!
Many thanks!
After changing the request, the time decreased from "504 response" to 50ms.
Perfectly!
ES is awesome!