Hi,
I have a problem with a cluster of 4 nodes running ES 2.4.0, upgraded from 2.3.4.
Currently I have 8 indexes, called url-0, url-1, ..., url-7, with around 1B documents in total.
The template for each index is
{
  "template" : "url-*",
  "settings" : {
    "number_of_shards" : "1",
    "number_of_replicas" : "0",
    "refresh_interval" : "180s"
  },
  "mappings" : {
    "url" : {
      "_all" : { "enabled" : false },
      "_timestamp" : { "enabled" : false },
      "_source" : { "enabled" : false },
      "_ttl" : { "enabled" : false },
      "properties" : {
        "url" : { "type" : "string", "index" : "not_analyzed", "store" : true },
        "domain" : { "type" : "string", "index" : "not_analyzed", "store" : false },
        "scheme" : { "type" : "string", "index" : "not_analyzed", "store" : false },
        "parsedUrl" : { "type" : "string", "index" : "analyzed", "store" : false },
        "workerIndex" : { "type" : "integer", "store" : false },
        "error" : { "type" : "boolean", "store" : false },
        "banned" : { "type" : "boolean", "store" : false },
        "timestamp" : { "type" : "date", "store" : true },
        "depth" : { "type" : "integer", "store" : true },
        "processed" : { "type" : "boolean", "store" : false }
      }
    }
  }
}
When I execute the query
curl -XPOST 'localhost:9200/url/url/_search?search_type=scan&scroll=5m&size=50&_source=url&preference=_shards:0;_prefer_node:XqfWWAGFRdCPo98qIeFfGg'
I get
{
  "_scroll_id": "c2Nhbjs4OzMyNzUyNzpYcWZXV0FHRlJkQ1BvOThxSWVGZkdnOzQ3MTkzODplTDZZa3AxSlRVQ3lHd1FMTjR3T3NnOzgxMzY4ODpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzgxMzY4NzpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzgxMzY4NjpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzQ3MTkzOTplTDZZa3AxSlRVQ3lHd1FMTjR3T3NnOzMxNDc1NDpqbUdTWGRmWVRTdWFIT1FFSmZ1WVN3OzgxMzY4OTpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzE7dG90YWxfaGl0czoxMDEzOTAyNTk1Ow==",
  "took": 8221,
  "timed_out": false,
  "_shards": {
    "total": 8,
    "successful": 8,
    "failed": 0
  },
  "hits": {
    "total": 1013902595,
    "max_score": 0,
    "hits": [ ]
  }
}
But if I then query with that scroll ID, I get an error and an NPE on the server:
curl -XPOST 'localhost:9200/_search/scroll?scroll=1m' -d '
{
"scroll_id": "c2Nhbjs4OzMyNzUyNzpYcWZXV0FHRlJkQ1BvOThxSWVGZkdnOzQ3MTkzODplTDZZa3AxSlRVQ3lHd1FMTjR3T3NnOzgxMzY4ODpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzgxMzY4NzpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzgxMzY4NjpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzQ3MTkzOTplTDZZa3AxSlRVQ3lHd1FMTjR3T3NnOzMxNDc1NDpqbUdTWGRmWVRTdWFIT1FFSmZ1WVN3OzgxMzY4OTpwSHVGaVJRM1JSYTIwaWRxNTlFbXNBOzE7dG90YWxfaGl0czoxMDEzOTAyNTk1Ow"
}'
result:
{
  "_scroll_id": "c2NhbjswOzE7dG90YWxfaGl0czoxMDEzOTAyNTk1Ow==",
  "took": 105,
  "timed_out": false,
  "_shards": {
    "total": 8,
    "successful": 0,
    "failed": 8,
    "failures": [
      {
        "shard": -1,
        "index": null,
        "reason": {
          "type": "null_pointer_exception",
          "reason": null
        }
      }
    ]
  },
  "hits": {
    "total": 1013902595,
    "max_score": 0,
    "hits": [ ]
  }
}
And in the logs:
RemoteTransportException[[c2m-es-1][][indices:data/read/search[phase/scan/scroll]]]; nested: NullPointerException;
Caused by: java.lang.NullPointerException
at org.elasticsearch.search.fetch.source.FetchSourceSubPhase.hitExecute(FetchSourceSubPhase.java:79)
at org.elasticsearch.search.fetch.FetchPhase.execute(FetchPhase.java:188)
at org.elasticsearch.search.SearchService.executeScan(SearchService.java:342)
Any hints?
PS: I got the original query from the logs of the Elasticsearch-hadoop library, which I currently use with Apache Spark.
Marco
UPDATE: It turned out to be a problem related to the _source parameter in the query and the fact that the _source field is disabled in the mapping. Is that a bug?
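In the meantime, a possible workaround (just a sketch, based on the mapping above where url is stored): request the stored field via fields instead of _source, e.g.
curl -XPOST 'localhost:9200/url/url/_search?search_type=scan&scroll=5m&size=50&fields=url&preference=_shards:0;_prefer_node:XqfWWAGFRdCPo98qIeFfGg'
With _source disabled, the stored url field is the only place the value could be returned from anyway.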