We deployed Elasticsearch 5.x on 5 servers, using an nGram tokenizer that splits text into single characters (the `charSplit` analyzer, `min_gram` = `max_gram` = 1).
One of the servers is the master node, and the others are data nodes.
The index settings are as follows:
{ "trimps2": { "settings": { "index": { "number_of_shards": "5", "provided_name": "my_ngram_resource", "creation_date": "1496286488490", "analysis": { "analyzer": { "charSplit": { "type": "custom", "tokenizer": "ngram_tokenizer" } }, "tokenizer": { "ngram_tokenizer": { "token_chars": [ "letter", "digit", "punctuation" ], "min_gram": "1", "type": "nGram", "max_gram": "1" } } }, "number_of_replicas": "0", "uuid": "QAs4jqBfTt2xiO0OMFOXzQ", "version": { "created": "5030099" } } } } }
The mapping is as follows:
{ "trimps2": { "mappings": { "tb": { "properties": { "address": { "type": "text", "store": true, "analyzer": "charSplit" }, "id_card": { "type": "text", "store": true, "analyzer": "charSplit" }, "mobile_phone": { "type": "text", "store": true, "analyzer": "charSplit" }, "phone": { "type": "text", "store": true, "analyzer": "charSplit" }, "post_code": { "type": "text", "store": true, "analyzer": "charSplit" }, "realname": { "type": "text", "store": true, "analyzer": "charSplit" }, "username": { "type": "text", "store": true, "analyzer": "charSplit" } } } } } }
We find that results are returned very slowly when we run a phrase query such as the following:
POST trimps2/tb/_search
{ "query": { "multi_match": { "query": "138141", "type": "phrase", "slop": 0, "fields": [ "username", "realname", "phone", "mobile_phone", "id_card", "address", "post_code" ], "analyzer": "charSplit", "max_expansions": 1 } }, "profile": true, "from": 100 }
The profile section of the response is as follows:
"profile": { "shards": [ { "id": "[TZLIzKdMShijRmqi5q9ZMw][trimps2][1]", "searches": [ { "query": [ { "type": "BooleanQuery", "description": "+(address:"1 3 8 1 4 1" | id_card:"1 3 8 1 4 1" | mobile_phone:"1 3 8 1 4 1" | phone:"1 3 8 1 4 1" | post_code:"1 3 8 1 4 1" | realname:"1 3 8 1 4 1" | username:"1 3 8 1 4 1") #(ConstantScore(_type:tb))^0.0", "time": "83253.58251ms", "breakdown": { "score": 24716112935, "build_scorer_count": 1, "match_count": 31173147, "create_weight": 84788168, "next_doc": 35685629647, "match": 22691767834, "create_weight_count": 1, "next_doc_count": 31173148, "score_count": 11170, "build_scorer": 12926458, "advance": 0, "advance_count": 0 }, "children": [ { "type": "DisjunctionMaxQuery", "description": "(address:"1 3 8 1 4 1" | id_card:"1 3 8 1 4 1" | mobile_phone:"1 3 8 1 4 1" | phone:"1 3 8 1 4 1" | post_code:"1 3 8 1 4 1" | realname:"1 3 8 1 4 1" | username:"1 3 8 1 4 1")", "time": "69752.35071ms", "breakdown": { "score": 24713740053, "build_scorer_count": 1, "match_count": 31173147, "create_weight": 84741719, "next_doc": 0, "match": 20160625790, "create_weight_count": 1, "next_doc_count": 0, "score_count": 11170, "build_scorer": 332724, "advance": 24730552960, "advance_count": 31173148 }, "children": [ { "type": "PhraseQuery", "description": "address:"1 3 8 1 4 1"", "time": "4293.176204ms", "breakdown": { "score": 1278093283, "build_scorer_count": 1, "match_count": 651714, "create_weight": 31859192, "next_doc": 0, "match": 880991501, "create_weight_count": 1, "next_doc_count": 0, "score_count": 27, "build_scorer": 176911, "advance": 2100751859, "advance_count": 651715 } }, ...... "rewrite_time": 112744, "collector": [ { "name": "SimpleTopScoreDocCollector", "reason": "search_top_hits", "time": "24717.87966ms" } ] } ], "aggregations": [ ] },
**As we can see, the return time is about 24717.87966 ms, which is unacceptably slow.**
**Does anyone have an idea how to make this query return data faster?**
Thank you very much.
Elasticsearch version: 5.0.2
JVM version (java -version): 1.8.0
OS version (uname -a if on a Unix-like system): Linux ubuntu 3.16.0-30-generic #40~14.04.1-Ubuntu SMP Thu Jan 15 17:43:14 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux