We deployed Elasticsearch 5.x with a single-character nGram tokenizer on a cluster of 5
servers, one of which is the master node; the others are data nodes.
The index settings are as follows:
{
"trimps2": {
"settings": {
"index": {
"number_of_shards": "5",
"provided_name": "my_ngram_resource",
"creation_date": "1496286488490",
"analysis": {
"analyzer": {
"charSplit": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"token_chars": [
"letter",
"digit",
"punctuation"
],
"min_gram": "1",
"type": "nGram",
"max_gram": "1"
}
}
},
"number_of_replicas": "0",
"uuid": "QAs4jqBfTt2xiO0OMFOXzQ",
"version": {
"created": "5030099"
}
}
}
}
}
The mapping is as follows:
{
"trimps2": {
"mappings": {
"tb": {
"properties": {
"address": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"id_card": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"mobile_phone": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"phone": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"post_code": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"realname": {
"type": "text",
"store": true,
"analyzer": "charSplit"
},
"username": {
"type": "text",
"store": true,
"analyzer": "charSplit"
}
}
}
}
}
}
We find that results are returned very slowly when we run a phrase query such as the following:
POST trimps2/tb/_search
{
"query": {
"multi_match": {
"query": "138141",
"type": "phrase",
"slop": 0,
"fields": [
"username",
"realname",
"phone",
"mobile_phone",
"id_card",
"address",
"post_code"
],
"analyzer": "charSplit",
"max_expansions": 1
}
},
"profile": true,
"from": 100
}
The profile section of the response is as follows:
"profile": {
"shards": [
{
"id": "[TZLIzKdMShijRmqi5q9ZMw][trimps2][1]",
"searches": [
{
"query": [
{
"type": "BooleanQuery",
"description": "+(address:"1 3 8 1 4 1" | id_card:"1 3 8 1 4 1" | mobile_phone:"1 3 8 1 4 1" | phone:"1 3 8 1 4 1" | post_code:"1 3 8 1 4 1" | realname:"1 3 8 1 4 1" | username:"1 3 8 1 4 1") #(ConstantScore(_type:tb))^0.0",
"time": "83253.58251ms",
"breakdown": {
"score": 24716112935,
"build_scorer_count": 1,
"match_count": 31173147,
"create_weight": 84788168,
"next_doc": 35685629647,
"match": 22691767834,
"create_weight_count": 1,
"next_doc_count": 31173148,
"score_count": 11170,
"build_scorer": 12926458,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "DisjunctionMaxQuery",
"description": "(address:"1 3 8 1 4 1" | id_card:"1 3 8 1 4 1" | mobile_phone:"1 3 8 1 4 1" | phone:"1 3 8 1 4 1" | post_code:"1 3 8 1 4 1" | realname:"1 3 8 1 4 1" | username:"1 3 8 1 4 1")",
"time": "69752.35071ms",
"breakdown": {
"score": 24713740053,
"build_scorer_count": 1,
"match_count": 31173147,
"create_weight": 84741719,
"next_doc": 0,
"match": 20160625790,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 11170,
"build_scorer": 332724,
"advance": 24730552960,
"advance_count": 31173148
},
"children": [
{
"type": "PhraseQuery",
"description": "address:"1 3 8 1 4 1"",
"time": "4293.176204ms",
"breakdown": {
"score": 1278093283,
"build_scorer_count": 1,
"match_count": 651714,
"create_weight": 31859192,
"next_doc": 0,
"match": 880991501,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 27,
"build_scorer": 176911,
"advance": 2100751859,
"advance_count": 651715
}
},
......
"rewrite_time": 112744,
"collector": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time": "24717.87966ms"
}
]
}
],
"aggregations": [
]
},
As we can see, the collector time alone is about 24717.87966ms (and the top-level query
time is about 83253.58251ms), which is unacceptably slow. Any ideas on how to make this
query return results faster? Thank you very much.
Elasticsearch version: 5.0.2
JVM version (java -version): 1.8.0
OS version (uname -a if on a Unix-like system): Linux ubuntu 3.16.0-30-generic #40~14.04.1-Ubuntu SMP Thu Jan 15 17:43:14 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux