We are a very large high-tech organization, and we are trying out ES for one of our products. We are using ES v1.6 and elasticsearch-mapper-attachments v2.6.0. When searching with highlighting, we experience very high CPU load initially, followed by very high heap usage. Without highlighting it is somewhat better, but still not at the tens-of-milliseconds level. I've uploaded one file, 2 MB in size, for testing.
What am I using?
I'm using AWS EC2 t2.medium instances with 2 GB of memory allocated (mlockall=true) and OpenJDK v1.7.0_79. It's a 3-node cluster using elasticsearch-cloud-aws v2.6.0.
What are the settings?
- elasticsearch.config
cluster.name: my_cluster
plugin.mandatory: mapper-attachments
bootstrap.mlockall: true
index.mapper.dynamic: false
action.destructive_requires_name: true
action.disable_shutdown: true
cloud.aws.region: us-east
discovery.zen.ping.multicast.enabled: false
discovery.zen.minimum_master_nodes: 2
discovery.type: ec2
discovery.ec2.groups: dev-es-sg
discovery.ec2.tag.Role: role-elasticsearch
gateway.recover_after_nodes: 2
gateway.recover_after_time: 5m
gateway.expected_nodes: 3
action.auto_create_index: false
script.inline: on
document mapping
{
"my_index": {
"mappings": {
"document": {
"dynamic": "strict",
"index_analyzer": "index_analyzer",
"search_analyzer": "search_analyzer",
"_id": {
"path": "id"
},
"properties": {
"content": {
"type": "attachment",
"path": "full",
"fields": {
"content": {
"type": "string",
"store": true,
"term_vector": "with_positions_offsets"
},
"author": {
"type": "string"
},
"title": {
"type": "string"
},
"name": {
"type": "string"
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"keywords": {
"type": "string"
},
"content_type": {
"type": "string"
},
"content_length": {
"type": "integer"
},
"language": {
"type": "string"
}
}
},
"description": {
"type": "string"
},
"fileName": {
"type": "string"
},
"id": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"tenantId": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
}
}
}
}
}
}
index settings
{
"my_index": {
"settings": {
"index": {
"creation_date": "1436454677044",
"uuid": "C8bO_Ef1QIC5L9yxIFz_Hw",
"analysis": {
"analyzer": {
"search_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"kstem"
],
"tokenizer": "standard"
},
"index_analyzer": {
"type": "custom",
"char_filter": [
"html_strip"
],
"filter": [
"english_possessive_stemmer",
"asciifolding",
"word_delimiter",
"lowercase",
"english_stop",
"kstem",
"edgeNGram"
],
"tokenizer": "standard"
}
},
"filter": {
"english_stop": {
"type": "stop",
"stopwords": "english"
},
"edgeNGram": {
"max_gram": "15",
"min_gram": "2",
"type": "edgeNGram",
"side": "front"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
},
"word_delimiter": {
"preserve_original": "true",
"catenate_words": "true",
"type": "word_delimiter",
"catenate_numbers": "true"
}
}
},
"number_of_replicas": "1",
"number_of_shards": "3",
"version": {
"created": "1060099"
}
}
}
}
}
nodes stat
{
"cluster_name": "my_cluster",
"nodes": {
"VQR6nTK4R6yIBZwsLY_Sdw": {
"name": "Colleen Wing",
"transport_address": "inet[/10.178.115.30:9300]",
"host": "ip-10-178-115-30",
"ip": "10.178.115.30",
"version": "1.6.0",
"build": "cdd3ac4",
"http_address": "inet[/10.178.115.30:9200]",
"process": {
"refresh_interval_in_millis": 1000,
"id": 4690,
"max_file_descriptors": 65535,
"mlockall": true
}
},
"FfjFnmLFS86xrR3HuaKYEw": {
"name": "Salvo",
"transport_address": "inet[/10.178.115.52:9300]",
"host": "ip-10-178-115-52",
"ip": "10.178.115.52",
"version": "1.6.0",
"build": "cdd3ac4",
"http_address": "inet[/10.178.115.52:9200]",
"process": {
"refresh_interval_in_millis": 1000,
"id": 6682,
"max_file_descriptors": 65535,
"mlockall": true
}
},
"mxKM02SeQgywR5V140tF3A": {
"name": "Man-Eater",
"transport_address": "inet[/10.178.115.75:9300]",
"host": "ip-10-178-115-75",
"ip": "10.178.115.75",
"version": "1.6.0",
"build": "cdd3ac4",
"http_address": "inet[/10.178.115.75:9200]",
"process": {
"refresh_interval_in_millis": 1000,
"id": 6578,
"max_file_descriptors": 65535,
"mlockall": true
}
}
}
}
query
GET /documents/document/_search
{
"query": {
"filtered": {
"query": {
"match": {
"_all": "configu"
}
},
"filter": {
"term": {
"tenantId": "123"
}
}
}
},
"highlight": {
"fields": {
"*": {}
}
}
}