Term and Has_Child Query Optimization
I'm running large queries (20 terms and 20 has_child queries) and am looking
for ways to optimize the response time, which is currently 8 minutes on 4
million docs. A pure term query takes just a few seconds. At a high level, the
has_child queries are for collections that users create. Since they change,
they are stored as child documents. The query is meant to capture things the
user "likes" in the form of terms and other users' collections, so I can't
require any one item, and I want to rank highly the documents that match a lot
of liked terms and collections. The question is: are there alternatives to the
method I've chosen that are faster? I've included an example.
Numbers
Documents: 4 million
Collection Items: 18 million
Cluster: two AWS m3.xlarge instances, ten shards
Small Example
Mapping
curl -XPUT 'http://localhost:9200/collection-test?pretty=true' -d '{
"settings" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
},
"mappings" : {
"document": {
"properties": {
"bodyText": { "type": "string" }
}
},
"collection_item": {
"_parent": { "type": "document" },
"_all" : {"enabled" : false},
"properties": {
"collection_id": { "type": "integer", "index":
"not_analyzed" }
}
}
}
}'
Documents
curl -XPUT 'http://localhost:9200/collection-test/document/1' -d '{
"bodyText" : "Creativity is inteligence having fun - Albert Einstein"
}'
curl -XPUT 'http://localhost:9200/collection-test/document/2' -d '{
"bodyText" : "Anything one man can imagine, other men can make real. - Jules Verne"
}'
curl -XPUT 'http://localhost:9200/collection-test/document/3' -d '{
"bodyText" : "Man will become better when you show him what he is like. - Anton Chekhov"
}'
Collections
curl -XPOST localhost:9200/collection-test/collection_item/1?parent=1 -d '{
"collection_id" : "1" }'
curl -XPOST localhost:9200/collection-test/collection_item/2?parent=1 -d '{
"collection_id" : "2" }'
curl -XPOST localhost:9200/collection-test/collection_item/4?parent=2 -d '{
"collection_id" : "2" }'
Multiple Term and Multiple Collection Query
curl -XPOST localhost:9200/collection-test/document/_search?pretty=true -d '{
"query" : {
"bool" : {
"should" : [
{
"term" : { "bodyText" : { "value" : "anything", "boost" :
1.0 } }
},
{
"term" : { "bodyText" : { "value" : "man", "boost" : 1.0 }}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "1" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "2" }
}
}
}
],
"minimum_number_should_match" : 1
}
}
}'
Delete Index
curl -XDELETE 'http://localhost:9200/collection-test/'
Large Query Example
curl -XPOST localhost:9200/collection-test/document/_search?pretty=true -d '{
"fields" : ["_id", "title","summary"],
"query" : {
"bool" : {
"should" : [
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"harry potter\"^1.0" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"j.k. rowling\"^0.4083824" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"final movie\"^0.40137964" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"fantasy series\"^0.3629825" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"box office records\"^0.35038263" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"breaking dawn\"^0.11963159" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"final installment\"^0.11438772" }
},
{
"query_string" : { "default_field" : "bodyText", "query" :
"\"film series\"^0.35038263" }
},
{
"term" : { "bodyText" : { "value" : "potter", "boost" :
0.805837 } }
},
{
"term" : { "bodyText" : { "value" : "deathly", "boost" :
0.46554363 } }
},
{
"term" : { "bodyText" : { "value" : "hallows", "boost" :
0.46430007 }}
},
{
"term" : { "bodyText" : { "value" : "rowling", "boost" :
0.3994508 } }
},
{
"term" : { "bodyText" : { "value" : "j.k.", "boost" :
0.39741242 }}
},
{
"term" : { "bodyText" : { "value" : "pottermore", "boost" :
0.36284378 } }
},
{
"term" : { "bodyText" : { "value" : "dumbledore", "boost" :
0.36096284 }}
},
{
"term" : { "bodyText" : { "value" : "muggles", "boost" :
0.3579579 } }
},
{
"term" : { "bodyText" : { "value" : "harry", "boost" :
0.17482029 }}
},
{
"term" : { "bodyText" : { "value" : "grint", "boost" :
0.12138573 } }
},
{
"term" : { "bodyText" : { "value" : "hogwarts", "boost" :
0.119226046 }}
},
{
"term" : { "bodyText" : { "value" : "blackly", "boost" :
0.11385573 } }
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "445" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "529" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "93" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "480" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "341" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "99" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "563" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "34" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "347" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "355" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "571" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "95" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "96" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "108" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "435" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "474" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "550" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "326" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "514" }
}
}
},
{
"has_child" : {
"type" : "collection_item",
"boost": "1.0",
"query" : {
"term" : { "collection_id" : "490" }
}
}
}
],
"minimum_number_should_match" : 1
}
}
}'
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.