Correct mapping/search query for my data?


(Zelfapp) #1

This is my php array mapping for my index and type. Below that is the query
I'm currently using and an example of results and the issue I'm not sure
how to fix in either my mapping or in my search query.

Summary of mapping:

  • We have partial matching on the award_name field, which is an
    alphanumeric string.
  • We also boost exact matches on the award_name field.
  • On the "sku" fields we do the same partial matching and exact matches.
    • On the "search" fields we do the same partial matching and exact
      matches.

$params = [
'index' => 'ezr_v1',
'body' => [
'settings' => [
'number_of_shards' => 3,
'number_of_replicas' => 1,
'analysis' => [
'filter' => [
'name_ngrams' => [
'side' => 'front',
'max_gram' => '10',
'min_gram' => 1,
'type' => 'edgeNGram'
]
],
'analyzer' => [
'full_name' => [
'filter' => [
'standard',
'lowercase'
],
'type' => 'custom',
'tokenizer' => 'standard'
],
'partial_name' => [
'filter' => [
'standard',
'lowercase',
'name_ngrams'
],
'type' => 'custom',
'tokenizer' => 'standard'
]
]
]
],
'mappings' => [
'ezr_' [
'_all' => [
'enabled' => false
],
'properties' => [
'award_name' => [
'fields' => [
'partial' => [
'search_analyzer' => 'full_name',
'index_analyzer' => 'partial_name',
'type' => 'string'
],
'award_name' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'medal' => [
'properties' => [
'anodized' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
],
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'mini' => [
'properties' => [
'anodized' => [
'properties' => [
'image' => [
'type' => 'string',
'index' => 'no'
],
'price' => [
'type' => 'string',
'index' => 'no'
],
'product_name' => [
'type' => 'string',
'index' => 'no'
],
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
],
'weight' => [
'type' => 'string',
'index' => 'no'
]
]
],
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'mini_ribbon' => [
'properties' => [
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'ribbon' => [
'properties' => [
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
],
'thin' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'search' => [
'properties' => [
'abbrev_long' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'abbrev_long' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'abbrev_short' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'abbrev_short' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'data_code' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'data_code' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
]
]
]
];

Bool query:

{
"query" : {
"bool" : {
"should" : [
{
"match" : {
"award_name" : {
"boost" : 4,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"award_name.partial" : "navy meritorious"
}
},
{
"match" : {
"ribbon.standard.sku" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"ribbon.standard.sku.partial" : "navy meritorious"
}
},
{
"match" : {
"search.abbrev_long" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.abbrev_long.partial" : "navy meritorious"
}
},
{
"match" : {
"search.data_code" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.data_code.partial" : "navy meritorious"
}
},
{
"match" : {
"search.abbrev_short" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.abbrev_short.partial" : "navy meritorious"
}
}
]
}
}
}

Partial Search Results:

{
"took": 8,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 79,
"max_score": 0.5152277,
"hits": [
{
"_index": "ezrshop.v1",
"type": "ezr",
"_id": "552d9597628062741c000e52",
"_score": 0.5152277,
"_source": {
"award_name": "Navy Cross",
"precedence": {
"army": 30,
"navy": 20,
"marine_corps": 20,
"air_force": 40,
"coast_guard": 20
},
"search": {
"data_code": "MILDECNX",
"abbrev_short": "NC",
"abbrev_long": "NAVY CROSS"
},
"ribbon": {
"standard": {
"product_name": "Navy Cross Medal Ribbon",
"sku": "512 NCMR",
"price": "1.3900",
"weight": "0.015",
"image": "navy_cross_512_ncmr.jpg"
},
"thin": {
"product_name": "Navy Cross Medal Thin Ribbon",
"sku": "138-THR101-7960100",
"price": "0.8900",
"weight": "0.015",
"image": "navy_cross_512_ncmr.jpg"
}
},
"medal": {
"standard": {
"product_name": "Navy Cross Medal",
"sku": "171 NCM",
"price": "69.9900",
"weight": "0.090",
"image": "NavyCross.jpg"
},
"anodized": {
"product_name": "Navy Cross Anodized Medal",
"sku": "6861",
"price": "83.5900",
"weight": "0.090",
"image": "NAVY_CROSS_ANO.jpg"
}
},
"mini": {
"standard": {
"product_name": "Navy Cross Miniature Medal",
"sku": "2071 NAVCROSS-MINI",
"price": "11.4900",
"weight": "0.080",
"image": "NavyCrossMedalFNL.jpg"
},
"anodized": {
"product_name": "Navy Cross Anodized Miniature Medal",
"sku": "7471",
"price": "13.6900",
"weight": "0.090",
"image": "NAVYCROSS_MAND_33XX.jpg"
}
},
"mini_ribbon": {
"standard": {
"product_name": "Navy Cross Medal Mini Ribbon",
"sku": "293-MTR",
"price": "0.8900",
"weight": "0.015",
"image": "navy_cross_512_ncmr_mini.jpg"
}
},
"mongo_id": "552d9597628062741c000e52"
}
},
{
"_index": "ezrshop.v1",
"type": "ezr",
"_id": "552d959c628062741c000e85",
"_score": 0.5143846,
"_source": {
"award_name": "Joint Meritorious Unit Award with
NAVY/AF/MC/CG Frame",
"precedence": {
"army": 470,
"navy": 470,
"marine_corps": 470,
"air_force": 470,
"coast_guard": 510
},
"search": {
"data_code": "UNTAWDJU",
"abbrev_short": "JMUA",
"abbrev_long": "JT MERITORIOUS UNIT AWD"
},
"ribbon": {
"standard": {
"product_name": "Joint Meritorious Unit Award with
NAVY/AF/MC/CG Frame",
"sku": "483 NJMAR",
"price": "2.8000",
"weight": "0.015",
"image":
"joint_meritorious_with_navy_af_mc_cg_frame_483_njmar.jpg"
},
"thin": {
"product_name": "Joint Meritorious Unit Award - Thin
Ribbon with NAVY/AF/MC/CG Frame",
"sku": "SPECTHIN-109-B",
"price": "2.4900",
"weight": "0.015",
"image":
"joint_meritorious_with_navy_af_mc_cg_frame_483_njmar.jpg"
}
},
"mongo_id": "552d959c628062741c000e85"
}
}

I want the second result to rank first because its award_name, which is
being boosted, contains both "meritorious" and "navy". I know there is a
lot going on here with relevance scoring, but if the award_name contains
more than one of the search terms, regardless of their order, I want that
award to score higher. How do I do that in either my mapping/analyzers or
in my search query?

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/bda701cf-4f64-4cf5-ac5c-2d12458f6d2f%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(Zelfapp) #2

I also don't want the order to matter. e.g. if I typed "meritorious navy"
vs. "navy meritorious" I should get nearly or exactly the same results.

On Friday, April 24, 2015 at 5:16:18 PM UTC-7, Zelfapp wrote:

This is my php array mapping for my index and type. Below that is the
query I'm currently using and an example of results and the issue I'm not
sure how to fix in either my mapping or in my search query.

Summary of mapping:

  • We have partial matching on the award_name field, which is an
    alphanumeric string.
  • We also boost exact matches on the award_name field .
  • On the "sku" fields we do the same partial matching and exact
    matches.
    • On the "search" fields we do the same partial matching and exact
      matches.

$params = [
'index' => 'ezr_v1',
'body' => [
'settings' => [
'number_of_shards' => 3,
'number_of_replicas' => 1,
'analysis' => [
'filter' => [
'name_ngrams' => [
'side' => 'front',
'max_gram' => '10',
'min_gram' => 1,
'type' => 'edgeNGram'
]
],
'analyzer' => [
'full_name' => [
'filter' => [
'standard',
'lowercase'
],
'type' => 'custom',
'tokenizer' => 'standard'
],
'partial_name' => [
'filter' => [
'standard',
'lowercase',
'name_ngrams'
],
'type' => 'custom',
'tokenizer' => 'standard'
]
]
]
],
'mappings' => [
'ezr_' [
'_all' => [
'enabled' => false
],
'properties' => [
'award_name' => [
'fields' => [
'partial' => [
'search_analyzer' => 'full_name',
'index_analyzer' => 'partial_name'
,
'type' => 'string'
],
'award_name' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'medal' => [
'properties' => [
'anodized' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
],
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'mini' => [
'properties' => [
'anodized' => [
'properties' => [
'image' => [
'type' => 'string',
'index' => 'no'
],
'price' => [
'type' => 'string',
'index' => 'no'
],
'product_name' => [
'type' => 'string',
'index' => 'no'
],
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
],
'weight' => [
'type' => 'string',
'index' => 'no'
]
]
],
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'mini_ribbon' => [
'properties' => [
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'ribbon' => [
'properties' => [
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
],
'thin' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'search' => [
'properties' => [
'abbrev_long' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'abbrev_long' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'abbrev_short' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'abbrev_short' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'data_code' => [
'fields' => [
'partial' => [
'search_analyzer' =>
'full_name',
'index_analyzer' =>
'partial_name',
'type' => 'string'
],
'data_code' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
]
]
]
];

Bool query:

{
"query" : {
"bool" : {
"should" : [
{
"match" : {
"award_name" : {
"boost" : 4,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"award_name.partial" : "navy meritorious"
}
},
{
"match" : {
"ribbon.standard.sku" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"ribbon.standard.sku.partial" : "navy meritorious"
}
},
{
"match" : {
"search.abbrev_long" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.abbrev_long.partial" : "navy meritorious"
}
},
{
"match" : {
"search.data_code" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.data_code.partial" : "navy meritorious"
}
},
{
"match" : {
"search.abbrev_short" : {
"boost" : 1,
"query" : "navy meritorious"
}
}
},
{
"match" : {
"search.abbrev_short.partial" : "navy meritorious"
}
}
]
}
}
}

Partial Search Results:

{
"took": 8,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 79,
"max_score": 0.5152277,
"hits": [
{
"_index": "ezrshop.v1",
"type": "ezr",
"_id": "552d9597628062741c000e52",
"_score": 0.5152277,
"_source": {
"award_name": "Navy Cross",
"precedence": {
"army": 30,
"navy": 20,
"marine_corps": 20,
"air_force": 40,
"coast_guard": 20
},
"search": {
"data_code": "MILDECNX",
"abbrev_short": "NC",
"abbrev_long": "NAVY CROSS"
},
"ribbon": {
"standard": {
"product_name": "Navy Cross Medal Ribbon",
"sku": "512 NCMR",
"price": "1.3900",
"weight": "0.015",
"image": "navy_cross_512_ncmr.jpg"
},
"thin": {
"product_name": "Navy Cross Medal Thin Ribbon",
"sku": "138-THR101-7960100",
"price": "0.8900",
"weight": "0.015",
"image": "navy_cross_512_ncmr.jpg"
}
},
"medal": {
"standard": {
"product_name": "Navy Cross Medal",
"sku": "171 NCM",
"price": "69.9900",
"weight": "0.090",
"image": "NavyCross.jpg"
},
"anodized": {
"product_name": "Navy Cross Anodized Medal",
"sku": "6861",
"price": "83.5900",
"weight": "0.090",
"image": "NAVY_CROSS_ANO.jpg"
}
},
"mini": {
"standard": {
"product_name": "Navy Cross Miniature Medal",
"sku": "2071 NAVCROSS-MINI",
"price": "11.4900",
"weight": "0.080",
"image": "NavyCrossMedalFNL.jpg"
},
"anodized": {
"product_name": "Navy Cross Anodized Miniature Medal"
,
"sku": "7471",
"price": "13.6900",
"weight": "0.090",
"image": "NAVYCROSS_MAND_33XX.jpg"
}
},
"mini_ribbon": {
"standard": {
"product_name": "Navy Cross Medal Mini Ribbon",
"sku": "293-MTR",
"price": "0.8900",
"weight": "0.015",
"image": "navy_cross_512_ncmr_mini.jpg"
}
},
"mongo_id": "552d9597628062741c000e52"
}
},
{
"_index": "ezrshop.v1",
"type": "ezr",
"_id": "552d959c628062741c000e85",
"_score": 0.5143846,
"_source": {
"award_name": "Joint Meritorious Unit Award with
NAVY/AF/MC/CG Frame",
"precedence": {
"army": 470,
"navy": 470,
"marine_corps": 470,
"air_force": 470,
"coast_guard": 510
},
"search": {
"data_code": "UNTAWDJU",
"abbrev_short": "JMUA",
"abbrev_long": "JT MERITORIOUS UNIT AWD"
},
"ribbon": {
"standard": {
"product_name": "Joint Meritorious Unit Award with
NAVY/AF/MC/CG Frame",
"sku": "483 NJMAR",
"price": "2.8000",
"weight": "0.015",
"image":
"joint_meritorious_with_navy_af_mc_cg_frame_483_njmar.jpg"
},
"thin": {
"product_name": "Joint Meritorious Unit Award - Thin
Ribbon with NAVY/AF/MC/CG Frame",
"sku": "SPECTHIN-109-B",
"price": "2.4900",
"weight": "0.015",
"image":
"joint_meritorious_with_navy_af_mc_cg_frame_483_njmar.jpg"
}
},
"mongo_id": "552d959c628062741c000e85"
}
}

The second result is what I want first because the award_name, which is
being boosted has both "meritorious" and "navy" in the string. I know there
is a lot going on here with relevance scoring, but if the award_name
contains more than one parameter regardless of the order I want that award
to score higher. How do I do that in either my mapping/analyzers or in my
search query?

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/cf495227-1fad-4239-8886-9320837144f1%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(Suganthi) #3

The mapping looks good to me. Please refer to the following link on the
impact of prioritising clauses in a bool query:
http://www.elastic.co/guide/en/elasticsearch/guide/current/multi-query-strings.html#prioritising-clauses

It looks like you want award_name matches to score higher, so you can
keep that match in a separate should clause. Depending on how much the
other field matches or partial matches should contribute to the total
relevance score, you can group them and keep them at the same level or in
a sub-level of the bool query.

"query" : {
"bool" : {
"should" : [
{
"match" : {
"award_name" : {
"boost" : 4,
"query" : "navy meritorious"
}
}
},
{ "bool": {
"should": [
{ "match": { "ribbon.standard.sku": "navy meritorious" }},
{ "match": { "search.abbrev_long": "navy meritorious"}}
...
]
}}

Also, the order of the terms in the query doesn't matter, because the
search analyzer splits a query string such as "meritorious navy" into two
independent tokens, as the _analyze request below shows:
GET /<index_name>/_analyze?analyzer=full_name
{
"navvy meritorious"
}
to
{
"token": "navvy",
"start_offset": 5,
"end_offset": 10,
"type": "",
"position": 1
},
{
"token": "meritorious",
"start_offset": 11,
"end_offset": 22,
"type": "",
"position": 2
}

Regarding your next question, about boosting the score when more query
terms appear in the award_name field: the current behaviour is due to the
bool/match query being field-centric rather than term-centric.
Please refer to the link
http://www.elastic.co/guide/en/elasticsearch/guide/current/_cross_fields_entity_search.html#_a_naive_approach.
Basically, the bool query you have here is the same as a multi_match query
with type set to "most_fields". Being a field-centric query, it faces the
three common issues listed in
http://www.elastic.co/guide/en/elasticsearch/guide/current/field-centric.html,
which is exactly the problem you are trying to resolve.

You can verify this by running your current query through the _validate
API with the explain option:
GET ///_validate/query?explain{
...
}

So you may want to try a term-centric query, i.e. the cross_fields query
(http://www.elastic.co/guide/en/elasticsearch/guide/current/_cross_fields_queries.html).
You can also take advantage of this query's per-field boosting to boost
award_name matches.

"query": {
    "multi_match": {
        "query":       "navy meritorious",
        "type":        "cross_fields",
        "fields":      [ "award_name^2", "ribbon.standard.sku", 

"search.abbrev_long", "search.data_code", "search.abbrev_short" ..]
}
}

So you may have your final query in a single multi_match
GET ///_validate/query?explain
{
"query": {
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name^4",
"ribbon.standard.sku^2",
"search.abbrev_long^2",
"search.data_code^2",
"search.abbrev_short^2",
"award_name.partial^2",
"ribbon.standard.sku.partial",
"search.abbrev_long.partial",
"search.data_code.partial",
"search.abbrev_short.partial"
]
}
}
}

Or you can combine multiple multi_match (cross_fields) queries in a bool
query, depending on your needs.
GET ///_validate/query?explain
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name^2",
"ribbon.standard.sku",
"search.abbrev_long",
"search.data_code",
"search.abbrev_short"
],
"boost": 2
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name.partial^2",
"ribbon.standard.sku.partial",
"search.abbrev_long.partial",
"search.data_code.partial",
"search.abbrev_short.partial"
]
}
}
]
}
}
]
}
}
}

Hope that helps.

On Friday, April 24, 2015 at 5:18:12 PM UTC-7, Zelfapp wrote:

I also don't want the order to matter. e.g. if I typed "meritorious navy"
vs. "navy meritorious" I should get nearly or exactly the same results.

On Friday, April 24, 2015 at 5:16:18 PM UTC-7, Zelfapp wrote:

This is my php array mapping for my index and type. Below that is the
query I'm currently using and an example of results and the issue I'm not
sure how to fix in either my mapping or in my search query.

Summary of mapping:

  • We have partial matching on the award_name field, which is an
    alphanumeric string.
  • We also boost exact matches on the award_name field .
  • On the "sku" fields we do the same partial matching and exact
    matches.
    • On the "search" fields we do the same partial matching and exact
      matches.

$params = [
'index' => 'ezr_v1',
'body' => [
'settings' => [
'number_of_shards' => 3,
'number_of_replicas' => 1,
'analysis' => [
'filter' => [
'name_ngrams' => [
'side' => 'front',
'max_gram' => '10',
'min_gram' => 1,
'type' => 'edgeNGram'
]
],
'analyzer' => [
'full_name' => [
'filter' => [
'standard',
'lowercase'
],
'type' => 'custom',
'tokenizer' => 'standard'
],
'partial_name' => [
'filter' => [
'standard',
'lowercase',
'name_ngrams'
],
'type' => 'custom',
'tokenizer' => 'standard'
]
]
]
],
'mappings' => [
'ezr_' [
'_all' => [
'enabled' => false
],
'properties' => [
'award_name' => [
'fields' => [
'partial' => [
'search_analyzer' => 'full_name',
'index_analyzer' => 'partial_name'
,
'type' => 'string'
],
'award_name' => [
'type' => 'string',
'analyzer' => 'full_name'
]
],
'type' => 'multi_field'
],
'medal' => [
'properties' => [
'anodized' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
],
'standard' => [
'properties' => [
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type' => 'multi_field'
]
]
]
]
],
'mini' => [
'properties' => [
'anodized' => [
'properties' => [
'image' => [
'type' => 'string',
'index' => 'no'
],
'price' => [
'type' => 'string',
'index' => 'no'
],
'product_name' => [
'type' => 'string',
'index' => 'no'
],
'sku' => [
'fields' => [
'partial' => [
'search_analyzer'
=> 'full_name',
'index_analyzer'
=> 'partial_name',
'type' => 'string'
],
'sku' => [
'type' => 'string',
'analyzer' =>
'full_name'
]
],
'type'

...

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/f16962f0-a3fc-4da9-afb9-b00d2b88920f%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(Zelfapp) #4

First off, thanks for the knowledgeable reply. cross_fields definitely
makes a difference in writing, and the prioritizing-clauses approach also
makes sense. Great recommendations.

On Wednesday, April 29, 2015 at 2:48:26 PM UTC-7, Suganthi wrote:

The mapping looks good to me. Please refer to the following link on the
impact of prioritizing-clauses
http://www.elastic.co/guide/en/elasticsearch/guide/current/multi-query-strings.html#prioritising-clauses
on bool query.

So looks like you want to score the award_name matches higher. so you can
keep that in separate should clause. And depends on how the other field
matches or partial matches should contribute to the total score of
relevance you can group them and keep it in the same level or sub level of
the bool query.

"query" : {
"bool" : {
"should" : [
{
"match" : {
"award_name" : {
"boost" : 4,
"query" : "navy meritorious"
}
}
},
{ "bool": {
"should": [
{ "match": { "ribbon.standard.sku": "navy meritorious" }},
{ "match": { "search.abbrev_long": "navy meritorious"}}
...
]
}}

Also the order of the terms in the query doesn't matter, because the
search analyzer splits "meritorious navy" to two tokens as seen by the
below query
GET /<index_name>/_analyze?analyzer=full_name
{
"navvy meritorious"
}
to
{
"token": "navvy",
"start_offset": 5,
"end_offset": 10,
"type": "",
"position": 1
},
{
"token": "meritorious",
"start_offset": 11,
"end_offset": 22,
"type": "",
"position": 2
}

Addressing your next question on boosting score if more query terms appear
on the award_name field, that's due to bool match query being field
centric rather than terms centric.
please refer to the link
http://www.elastic.co/guide/en/elasticsearch/guide/current/_cross_fields_entity_search.html#_a_naive_approach.
Basically the bool query you have here is same as multi_match query with
type set to "most_fields". As it's being field-centric query that faces
three common issues (as listed down in
http://www.elastic.co/guide/en/elasticsearch/guide/current/field-centric.html)
which is exactly the problem statement that you are trying to resolve.

You may verify the same by enclosing your current query with _validate
option
GET ///_validate/query?explain{
...
}

So you may want to try out term-centric query (i.e) cross_fields query (
http://www.elastic.co/guide/en/elasticsearch/guide/current/_cross_fields_queries.html).
Also you can take advantage of per-field boosting of this query to boost
award_name matches.

"query": {
    "multi_match": {
        "query":       "navy meritorious",
        "type":        "cross_fields",
        "fields":      [ "award_name^2", "ribbon.standard.sku", 

"search.abbrev_long", "search.data_code", "search.abbrev_short" ..]
}
}

So you may have your final query in a single multi_match
GET ///_validate/query?explain
{
"query": {
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name^4",
"ribbon.standard.sku^2",
"search.abbrev_long^2",
"search.data_code^2",
"search.abbrev_short^2",
"award_name.partial^2",
"ribbon.standard.sku.partial",
"search.abbrev_long.partial",
"search.data_code.partial",
"search.abbrev_short.partial"
]
}
}
}

or you can combine multiple multi_match (cross_fields) queries with bool
again based on your best needs.
GET ///_validate/query?explain
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name^2",
"ribbon.standard.sku",
"search.abbrev_long",
"search.data_code",
"search.abbrev_short"
],
"boost": 2
}
},
{
"bool": {
"should": [
{
"multi_match": {
"query": "navy meritorious",
"type": "cross_fields",
"fields": [
"award_name.partial^2",
"ribbon.standard.sku.partial",
"search.abbrev_long.partial",
"search.data_code.partial",
"search.abbrev_short.partial"
]
}
}
]
}
}
]
}
}
}

Hope that helps.

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/099da52f-3df7-499b-9ae8-ecf63bb80a42%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(system) #5