Elasticsearch dealing with overspecific terms


(Athar Khan) #1

I want to be able to deal with over-specific search terms, so that if a user searches for "taxation", results for "tax" are included as well, but only if it is present in the title field. Below are my Elasticsearch configs. I'm using version 1.5.

Settings

{  


"content_pages":{  
      "settings":{  
         "index":{  
            "creation_date":"1473848573964",
            "analysis":{  
               "analyzer":{  
                  "string_analyzer":{  
                     "filter":[  
                        "standard",
                        "lowercase",
                        "stop",
                        "asciifolding"
                     ],
                     "char_filter":[  
                        "html_strip"
                     ],
                     "type":"custom",
                     "tokenizer":"standard"
                  }
               }
            },
            "number_of_shards":"2",
            "number_of_replicas":"0"

         }
      }
   }
}

Mappings

"mappings":{  
     "content_page_type":{  
        "_all":{  
           "auto_boost":true
        },
        "properties":{  
           "author":{  
              "type":"integer"
           },
           "body:value":{  
              "type":"string",
              "boost":13.0,
              "analyzer":"string_analyzer"
           },
           "id":{  
              "type":"integer",
              "include_in_all":false
           },
           "title":{  
              "type":"string",
              "boost":21.0,
              "analyzer":"string_analyzer"
           },
           "type":{  
              "type":"string",
              "index":"not_analyzed",
              "analyzer":"string_analyzer"
           }
        }
     }
  }

Query

{  
   "from":0,
   "size":"10",       
   "query":{  
      "bool":{  
         "must":[  
            {  
               "multi_match":{  
                  "query":"taxation",
                  "fields":[  
                     "body:value^13.0",
                     "content_page_tab_data^13.0",
                     "field_body:value^13.0",
                     "field_tabs_page_body:value^13.0",
                     "title^21.0"
                  ]
               }
            }
         ],
         "should":[  
            {  
               "query_string":{  
                  "query":"(taxation)",
                  "fields":[  
                     "body:value^13.0",
                     "content_page_tab_data^13.0",
                     "field_body:value^13.0",
                     "field_tabs_page_body:value^13.0",
                     "title^21.0"
                  ]
               }
            },
            {
               "fuzzy_like_this" : {
                  "fields" : ["title"],
                  "like_text" : "taxation",
                  "fuzziness": "AUTO"
                }
            }
         ]
      }
   }       
}

The above query does not return results whose title contains "tax", only those whose title contains "taxation". I also don't want to include irrelevant results such as "relation" or "action".


(Nik Everett) #2

Usually the way to handle this is with stemming. There are a couple of
English stemmers included in Elasticsearch and you can customize them to
some degree with stemmer overrides. Play around with that and the analysis
API to see if you can get what you want. It isn't magic, just heuristics.


(Athar Khan) #3

Thanks for pointing me in the right direction.


(system) #4