Content search with "" (double quotes) not returning exact results in ES 5.2.2


(Abhisek DasGupta) #1

When words are searched with "" (double quotes), a partial search is actually performed. My expectation is that a string wrapped in double quotes should match only the exact word, and that only the searched word should be highlighted.

Content (for reference only):
Routing is the process of selecting a path for traffic in a network. The routing process usually directs forwarding on the basis of routing-tables. Structured addresses allow a single routing table entry to represent the route to a group of devices. Using this map, each router independently determines the least-cost path ...

Text Search - "route"

Current results - it matches "router", "routing", "routes", "routing-table" and "ES(config-router)" along with "route".

Expected output - only the exact word "route" should be matched; it should not match any other words.

I used the "Ingest Attachment Processor Plugin" to extract the content of HTML files and index it into Elasticsearch.
https://www.elastic.co/guide/en/elasticsearch/plugins/5.2/ingest-attachment.html

PUT my_index/_mapping/storage
{
  "properties": {
    "content": {
      "include_in_all": true,
      "analyzer": "match_phrase",
      "term_vector": "with_positions_offsets",
      "type": "text",
      "fields": {
        "ngrammed": {
          "term_vector": "with_positions_offsets",
          "type": "text"
        }
      }
    }
  },
  "meta": {
    "properties": {
      "pageNumber": {
        "type": "long"
      },
      "src": {
        "norms": {
          "enabled": false
        },
        "analyzer": "lowercase_keyword",
        "type": "text"
      }
    }
  }
}

Search Query -

{
  "_source": [
    "meta.pageNumber"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "constant_score": {
            "query": {
              "query_string": {
                "query": "\"route\""
              }
            },
            "boost": 5
          }
        },
        {
          "query_string": {
            "default_field": "content.ngrammed",
            "query": "\"route\""
          }
        }
      ],
      "filter": {
        "term": {
          "meta.src": "456"
        }
      }
    }
  },
  "highlight": {
    "require_field_match": true,
    "fields": {
      "content.ngrammed": {}
    },
    "fragment_size": 100,
    "number_of_fragments": 3
  },
  "sort": {
    "meta.pageNumber": {
      "order": "asc"
    }
  },
  "from": 0,
  "size": "15"
}

Any help or guidance from the Elastic team would be greatly appreciated.


(Adrien Grand) #2

Please share the definition of your analyzer.


(Abhisek DasGupta) #3
PUT _template/my_index_1
{
  "template": "my_index*",
  "settings": {
    "index": {
      "analysis": {
        "char_filter": {
          "whitespace_mapping": {
            "mappings": [
              "\\u00A0=>\\u0020"
            ],
            "type": "mapping"
          }
        },
        "analyzer": {
          "lowercase_keyword": {
            "type": "custom",
            "filter": [
              "lowercase"
            ],
            "tokenizer": "keyword"
          },
          "match_phrase": {
            "type": "custom",
            "char_filter": [
              "whitespace_mapping"
            ],
            "filter": [
              "lowercase"
            ],
            "tokenizer": "whitespace"
          },
          "match_phrase_search": {
            "type": "custom",
            "char_filter": [
              "whitespace_mapping"
            ],
            "filter": [
              "lowercase",
              "stop"
            ],
            "tokenizer": "whitespace"
          }
        }
      }
    }
  }
}

Index Settings -

PUT my_index
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_ngram": {
            "type": "nGram",
            "min_gram": 1,
            "max_gram": 50
          }
        },
        "analyzer": {
          "default": {
            "type": "custom",
            "char_filter": [
              "whitespace_mapping"
            ],
            "filter": [
              "lowercase",
              "asciifolding",
              "stop",
              "my_ngram",
              "kstem"
            ],
            "tokenizer": "whitespace"
          },
          "default_search": {
            "type": "custom",
            "char_filter": [
              "whitespace_mapping"
            ],
            "filter": [
              "lowercase",
              "asciifolding",
              "kstem"
            ],
            "tokenizer": "whitespace"
          }
        }
      }
    }
  }
}

(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.