Trying to make search like `procter&gamble` and `procter & gamble` work the same way

My task is:

  • Make procter&gamble and procter & gamble produce the same results including score
  • Make it universal, not via synonyms, as it can be any other Somehow&Somewhat
  • Highlight procter&gamble or procter & gamble, not separate tokens if the phrase matches
  • I want to use simple_query_string, as I allow search operators
  • Make AT&T searchable as well

Here is my snippet. The problem is that procter&gamble and procter & gamble searches produce different scores and thus different documents as the result.
But the user expects the same result for procter&gamble or procter & gamble

# Drop any previous copy of the index so the analysis settings below always take effect.
DELETE /english_example
# Create the index with a custom "default" analyzer. No separate search analyzer is
# configured, so the same chain is expected to run at index time and at query time.
# Chain, in order:
#   1. ampersand_filter  - char filter: rewrites "&" plus any spaces around it to "_and_",
#                          so "Procter & Gamble" and "Procter&Gamble" reach the whitespace
#                          tokenizer as the same single token "Procter_and_Gamble".
#   2. english_possessive_stemmer, lowercase
#   3. acronymns         - word_delimiter_graph with preserve_original + catenate_all:
#                          keeps the original token and adds its parts and the joined form.
#   4. flatten_graph     - flattens the token graph produced by step 3.
#   5. english_stop, custom_stop_words_filter ("t"), english_keywords, english_stemmer
PUT /english_example
{
  "settings": {
    "analysis": {
      "char_filter": {
        "ampersand_filter": {
          "type": "pattern_replace",
          "pattern": " *& *",
          "replacement": "_and_"
        }
      },
      "filter": {
        "english_stop": {
          "type": "stop",
          "stopwords": "_english_"
        },
        "english_keywords": {
          "type": "keyword_marker",
          "keywords": ["example"]
        },
        "english_stemmer": {
          "type": "stemmer",
          "language": "english"
        },
        "english_possessive_stemmer": {
          "type": "stemmer",
          "language": "possessive_english"
        },
        "acronymns": {
          "type": "word_delimiter_graph",
          "catenate_all": true,
          "preserve_original": true
        },
        "custom_stop_words_filter": {
          "type": "stop",
          "ignore_case": true,
          "stopwords": ["t"]
        }
      },
      "analyzer": {
        "default": {
          "tokenizer": "whitespace",
          "char_filter": [
            "ampersand_filter"
          ],
          "filter": [
            "english_possessive_stemmer",
            "lowercase",
            "acronymns",
            "flatten_graph",
            "english_stop",
            "custom_stop_words_filter",
            "english_keywords",
            "english_stemmer"
          ]
        }
      }
    }
  }
}
# Index five sample documents mixing the spaced ("Procter & Gamble") and unspaced
# ("Procter&Gamble", "AT&T", "BB&T") spellings. refresh=true makes them searchable as
# soon as this request returns, so searches sent right afterwards do not race the
# periodic index refresh and come back empty.
PUT /english_example/_bulk?refresh=true
{ "index" : { "_id" : "1" } }
{ "description" : "wi-fi AT&T BB&T Procter & Gamble, some\nOther $500 games with Peter's", "contents" : "Much text with somewhere I meet Procter or Gamble" }
{ "index" : { "_id" : "2" } }
{ "description" : "Procter & Gamble", "contents" : "Much text with somewhere I meet Procter and Gamble" }
{ "index" : { "_id" : "3" } }
{ "description" : "Procter&Gamble", "contents" : "Much text with somewhere I meet Procter & Gamble" }
{ "index" : { "_id" : "4" } }
{ "description" : "Come Procter&Gamble", "contents" : "Much text with somewhere I meet Procter&Gamble" }
{ "index" : { "_id" : "5" } }
{ "description" : "Tome Procter & Gamble", "contents" : "Much text with somewhere I don't meet AT&T" }


# "query": "procter & gamble",
# Spaced spelling. Expected to return the same hits, scores and highlights as the
# unspaced "procter&gamble" search.
# NOTE(review): simple_query_string appears to split the query text on whitespace before
# analysis, so the char filter probably sees "procter", "&" and "gamble" separately here
# rather than " & " as one unit - confirm with _validate/query?explain=true.
GET english_example/_search
{
  "query": {
    "simple_query_string": {
      "query": "procter & gamble",
      "fields": ["description^2", "contents^80"],
      "default_operator": "or"
    }
  },
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  }
}


# "query": "procter&gamble",
# Unspaced spelling. Expected to return the same hits, scores and highlights as the
# spaced "procter & gamble" search.
GET english_example/_search
{
  "query": {
    "simple_query_string": {
      "query": "procter&gamble",
      "fields": ["description^2", "contents^80"],
      "default_operator": "or"
    }
  },
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  }
}


# "query": "at&t",
# Short acronym case: "at&t" must stay searchable even though "at" and "t" on their
# own are removed by the stop-word filters.
GET english_example/_search
{
  "query": {
    "simple_query_string": {
      "query": "at&t",
      "fields": ["description^2", "contents^80"],
      "default_operator": "or"
    }
  },
  "highlight": {
    "fields": {
      "description": {},
      "contents": {}
    }
  }
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.