How to configure fields for wildcard search?


#1

Here's a config file. The fields that are listed as properties are the fields that I would like to be able to do a wildcard search on. So for instance, if I type *foo* into our search box, I would get all results that have properties matching the substring "foo".

"version" : {
    "number" : "2.3.5",
    "build_hash" : "90f439ff60a3c0f497f91663701e64ccd01edbb4",
    "build_timestamp" : "2016-07-27T10:36:52Z",
    "build_snapshot" : false,
    "lucene_version" : "5.5.0"
  },

Here is the config file excerpt:

  <extension target="org.nuxeo.elasticsearch.ElasticSearchComponent"
    point="elasticSearchIndex">
    <elasticSearchIndex name="${elasticsearch.indexName}" type="doc" repository="default">
      <fetchFromSource>
        <include>ecm:*</include>
        <include>dc:*</include>
        <exclude>ecm:binarytext</exclude>
      </fetchFromSource>

      <settings>
{
   "analysis" : {
      "filter" : {
         "truncate_filter" : {
            "length" : 256,
            "type" : "truncate"
         },
         "word_delimiter_filter" : {
           "type" : "word_delimiter",
           "preserve_original" : true
         },
         "asciifolding_filter" : {
            "type" : "asciifolding",
            "preserve_original" : true
         },
         "en_stem_filter" : {
            "name" : "minimal_english",
            "type" : "stemmer"
         },
         "en_stop_filter" : {
            "stopwords" : [
               "_english_"
            ],
            "type" : "stop"
         },
         "fr_elision_filter" : {
            "articles" : [
               "c",
               "l",
               "m",
               "t",
               "qu",
               "n",
               "s",
               "j"
            ],
            "type" : "elision"
         },
         "fr_stem_filter" : {
            "name" : "minimal_french",
            "type" : "stemmer"
         },
         "fr_stop_filter" : {
            "stopwords" : [
               "_french_"
            ],
            "type" : "stop"
         },
         "mynGram": {
          "type": "nGram",
          "min_gram": 4,
          "max_gram": 4
        }
      },
      "tokenizer" : {
         "path_tokenizer" : {
            "delimiter" : "/",
            "type" : "path_hierarchy"
         }
      },
      "analyzer" : {
         "en_analyzer" : {
            "alias" : "fulltext",
            "char_filter":  [ "html_strip"],
            "filter" : [
               "lowercase",
               "mynGram"
            ],
            "type" : "custom",
            "tokenizer" : "standard"
         },
         "fr_analyzer" : {
            "char_filter":  [ "html_strip"],
            "filter" : [
               "word_delimiter_filter",
               "lowercase",
               "fr_stop_filter",
               "fr_stem_filter",
               "asciifolding_filter",
               "fr_elision_filter"
            ],
            "type" : "custom",
            "tokenizer" : "standard"
         },
         "path_analyzer" : {
            "type" : "custom",
            "tokenizer" : "path_tokenizer"
         },
         "lowercase_analyzer" : {
            "type" : "custom",
            "filter" : [ "truncate_filter", "lowercase", "asciifolding" ],
            "tokenizer" : "keyword"
         },
         "default" : {
            "type" : "custom",
            "filter" : [ "truncate_filter" ],
            "tokenizer" : "keyword"
         }
      }
   }
}
      </settings>
      <mapping>
{
   "_all" : {
      "analyzer" : "fulltext"
   },
   "dynamic_templates": [ {
      "no_thumbnail_template": {
         "path_match": "thumb:thumbnail.*",
         "mapping": {
             "index": "no",
             "include_in_all": false
            }
         }
      }, {
      "no_picture_template": {
         "path_match": "picture:views.*",
         "mapping": {
            "index": "no",
            "include_in_all": false
          }
       }
   } ],
   "properties" : {
      "dc:title" : {
         "type" : "multi_field",
         "fields" : {
           "dc:title" : {
             "type" : "string"
           },
           "fulltext" : {
             "boost": 2,
             "type": "string",
             "analyzer" : "fulltext"
          }
        }
      },
      "dc:description" : {
         "type" : "multi_field",
         "fields" : {
           "dc:description" : {
             "index" : "no",
             "include_in_all" : true,
             "type" : "string"
           },
           "fulltext" : {
             "boost": 1.5,
             "type": "string",
             "analyzer" : "fulltext"
          }
        }
      },
      "note:note" : {
         "type" : "multi_field",
         "fields" : {
           "note:note" : {
             "index" : "no",
             "include_in_all" : true,
             "type" : "string"
           },
           "fulltext" : {
             "type": "string",
             "analyzer" : "fulltext"
          }
        }
      },
      "ecm:binarytext" : {
         "type" : "string",
         "index" : "no",
         "include_in_all" : true
      },
      "ecm:path" : {
         "type" : "multi_field",
         "fields" : {
            "children" : {
               "analyzer" : "path_analyzer",
               "search_analyzer" : "keyword",
               "type" : "string"
            },
            "ecm:path" : {
               "index" : "not_analyzed",
               "type" : "string"
            }
         }
      },
      "ecm:pos": {
         "type": "integer"
      },
      "dc:created": {
         "format": "dateOptionalTime",
         "type": "date"
      },
      "dc:expired": {
         "format": "dateOptionalTime",
         "type": "date"
      },
      "dc:modified": {
         "format": "dateOptionalTime",
         "type": "date"
      },
      "common:icon": {
         "type": "string",
         "index" : "no",
         "include_in_all" : false
      }
	}
}
      </mapping>

    </elasticSearchIndex>
  </extension>

</component>
</#escape>

(David Pilato) #2

I would probably change the mapping to use as well a ngram based analyzer.


(system) #3

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.