How can you set a custom analyzer on the "_content" field of a mapper attachment during index creation?


(Susan Liu) #1

I tried the following in my Attachment class, but it doesn't work:

[String(Name = "_content", TermVector = TermVectorOption.WithPositionsOffsets, Store = true, Analyzer = "autocomplete")]
public string Content { get; set; }

I want to specify an edgeNGram analyzer for the "_content" field so that I can perform autocomplete searches across all mapper attachments. How can I achieve this?

My current index settings are as follows:
{
"html5-es" : {
"aliases" : { },
"mappings" : {
"topic" : {
"properties" : {
"delete" : {
"type" : "boolean"
},
"file" : {
"type" : "attachment",
"fields" : {
"content" : {
"type" : "string",
"store" : true,
"term_vector" : "with_positions_offsets"
},
"author" : {
"type" : "string",
"store" : true,
"term_vector" : "with_positions_offsets"
},
"title" : {
"type" : "string"
},
"name" : {
"type" : "string"
},
"date" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"keywords" : {
"type" : "string"
},
"content_type" : {
"type" : "string"
},
"content_length" : {
"type" : "integer"
},
"language" : {
"type" : "string"
}
}
},
"hash_id" : {
"type" : "string"
},
"path" : {
"type" : "string"
},
"title" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1478123059878",
"analysis" : {
"filter" : {
"standard" : {
"type" : "standard"
},
"synonym" : {
"format" : "solr",
"ignore_case" : "true",
"expand" : "true",
"type" : "synonym",
"synonyms_path" : "analysis/html5-es-Syn.txt"
},
"lowercase" : {
"type" : "lowercase"
},
"stop" : {
"ignore_case" : "true",
"type" : "stop",
"stopwords_path" : "stopwords/html5-es-Stop.txt"
},
"edgeNGram" : {
"min_gram" : "1",
"side" : "front",
"type" : "edge_ngram",
"max_gram" : "25"
},
"asciifolding" : {
"type" : "asciifolding",
"preserveOriginal" : "true"
},
"shingle" : {
"min_shingle_size" : "2",
"type" : "shingle"
},
"snowball" : {
"type" : "snowball",
"language" : "English"
}
},
"char_filter" : {
"html_strip" : {
"type" : "html_strip"
}
},
"analyzer" : {
"standard" : {
"type" : "standard"
},
"autocomplete" : {
"filter" : [ "standard", "lowercase" ],
"char_filter" : [ "html_strip" ],
"type" : "custom",
"tokenizer" : "edgeNGram"
},
"search" : {
"filter" : [ "standard", "asciifolding", "edgeNGram", "lowercase", "shingle", "snowball", "stop", "synonym" ],
"char_filter" : [ "html_strip" ],
"type" : "custom",
"tokenizer" : "standard"
}
},
"tokenizer" : {
"standard" : {
"type" : "standard"
},
"edgeNGram" : {
"token_chars" : [ "letter", "digit" ],
"min_gram" : "1",
"type" : "edge_ngram",
"max_gram" : "25"
}
}
},
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "RClDLTfzSAelYNQBFu80Ww",
"version" : {
"created" : "2030399"
}
}
},
"warmers" : { }
}
}


(Susan Liu) #2

I realized that the "_content" field is actually the "FileField" attribute on the Attachment object when establishing a mapping.

[RESOLVED]


(system) #3