Illegal_argument_exception - setup of analyzer


(Michael Foldbjerg) #1

Hi,

We've been using the following analyzer setup for ages:

"analysis": {
"char_filter": {
"danish_char_mapping": {
"type": "mapping",
"mappings": [
"\u00C0 => A",
"\u00C1 => A",
"\u00C2 => A",
"\u00C3 => A",
"\u00C4 => A",
"\u00C5 => AA",
"\u00C6 => AE",
"\u00C7 => C",
"\u00C8 => E",
"\u00C9 => E",
"\u00CA => E",
"\u00CB => E",
"\u00CC => I",
"\u00CD => I",
"\u00CE => I",
"\u00CF => I",
"\u0132 => IJ",
"\u00D0 => D",
"\u00D1 => N",
"\u00D2 => O",
"\u00D3 => O",
"\u00D4 => O",
"\u00D5 => O",
"\u00D6 => O",
"\u00D8 => OE",
"\u0152 => OE",
"\u00DE => TH",
"\u00D9 => U",
"\u00DA => U",
"\u00DB => U",
"\u00DC => U",
"\u00DD => Y",
"\u0178 => Y",
"\u00E0 => a",
"\u00E1 => a",
"\u00E2 => a",
"\u00E3 => a",
"\u00E4 => a",
"\u00E5 => aa",
"\u00E6 => ae",
"\u00E7 => c",
"\u00E8 => e",
"\u00E9 => e",
"\u00EA => e",
"\u00EB => e",
"\u00EC => i",
"\u00ED => i",
"\u00EE => i",
"\u00EF => i",
"\u0133 => ij",
"\u00F0 => d",
"\u00F1 => n",
"\u00F2 => o",
"\u00F3 => o",
"\u00F4 => o",
"\u00F5 => o",
"\u00F6 => o",
"\u00F8 => oe",
"\u0153 => oe",
"\u00DF => ss",
"\u00FE => th",
"\u00F9 => u",
"\u00FA => u",
"\u00FB => u",
"\u00FC => u",
"\u00FD => y",
"\u00FF => y",
"\uFB00 => ff",
"\uFB01 => fi",
"\uFB02 => fl",
"\uFB03 => ffi",
"\uFB04 => ffl",
"\uFB05 => ft",
"\uFB06 => st"
]
}
},
"analyzer": {
"default": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"danish_synonyms_pre_token",
"preserveSelskabsForm",
"stop_characters",
"danish_synonyms_post_token"
],
"char_filter": ["danish_char_mapping"]
},
"danish_ducet": {
"tokenizer": "keyword",
"filter": ["dansk_sortering"]
},
"suggester": {
"tokenizer": "standard",
"filter": ["stop_characters", "lowercase", "shingles"]
}
},
"filter": {
"preserveForwardSlash": {
"type": "word_delimiter",
"type_table": ["/ => ALPHA"]
},
"shingles": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3
},
"stop_characters": {
"type": "stop",
"stopwords": [
"-",
"!",
"#",
"*",
"$",
"?",
"+",
"%",
"=",
"£",
":",
".",
",",
"",
";",
"og"
]
},
"danish_synonyms_post_token": {
"type": "synonym",
"synonyms": [
"1,en",
"2,to",
"gl,gammel",
"ll,lille",
"ndr,nordre",
"nr,noerre",
"sdr,soender",
"skt,sankt",
"st,store",
"v,vester",
"oe,oester",
"dr,doktor",
"hf,haveforening",
"a,alle",
"boul,boulevard",
"g,gade",
"kr,kirke",
"kvt,kvarter",
"pas,passage",
"pl,plads",
"str,straede",
"t,torv",
"v,vej",
"vg,vaenge",
"borgm,borgmester",
"prs,prinsesse",
"dronn,dronning",
"kprs,kronprinsesse",
"chr,christian",
"edv,edvard",
"fr,frederik",
"johs,johannes",
"vilh,vilhelm",
"sverrigsgade,sverigesgade",
"&,og",
"i/s,interessentskab",
"a/s,aktieselskab",
"k/s,kommanditselskab"
]
},
"danish_synonyms_pre_token": {
"synonyms": [
"a7s,a/s",
"i7s,i/s",
"k7s,k/s"
],
"type": "synonym"
},
"dansk_sortering": {
"type": "icu_collation",
"language": "da",
"country": "DK"
},
"preserveSelskabsForm": {
"protected_words": [
"a7s",
"a/s",
"i7s",
"i/s",
"k7s",
"k/s"
],
"type": "word_delimiter",
"type_table": [
"\n=> ALPHANUM"
],
"split_on_numerics": false
}
}
}

It has worked under the 1.x, 2.x and 5.x series. But now, when I try to build a new index under ES 6.2.2 with the above analyzer setup in the settings section, I receive the following error, which I don't understand:

{
"error": {
"caused_by": {
"caused_by": {
"reason": "term: i/s analyzed to a token (i7s) with position increment != 1 (got: 0)",
"type": "illegal_argument_exception"
},
"reason": "Invalid synonym rule at line 35",
"type": "parse_exception"
},
"reason": "failed to build synonyms",
"root_cause": [
{
"reason": "failed to build synonyms",
"type": "illegal_argument_exception"
}
],
"type": "illegal_argument_exception"
},
"status": 400
}

Hope somebody can enlighten me.

Regards,
Michael


(system) #2

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.