Upgrade from 6.8.1 to 7.8.1 causes token offset issues

I'm trying to upgrade from 6.8.1 to 7.8.1 using the same mappings as in 6.8.1, but when indexing documents, some of them (not many) throw an exception with the following error:

startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards startOffset=1487,endOffset=1506,lastStartOffset=1503 for field 'content'

I am using the following mapping:

{
  "settings": {
    "analysis": {
      "filter": {
        "custom_stem_en": {
          "type": "stemmer_override",
          "rules_path": "en/custom_stem.txt"
        },
        "stop_en": {
          "type": "stop",
          "stopwords_path": "en/stopwords.txt"
        },
        "synonym_en": {
          "type": "synonym",
          "synonyms_path": "en/synonyms.txt"
        },
        "stemmer_en": {
          "type": "stemmer",
          "language": "english"
        }
      },
      "analyzer": {
        "stemmed_en": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase",
            "stop_en",
            "synonym_en",
            "word_delimiter",
            "asciifolding",
            "custom_stem_en",
            "stemmer_en"
          ]
        }
      }
    }
  },
  "mappings": {
    "content": {
      "type": "text",
      "analyzer": "stemmed_en",
      "norms": false
    }
  }
}

Any idea why I'm getting this error, and what I should change?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.