Exception during Reindex

Version: 5.4.1

I'm trying to use reindex to make a minor change to a mapping, although the mapping itself is complex and contains custom analyzers and tokenizers.

Reindex works if I don't create the new mapping first:

POST _reindex
{
  "source": {
    "index": "cheminfo",
   "size": 1
  },
  "dest": {
    "index": "cheminfo_temp",
    "version_type": "external"
  }
}

But if I create the "cheminfo_temp" mapping first (even if it is exactly the same as the source mapping), it fails with an IndexOutOfBoundsException somewhere deep inside InternalEngine.java:

[2018-09-13T11:41:07,393][DEBUG][o.e.a.b.TransportShardBulkAction] [elspoc-node1] [cheminfo_temp][0] failed to execute bulk item (index) BulkShardRequest [[cheminfo_temp][0]] containing [index {[cheminfo_temp][docs][a7612d2d7834814b6a55f3488747049], source[n/a, actual length: [10.6kb], max length: 2kb]}]
java.lang.IndexOutOfBoundsException: Index: 0, Size: 0
        at java.util.ArrayList.rangeCheck(ArrayList.java:653) ~[?:1.8.0_121]
        at java.util.ArrayList.get(ArrayList.java:429) ~[?:1.8.0_121]
        at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:639) ~[elasticsearch-5.4.1.jar:5.4.1]
        at org.elasticsearch.index.engine.InternalEngine.indexIntoLucene(InternalEngine.java:583) ~[elasticsearch-5.4.1.jar:5.4.1]
        at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:483) ~[elasticsearch-5.4.1.jar:5.4.1]
        at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:560) ~[elasticsearch-5.4.1.jar:5.4.1]
...

One thing I noticed is that the source length is larger than the max of 2kb. Can 2kb really be the max? In any case, I think the real problem lies in creating the new mapping first. The mapping contains custom tokenizers that have to be renamed, or a conflict arises. Could that make reindex throw an IndexOutOfBoundsException?

I'll post the complete mapping in a follow-up, since it would otherwise exceed the allowed length.

PUT cheminfo
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "cpd_id_analyzer_cheminfo": {
          "type": "custom",
          "tokenizer": "cpd_id_tokenizer_cheminfo",
          "filter": [
            "lowercase",
            "stop"
          ]
        },
        "user_name_analyzer_index_cheminfo": {
          "type": "custom",
          "tokenizer": "user_name_tokenizer_index_cheminfo",
          "filter": [
            "lowercase",
            "stop"
          ]
        },
        "user_name_analyzer_search_cheminfo": {
          "type": "custom",
          "tokenizer": "user_name_tokenizer_search_cheminfo",
          "filter": [
            "lowercase",
            "stop"
          ]
        }
      },
      "tokenizer": {
        "cpd_id_tokenizer_cheminfo": {
          "type": "cpd_id_tokenizer",
          "cpd_service_url": "https://myserverXXX/CpdService/CServ?act=convert",
          "max_cpd_count": 3000,
          "patterns": [
            "\\bSID[0-9]{1,10}\\b",
            "\\b(GNF(-)?)?\\d{2}(-)?\\d{4}(-)?\\d{4}(-)?\\d\\b",
            "\\b(NV[PC]-)?[A-Z]{3}\\d{3}(-\\d{1,3})?(-[a-zA-Z]{2}-\\d+)?\\b",
            "\\b[A-Z]{2}-\\d{2}-[A-Z]{2}\\d{2}\\b",
            "\\b[A-Z]{2}\\d{2}-\\d{2}[A-Z]{2}\\b",
            "\\b(((NV[PC]-)?[a-zA-Z]{3}\\d{3})|(CHIR\\d+))\\b"
          ],
          "pattern_ids": [
            1234,
            1000,
            9007,
            9000,
            9001,
            4567
          ]
        },
        "user_name_tokenizer_index_cheminfo": {
          "type": "user_name_tokenizer_index"
        },
        "user_name_tokenizer_search_cheminfo": {
          "type": "user_name_tokenizer_search",
          "usr_service_url": "https://myserverXXX/CpdService/CServ?act=guessUser",
          "max_user_count": 100
        }
      }
    }
  },
  "mappings": {
    "docs": {
      "dynamic": false,
      "_source": {
        "excludes": [
          "attachment"
        ]
      },
      "properties": {
        "attachment": {
          "type": "binary",
          "store": true
        },
        "attributes": {
          "properties": {
            "group": {
              "type": "keyword",
              "store": true
            },
            "owner": {
              "type": "keyword",
              "store": true
            },
            "environment": {
              "type": "keyword",
              "store": true
            }
          }
        },
        "content": {
          "type": "text",
          "store": true,
          "index": true,
          "analyzer": "cpd_id_analyzer_cheminfo",
          "term_vector": "with_positions_offsets"
        },
        "file": {
          "properties": {
            "checksum": {
              "type": "keyword",
              "store": true
            },
            "content_type": {
              "type": "keyword",
              "store": true
            },
            "extension": {
              "type": "keyword",
              "store": true
            },
            "extension_friendly": {
              "type": "keyword",
              "store": true
            },
            "filename": {
              "type": "text",
              "store": true
            },
            "hit_count": {
              "type": "long",
              "store": true,
              "null_value": "0"
            },
            "similar_document_group": {
              "type": "keyword",
              "store": true
            },
            "filename_friendly": {
              "type": "text",
              "store": true,
              "index": true,
              "analyzer": "cpd_id_analyzer_cheminfo",
              "term_vector": "with_positions_offsets"
            },
            "filesize": {
              "type": "long",
              "store": true
            },
            "indexed_chars": {
              "type": "long",
              "store": true
            },
            "indexing_date": {
              "type": "date",
              "store": true,
              "format": "dateOptionalTime"
            },
            "last_modified": {
              "type": "date",
              "store": true,
              "format": "dateOptionalTime"
            },
            "url": {
              "type": "keyword",
              "index": false,
              "store": true
            }
          }
        },
        "security": {
          "properties": {
            "groups": {
              "type": "keyword",
              "store": true
            }
          }
        },
        "ids": {
          "properties": {
            "values": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            }
          }
        },
        "meta": {
          "properties": {
            "author": {
              "type": "keyword",
              "store": true
            },
            "author_friendly": {
              "type": "text",
              "store": true,
              "fielddata": true,
              "analyzer": "user_name_analyzer_index_cheminfo",
              "search_analyzer": "user_name_analyzer_search_cheminfo",
              "term_vector": "with_positions_offsets",
              "fields": {
                "raw": {
                  "type": "keyword"
                }
              }
            },
            "date": {
              "type": "date",
              "store": true,
              "format": "dateOptionalTime"
            },
            "keywords": {
              "type": "text",
              "store": true
            },
            "title": {
              "type": "text",
              "store": true
            }
          }
        },

...

...
            "path": {
              "properties": {
                "encoded": {
                  "type": "keyword",
                  "store": true
                },
                "real": {
                  "type": "keyword",
                  "store": true
                },
                "root": {
                  "type": "keyword",
                  "store": true
                },
                "virtual": {
                  "type": "keyword",
                  "store": true
                }
              }
            }
          }
        },
        "folder": {
          "dynamic": false,
          "properties": {
            "encoded": {
              "type": "keyword",
              "store": true
            },
            "name": {
              "type": "keyword",
              "store": true
            },
            "real": {
              "type": "keyword",
              "store": true
            },
            "root": {
              "type": "keyword",
              "store": true
            },
            "virtual": {
              "type": "keyword",
              "store": true
            }
          }
        }
      }
    }

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.