New elasticsearch-java API `CreateIndexRequest` using `.withJson` causes `co.elastic.clients.util.MissingRequiredPropertyException`

I am having a hard time using the new elasticsearch-java API client.

I am migrating from HLRC to the new elasticsearch java api.

When I create an index, I use CreateIndexRequest and load its body from a JSON source.

But why does this result in an exception? It seems that some required properties are missing.

Do all properties need to be present in the JSON file?
And why does the same JSON work in Kibana even though I only include the properties I need?

Also, with the deprecated HLRC client, the same JSON works when using its CreateIndexRequest.

Below is the exception

 co.elastic.clients.json.JsonpMappingException: Error deserializing co.elastic.clients.elasticsearch._types.analysis.TokenizerDefinition: co.elastic.clients.util.MissingRequiredPropertyException: Missing required property 'PathHierarchyTokenizer.bufferSize' (JSON path: settings.analysis.tokenizer.unix_path_tokenizer) (line no=15, column no=10, offset=377)

Below is my code

// Path of the JSON file that is loaded as the body of the create-index request.
final String assetJsonSource = "./config/elasticsearch/my_index_settings.json";
    // try-with-resources: the input stream is closed when this block exits.
    try (InputStream input = new FileInputStream(assetJsonSource)) {
      // The index name is set explicitly; the rest of the request is read from the JSON stream.
      // The reported JsonpMappingException indicates the JSON is deserialized into typed
      // objects (e.g. TokenizerDefinition), which is where required properties are checked.
      CreateIndexRequest request =
          CreateIndexRequest.of(builder -> builder.index(indexName).withJson(input));
      CreateIndexResponse response = client2.indices().create(request);
      // Boolean.TRUE.equals(...) is a null-safe way to test for true.
      // NOTE(review): ack is not used anywhere in the lines shown here.
      boolean ack = Boolean.TRUE.equals(response.acknowledged());
    } catch (IOException e) {
      // NOTE(review): the reported JsonpMappingException is presumably not an IOException,
      // so it would propagate past this handler — confirm against the client library.
      log.error("Failed to create an index", e);
    }

The json I used is

{
  "settings": {
    "number_of_shards": 5,
    "max_ngram_diff": 2,
    "analysis": {
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "ngram",
          "min_gram": "1",
          "max_gram": "3",
          "token_chars": ["letter", "digit", "punctuation", "symbol"]
        },
        "unix_path_tokenizer": {
          "type": "path_hierarchy"
        },
        "whitespace_tokenizer": {
          "type": "whitespace"
        },
        "keyword_tokenizer": {
          "type": "keyword"
        }
      },
      "analyzer": {
        "ngram_analyzer": {
          "tokenizer": "ngram_tokenizer",
          "char_filter": ["icu_normalizer"],
          "filter": ["lowercase"]
        },
        "lowercase_analyzer": {
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        },
        "directory_path_analyzer": {
          "tokenizer": "unix_path_tokenizer"
        },
        "whitespace_analyzer": {
          "tokenizer": "whitespace_tokenizer"
        },
        "keyword_analyzer": {
          "tokenizer": "keyword_tokenizer"
        }
      },
      "normalizer": {
        "lowercase_normalizer": {
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "ngram_analyzer",
        "fields": {
          "lowercase": {
            "type": "keyword",
            "normalizer": "lowercase_normalizer"
          }
        }
      },
      "path": {
        "type": "text",
        "analyzer": "directory_path_analyzer",
        "fields": {
          "full": {
            "type": "keyword"
          }
        }
      },
      "originalSize": {
        "type": "double",
        "store": "true"
      },
      "assetCategory": {
        "type": "text",
        "analyzer": "keyword_analyzer",
        "search_analyzer": "whitespace_analyzer",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "mimetype": {
        "type": "keyword"
      },
      "importedBy": {
        "type": "integer",
        "store": "true"
      },
      "updatedBy": {
        "type": "keyword"
      },
      "importedAt": {
        "type": "date",
        "store": "true"
      },
      "updatedAt": {
        "type": "date",
        "store": "true"
      },
      "fileCreatedAt": {
        "type": "date",
        "store": "true"
      },
      "fileUpdatedAt": {
        "type": "date",
        "store": "true"
      },
      "metadataSet": {
        "type": "long"
      },
      "instanceId": {
        "type": "keyword"
      },
      "referenceId": {
        "type": "keyword"
      },
      "cutComment": {
        "type": "text",
        "analyzer": "ngram_analyzer",
        "fields": {
          "lowercase": {
            "type": "text",
            "analyzer": "lowercase_analyzer"
          }
        }
      },
      "comment": {
        "properties": {
          "userId": {
            "type": "long"
          },
          "value": {
            "type": "text",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "text",
                "analyzer": "lowercase_analyzer"
              }
            }
          },
          "updatedAt": {
            "type": "date"
          }
        }
      },
      "content": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "shadow": {
        "type": "boolean",
        "store": "true"
      },
      "shadowUpdatedAt": {
        "type": "date",
        "store": "true"
      },
      "downloadValue": {
        "type": "long"
      },
      "collection": {
        "type": "long"
      },
      "sha1": {
        "type": "keyword"
      },
      "subtitle": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "videoOcr": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "version": {
        "type": "long"
      }
    },
    "dynamic_templates": [
      {
        "cmeta_str": {
          "match": "cmeta_str-*",
          "mapping": {
            "type": "text",
            "store": "true",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "keyword",
                "normalizer": "lowercase_normalizer"
              }
            }
          }
        }
      },
      {
        "cmeta_select": {
          "match": "cmeta_select-*",
          "mapping": {
            "type": "text",
            "store": "true",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "keyword",
                "normalizer": "lowercase_normalizer"
              }
            }
          }
        }
      },
      {
        "cmeta_bool": {
          "match": "cmeta_bool-*",
          "mapping": {
            "type": "boolean",
            "store": "true"
          }
        }
      },
      {
        "cmeta_double": {
          "match": "cmeta_double-*",
          "mapping": {
            "type": "double",
            "store": "true"
          }
        }
      },
      {
        "cmeta_date": {
          "match": "cmeta_date-*",
          "mapping": {
            "type": "date",
            "store": "true"
          }
        }
      },
      {
        "cmeta_multi_label": {
          "match": "cmeta_multi_label-*",
          "mapping": {
            "type": "long",
            "store": "true"
          }
        }
      }
    ]
  }
}

Hi @ALX_DM

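What the error says is that some required parameters of the tokenizer of type path_hierarchy are missing from your index settings (the exception points at settings.analysis.tokenizer.unix_path_tokenizer).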
I understand that your settings and mappings are created without problems when you run them in, for example, Kibana Dev Tools, but the new Java API client treats these fields as required.
I changed the analyzers ngram_analyzer, lowercase_analyzer and the others by adding the "type" property; I also added "type" to the normalizer and "buffer_size" to keyword_tokenizer.
For the error you described, I added the required fields to the tokenizer "unix_path_tokenizer":

   "unix_path_tokenizer": {
           "type": "path_hierarchy",
           "buffer_size": 1024,
           "replacement": "/",
           "reverse": "false",
           "skip": 0,
           "delimiter": "/"
         }

Full settings and mappings:

{
  "settings": {
    "number_of_shards": 5,
    "max_ngram_diff": 2,
    "analysis": {
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "ngram",
          "min_gram": "1",
          "max_gram": "3",
          "token_chars": ["letter", "digit", "punctuation", "symbol"]
        },
        "unix_path_tokenizer": {
          "type": "path_hierarchy",
          "buffer_size": 1024,
          "replacement": "/",
          "reverse": "false",
          "skip": 0,
          "delimiter": "/"
        },
        "whitespace_tokenizer": {
          "type": "whitespace"
        },
        "keyword_tokenizer": {
          "type": "keyword",
          "buffer_size": 1024
        }
      },
      "analyzer": {
        "ngram_analyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "char_filter": ["icu_normalizer"],
          "filter": ["lowercase"]
        },
        "lowercase_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": ["lowercase"]
        },
        "directory_path_analyzer": {
          "type": "custom",
          "tokenizer": "unix_path_tokenizer"
        },
        "whitespace_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace_tokenizer"
        },
        "keyword_analyzer": {
          "type": "custom",
          "tokenizer": "keyword_tokenizer"
        }
      },
      "normalizer": {
        "lowercase_normalizer": {
          "type": "custom",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "ngram_analyzer",
        "fields": {
          "lowercase": {
            "type": "keyword",
            "normalizer": "lowercase_normalizer"
          }
        }
      },
      "path": {
        "type": "text",
        "analyzer": "directory_path_analyzer",
        "fields": {
          "full": {
            "type": "keyword"
          }
        }
      },
      "originalSize": {
        "type": "double",
        "store": "true"
      },
      "assetCategory": {
        "type": "text",
        "analyzer": "keyword_analyzer",
        "search_analyzer": "whitespace_analyzer",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "mimetype": {
        "type": "keyword"
      },
      "importedBy": {
        "type": "integer",
        "store": "true"
      },
      "updatedBy": {
        "type": "keyword"
      },
      "importedAt": {
        "type": "date",
        "store": "true"
      },
      "updatedAt": {
        "type": "date",
        "store": "true"
      },
      "fileCreatedAt": {
        "type": "date",
        "store": "true"
      },
      "fileUpdatedAt": {
        "type": "date",
        "store": "true"
      },
      "metadataSet": {
        "type": "long"
      },
      "instanceId": {
        "type": "keyword"
      },
      "referenceId": {
        "type": "keyword"
      },
      "cutComment": {
        "type": "text",
        "analyzer": "ngram_analyzer",
        "fields": {
          "lowercase": {
            "type": "text",
            "analyzer": "lowercase_analyzer"
          }
        }
      },
      "comment": {
        "properties": {
          "userId": {
            "type": "long"
          },
          "value": {
            "type": "text",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "text",
                "analyzer": "lowercase_analyzer"
              }
            }
          },
          "updatedAt": {
            "type": "date"
          }
        }
      },
      "content": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "shadow": {
        "type": "boolean",
        "store": "true"
      },
      "shadowUpdatedAt": {
        "type": "date",
        "store": "true"
      },
      "downloadValue": {
        "type": "long"
      },
      "collection": {
        "type": "long"
      },
      "sha1": {
        "type": "keyword"
      },
      "subtitle": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "videoOcr": {
        "type": "text",
        "analyzer": "ngram_analyzer"
      },
      "version": {
        "type": "long"
      }
    },
    "dynamic_templates": [
      {
        "cmeta_str": {
          "match": "cmeta_str-*",
          "mapping": {
            "type": "text",
            "store": "true",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "keyword",
                "normalizer": "lowercase_normalizer"
              }
            }
          }
        }
      },
      {
        "cmeta_select": {
          "match": "cmeta_select-*",
          "mapping": {
            "type": "text",
            "store": "true",
            "analyzer": "ngram_analyzer",
            "fields": {
              "lowercase": {
                "type": "keyword",
                "normalizer": "lowercase_normalizer"
              }
            }
          }
        }
      },
      {
        "cmeta_bool": {
          "match": "cmeta_bool-*",
          "mapping": {
            "type": "boolean",
            "store": "true"
          }
        }
      },
      {
        "cmeta_double": {
          "match": "cmeta_double-*",
          "mapping": {
            "type": "double",
            "store": "true"
          }
        }
      },
      {
        "cmeta_date": {
          "match": "cmeta_date-*",
          "mapping": {
            "type": "date",
            "store": "true"
          }
        }
      },
      {
        "cmeta_multi_label": {
          "match": "cmeta_multi_label-*",
          "mapping": {
            "type": "long",
            "store": "true"
          }
        }
      }
    ]
  }
}
1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.