Elasticsearch 8.6 - enrich processor is not behaving as expected

Hello,

I am new to Elasticsearch and I am probably missing something.
It seems that the enrich processor is not using my custom normalizer.

// Component template that defines the custom normalizer "norm_letters".
// The "chfilter_only_letters" char filter replaces every run of non-letter
// characters (\P{L}+) with a single space; the normalizer then applies the
// "lowercase", "asciifolding" and "trim" filters.
PUT /_component_template/comptpl_norm_letters
{
  "template": {
    "settings": {
      "analysis": {
        "char_filter": {
          "chfilter_only_letters": {
            "type": "pattern_replace",
            "pattern": """(\P{L}+)""",
            "replacement": " "
          }
        },
        "normalizer": {
          "norm_letters": {
            "type": "custom",
            "char_filter": [
              "chfilter_only_letters"
            ],
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ]
          }
        }
      }
    }
  }
}


// Index template for the first-name reference index "my-firstname".
// "firstname" is a keyword field that uses the "norm_letters" normalizer
// brought in by the "comptpl_norm_letters" component template; the mapping
// is declared with "dynamic": "strict".
PUT /_index_template/tplidx_my-firstname_ref
{
  "index_patterns": [
    "my-firstname"
  ],
  "priority": 2,
  "template": {
    "mappings": {
      "dynamic":"strict",
      "properties": {
        "firstname": {
          "type": "keyword",
          "normalizer": "norm_letters",
          "eager_global_ordinals": true
        }
      }
    }
  },
  "composed_of": [
    "comptpl_norm_letters"
  ]
}

// Index three reference first names into "my-firstname".
POST my-firstname/_doc
{
  "firstname":"Marie-Françoise"
}

POST my-firstname/_doc
{
  
  "firstname":"Marie"
}

POST my-firstname/_doc
{
  
  "firstname":"Françoise"
}

// Index template for the index to be enriched
// ("error_between_chair_and_keyboard"). It sets "pipeline-my-firstname" as
// the default ingest pipeline and maps "firstname" with the same
// "norm_letters" normalizer as the reference index.
PUT /_index_template/tplidx_my-error
{
  "index_patterns": [
    "error_between_chair_and_keyboard"
  ],
  "priority": 2,
  "template": {
    "settings": {
      "default_pipeline": "pipeline-my-firstname"
    },
    "mappings": {
      "properties": {
        "firstname": {
          "type": "keyword",
          "normalizer": "norm_letters",
          "eager_global_ordinals": true
        }
      }
    }
  },
  "composed_of": [
    "comptpl_norm_letters"
  ]
}

// Enrich policy of type "match": it uses "firstname" in the "my-firstname"
// index as the match field and returns "firstname" as the enrich field.
// NOTE(review): whether this lookup honours the "norm_letters" normalizer of
// the source index is the open question of this post; verify against the
// enrich policy documentation.
PUT _enrich/policy/policy-my-firstname
{
  "match": {
    "indices": "my-firstname",
    "match_field": "firstname",
    "enrich_fields": [ "firstname"]
  }
}

// Execute the enrich policy "policy-my-firstname".
POST /_enrich/policy/policy-my-firstname/_execute

// Ingest pipeline with a single enrich processor: it looks up the incoming
// document's "firstname" through "policy-my-firstname" and writes the result
// to the "enriched" field.
PUT _ingest/pipeline/pipeline-my-firstname
{
  "processors": [
    {
      "enrich": {
        "description": "try to find firstname",
        "policy_name": "policy-my-firstname",
        "field": "firstname",
        "target_field": "enriched"
      }
    }
  ]
}


// Bulk-index seven test documents into the index to be enriched. The
// first names differ in case, accents and separator ("-", "#", "’").
POST error_between_chair_and_keyboard/_bulk
  { "index" : { } }
  { "firstname": "Marie-Françoise"}
  { "index" : { } }
  { "firstname": "Marie#Françoise"}
  { "index" : { } }
  { "firstname": "Marie’Francoise"}
  { "index" : { } }
  { "firstname": "Françoise"}
  { "index" : { } }
  { "firstname": "Marie"}
  { "index" : { } }
  { "firstname": "marie-Françoise"}
  { "index" : { } }
  { "firstname": "marie-Françoise"}
  
  
 // List the documents that were indexed, to check which ones got an "enriched" field.
GET error_between_chair_and_keyboard/_search
 

It returns the following (as we can see, the "enriched" field is not always generated):

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 7,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "FjG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "Marie-Françoise",
          "enriched": {
            "firstname": "Marie-Françoise"
          }
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "FzG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "Marie#Françoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GDG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "Marie’Francoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GTG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "Françoise",
          "enriched": {
            "firstname": "Françoise"
          }
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GjG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "Marie",
          "enriched": {
            "firstname": "Marie"
          }
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GzG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "marie-Françoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "HDG5f4YBgPb-ljrw_r6A",
        "_score": 1,
        "_source": {
          "firstname": "marie-Françoise"
        }
      }
    ]
  }
}

Yet, if I run:

// Term query on the reference index. The search value is written in
// lowercase, unaccented form with a space instead of the hyphen.
GET my-firstname/_search
  {
    "size":100,
    "query": {
      "bool":{
        "must" : [
          {"term": {"firstname": "marie francoise"}}
          ]
      }
    }
  }

It returns:

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1,
      "relation": "eq"
    },
    "max_score": 0.9808291,
    "hits": [
      {
        "_index": "my-firstname",
        "_id": "_zGzf4YBgPb-ljrw-702",
        "_score": 0.9808291,
        "_source": {
          "firstname": "Marie-Françoise"
        }
      }
    ]
  }
}

And if I run:

// Same term query, this time on the index that was supposed to be enriched.
// The search value is written in lowercase, unaccented form with a space
// instead of the separator.
GET error_between_chair_and_keyboard/_search
  {
    "size":100,
    "query": {
      "bool":{
        "must" : [
          {"term": {"firstname": "marie francoise"}}
          ]
      }
    }
  }

It returns:

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 5,
      "relation": "eq"
    },
    "max_score": 0.37469345,
    "hits": [
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "FjG5f4YBgPb-ljrw_r6A",
        "_score": 0.37469345,
        "_source": {
          "firstname": "Marie-Françoise",
          "enriched": {
            "firstname": "Marie-Françoise"
          }
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "FzG5f4YBgPb-ljrw_r6A",
        "_score": 0.37469345,
        "_source": {
          "firstname": "Marie#Françoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GDG5f4YBgPb-ljrw_r6A",
        "_score": 0.37469345,
        "_source": {
          "firstname": "Marie’Francoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "GzG5f4YBgPb-ljrw_r6A",
        "_score": 0.37469345,
        "_source": {
          "firstname": "marie-Françoise"
        }
      },
      {
        "_index": "error_between_chair_and_keyboard",
        "_id": "HDG5f4YBgPb-ljrw_r6A",
        "_score": 0.37469345,
        "_source": {
          "firstname": "marie-Françoise"
        }
      }
    ]
  }
}

What am I missing?

Thanks in advance,

Best regards

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.