How to avoid duplicate values being copied while using copy_to?

My Index definition:

PUT /test-index
{
  "mappings": {
    "properties": {
      "category": {
        "type": "text",
        "similarity": "boolean",
        "term_vector": "with_positions_offsets",
        "copy_to": "test",
        "fields": {
          "facet": { "type": "keyword" }
        }
      },
      "keywords": {
        "type": "text",
        "similarity": "boolean",
        "term_vector": "with_positions_offsets",
        "copy_to": "test",
        "fields": {
          "facet": { "type": "keyword" }
        }
      },
      "test": {
        "type": "text",
        "similarity": "boolean",
        "term_vector": "with_positions_offsets",
        "fields": {
          "facet": { "type": "keyword" }
        }
      }
    }
  }
}

documents:


PUT /test-index/_doc/1
{
  "category": ["ABC", "123", "hgh6"],
  "keywords": "XYZ"
}

PUT /test-index/_doc/2
{
  "category": ["XYZ", "ABC", "hgh7"],
  "keywords": "XYZ"
}

PUT /test-index/_doc/3
{
  "category": ["XYZ", "ABC", "hgh7"],
  "keywords": "uuu"
}

Search Query:

GET test-index/_search
{
  "query": { "match_all": {} },
  "fields": ["test"]
}

Output:

My question is:
how can I avoid duplicate values in the field "test"? I want it to behave as a set instead of a list.

Thank you in advance

Hi @Srikrishna_Raghupath

Maybe the `unique` and `remove_duplicates` token filters don't work for you, but this script processor can help.

In this case you don't need `copy_to`; the script processor writes the values into the `test` field itself.

POST /_ingest/pipeline/_simulate
{
  "pipeline": {
    "description": "_description",
    "processors": [
      {
        "script": {
          "source": """
            // Fill "test" with the distinct values of "category" and "keywords".
            // A separate collection is built on purpose: appending to
            // ctx['category'] directly would also write the keyword into the
            // document's own "category" field.
            def merged = new LinkedHashSet();
            for (def name : ['category', 'keywords']) {
              def value = ctx[name];
              if (value instanceof Collection) {
                // Multi-valued field: add every value, repeats are ignored.
                merged.addAll(value);
              } else if (value != null) {
                // Single value; a missing field is simply skipped.
                merged.add(value);
              }
            }
            // LinkedHashSet keeps first-seen order while dropping duplicates.
            ctx['test'] = new ArrayList(merged);
          """
        }
      }
    ]
  },
  "docs": [
    {
      "_index": "index",
      "_id": "id",
      "_source": {
        "category": [
          "XYZ",
          "ABC",
          "hgh"
        ],
        "keywords": "XYZ"
      }
    }
  ]
}

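Index data (the `_simulate` call above only tests the pipeline; first save the same pipeline definition under the name `avoid_duplicate` with `PUT /_ingest/pipeline/avoid_duplicate`):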

PUT /test-index/_doc/2?pipeline=avoid_duplicate
{
  "category": ["XYZ", "ABC", "hgh7"],
  "keywords": "XYZ"
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.