How to merge arrays while reindexing data?

I have an index that contains several documents with duplicated document information. I want to transform them into a single document that holds an array of the individual entries.

I have the following data in the index:

</>

    {
      "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 2,
          "relation" : "eq"
        },
        "max_score" : 1.0,
        "hits" : [
          {
            "_index" : "reservation",
            "_type" : "_doc",
            "_id" : "26_87",
            "_score" : 1.0,
            "_source" : {
              "Reservations" : [
                {
                  "ItemStatus" : "In Progress",
                  "displayName" : null,
                  "DocumentId" : 87,
                  "ModifiedBy" : "479044",
                  "RequestedQty" : 925428.0,
                  "entityDescription" : null,              
                  "ReservationId" : 67,
                  "ItemName" : "TestName",
                  "DocumentStatusId" : 1,
                  "DocumentType" : null,
                  "OnHandQty" : 0.0,
                  "DocumentNumber" : null,
                  "DocTypeCode" : 26,
                  "DocumentStatus" : "In Progress",
                  "DocumentSeqNo" : "PL#0000059",
                  "ItemNo" : "1014060",
                  "StatusId" : 1,
                  "DocumentCode" : "87",
                  "ReservationNo" : "0055674568"
                }
              ],
              "DocumentNo" : "0000059",
              "IsDeleted" : false
            }
          },
          {
            "_index" : "reservation",
            "_type" : "_doc",
            "_id" : "26_86",
            "_score" : 1.0,
            "_source" : {
              "Reservations" : [
                {
                  "ItemStatus" : "In Progress",
                  "displayName" : null,
                  "DocumentId" : 86,
                  "ModifiedBy" : "479044",
                  "RequestedQty" : 925428.0,          
                  "ReservationId" : 67,
                  "ItemName" : "TestName1",
                  "DocumentStatusId" : 1,
                  "DocumentType" : null,
                  "OnHandQty" : 0.0,
                  "LastAccessedOn" : null,
                  "DocumentNumber" : null,
                  "DocTypeCode" : 26,
                  "DocumentStatus" : "In Progress",
                  "DocumentSeqNo" : "PL#0000059",
                  "ItemNo" : "1013960",
                  "StatusId" : 1,
                  "DocumentCode" : "86",
                  "ReservationNo" : "0055674568"
                }
              ],
              "DocumentNo" : "0000059",
              "IsDeleted" : false
            }
          }
        ]
      }
    }

</>

I want to reindex the data in the following format:

</>

    {
      "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 1,
        "successful" : 1,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : {
          "value" : 1,
          "relation" : "eq"
        },
        "max_score" : 1.0,
        "hits" : [
          {
            "_index" : "reservation",
            "_type" : "_doc",
            "_id" : "0000059",
            "_score" : 1.0,
            "_source" : {
              "Reservations" : [
                {
                  "ItemStatus" : "In Progress",
                  "displayName" : null,
                  "DocumentId" : 87,
                  "ModifiedBy" : "479044",
                  "RequestedQty" : 925428.0,
                  "entityDescription" : null,              
                  "ReservationId" : 67,
                  "ItemName" : "TestName",
                  "DocumentStatusId" : 1,
                  "DocumentType" : null,
                  "OnHandQty" : 0.0,
                  "DocumentNumber" : null,
                  "DocTypeCode" : 26,
                  "DocumentStatus" : "In Progress",
                  "DocumentSeqNo" : "PL#0000059",
                  "ItemNo" : "1014060",
                  "StatusId" : 1,
                  "DocumentCode" : "87",
                  "ReservationNo" : "0055674568"
                },
                {
                  "ItemStatus" : "In Progress",
                  "displayName" : null,
                  "DocumentId" : 86,
                  "ModifiedBy" : "479044",
                  "RequestedQty" : 925428.0,          
                  "ReservationId" : 67,
                  "ItemName" : "TestName1",
                  "DocumentStatusId" : 1,
                  "DocumentType" : null,
                  "OnHandQty" : 0.0,
                  "LastAccessedOn" : null,
                  "DocumentNumber" : null,
                  "DocTypeCode" : 26,
                  "DocumentStatus" : "In Progress",
                  "DocumentSeqNo" : "PL#0000059",
                  "ItemNo" : "1013960",
                  "StatusId" : 1,
                  "DocumentCode" : "86",
                  "ReservationNo" : "0055674568"
                }
              ],
              "DocumentNo" : "0000059",
              "IsDeleted" : false
            }
          }
        ]
      }
    }

</>

Is it possible to achieve this with a custom processor? Any suggestions would be appreciated.

1 Like

One thing to try would be transforms, which allow you to group several documents together to create a so-called entity-centric index.

If the above does not help, this is likely something that you will have to implement on the client side. Even though Elasticsearch has the ability to reindex a whole index, everything happens on a per-document basis, so there is no ability to merge documents.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.