Return unique documents in the hits response using an aggregation

I have three documents. Document 2 is a new version of document 1. I have no control over updating document 1, because a third-party system automatically generates the new document in response to an update action on an upstream system. Otherwise, I know that updating the existing record in place would be the easiest way to avoid having to do this kind of aggregation.

PUT test_index

PUT test_index/_mapping/test_doc
{  
   "properties":{  
      "account_number":{  
         "properties":{  
            "S":{  
               "type":"text",
               "fields":{  
                  "keyword":{  
                     "type":"keyword",
                     "ignore_above":256
                  }
               }
            }
         }
      },
      "client_transaction_id":{  
         "properties":{  
            "S":{  
               "type":"text",
               "fields":{  
                  "keyword":{  
                     "type":"keyword",
                     "ignore_above":256
                  }
               }
            }
         }
      },
      "created_date_time_utc_ms":{  
         "properties":{  
            "N":{  
               "type":"text",
               "fields":{  
                  "keyword":{  
                     "type":"keyword",
                     "ignore_above":256
                  }
               }
            }
         }
      },
      "transId_createdDateTime":{  
         "properties":{  
            "S":{  
               "type":"text",
               "fields":{  
                  "keyword":{  
                     "type":"keyword"
                  }
               }
            }
         }
      },
      "SequenceNumber":{  
         "type":"text",
         "fields":{  
            "keyword":{  
               "type":"keyword",
               "ignore_above":256
            }
         }
      },
      "eventName":{  
         "type":"text",
         "fields":{  
            "keyword":{  
               "type":"keyword",
               "ignore_above":256
            }
         }
      }
   }
}

PUT test_index/test_doc/1
{
  "account_number": {"S": "account_x"},
  "client_transaction_id": {"S": "a"},
  "created_date_time_utc_ms": {"N": "1"},
  "transId_createdDateTime": {"S": "a_1"},  
  "SequenceNumber": "1",
  "eventName": "INSERT"
}

PUT test_index/test_doc/2
{
  "account_number": {"S": "account_x"},
  "client_transaction_id": {"S": "a"},
  "created_date_time_utc_ms": {"N": "1"},
  "transId_createdDateTime": {"S": "a_1"},  
  "SequenceNumber": "1",
  "eventName": "MODIFY"
}

PUT test_index/test_doc/3
{
  "account_number": {"S": "account_x"},
  "client_transaction_id": {"S": "b"},
  "created_date_time_utc_ms": {"N": "1"},
  "transId_createdDateTime": {"S": "b_1"},  
  "SequenceNumber": "1",
  "eventName": "INSERT"
}

POST test_index/test_doc/_search
{
        "from": 0,
        "size": 5,
        "query": {
            "match": {
                "account_number.S": {
                    "query": "account_x"
                }
            }
        },
        "sort": {
            "_id": {
                "order": "asc"
            }
        }
}

The above search returns all three documents in the hits response. Here are the results:

{
  "took": 7,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 3,
    "max_score": null,
    "hits": [
      {
        "_index": "test_index",
        "_type": "test_doc",
        "_id": "1",
        "_score": null,
        "_source": {
          "account_number": {
            "S": "account_x"
          },
          "client_transaction_id": {
            "S": "a"
          },
          "created_date_time_utc_ms": {
            "N": "1"
          },
          "transId_createdDateTime": {
            "S": "a_1"
          },
          "SequenceNumber": "1",
          "eventName": "INSERT"
        },
        "sort": [
          "1"
        ]
      },
      {
        "_index": "test_index",
        "_type": "test_doc",
        "_id": "2",
        "_score": null,
        "_source": {
          "account_number": {
            "S": "account_x"
          },
          "client_transaction_id": {
            "S": "a"
          },
          "created_date_time_utc_ms": {
            "N": "1"
          },
          "transId_createdDateTime": {
            "S": "a_1"
          },
          "SequenceNumber": "1",
          "eventName": "MODIFY"
        },
        "sort": [
          "2"
        ]
      },
      {
        "_index": "test_index",
        "_type": "test_doc",
        "_id": "3",
        "_score": null,
        "_source": {
          "account_number": {
            "S": "account_x"
          },
          "client_transaction_id": {
            "S": "b"
          },
          "created_date_time_utc_ms": {
            "N": "1"
          },
          "transId_createdDateTime": {
            "S": "b_1"
          },
          "SequenceNumber": "1",
          "eventName": "INSERT"
        },
        "sort": [
          "3"
        ]
      }
    ]
  }
}

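How do I create a search aggregation that returns only documents 2 and 3 in the hits response — that is, only the newest version of each record, leaving out document 1 because document 2 supersedes it?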

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.