Delete document from search query

Hello everyone,

I use this request to get all (200) duplicated documents in my index, and I want to delete all the documents in this result. I tried to use _delete_by_query, but after many attempts and a lot of searching on the internet, I failed ...

My query is:

GET /index-ip/_query
{
  "aggs": {
      "dup": {
         "terms": {
            "size" : 200,
            "field": "ip_commune.keyword",
            "min_doc_count": 2
     }
    }
  }
}

Returned result:

{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : 754,
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "V64TV2kB0AORAV2GIdMZ",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "98.183.164.36",
          "@version" : "1",
          "tags" : [
            "ip-ipam"
          ],
          "@timestamp" : "2019-03-07T07:34:24.874Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "Dq4TV2kB0AORAV2GIdUe",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "136.168.48.254",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.979Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "264TV2kB0AORAV2GIdUf",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "137.83.124.226",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.889Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "F64TV2kB0AORAV2GIdUe",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "136.168.50.1",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.980Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "Iq4TV2kB0AORAV2GIdUe",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "136.168.86.91",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:25.003Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "Za4TV2kB0AORAV2GIdYh",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "137.69.223.4",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.888Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "Z64TV2kB0AORAV2GIdYh",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "137.78.101.254",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.888Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "aa4TV2kB0AORAV2GIdYh",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "137.83.29.198",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.890Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "d64TV2kB0AORAV2GIdYh",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "98.183.164.55",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.916Z"
        }
      },
      {
        "_index" : "index-ip",
        "_type" : "doc",
        "_id" : "-a4TV2kB0AORAV2GIdUf",
        "_score" : 1.0,
        "_source" : {
          "ip_commune" : "98.183.164.117",
          "@version" : "1",
          "tags" : [
            "ip-glpi"
          ],
          "@timestamp" : "2019-03-07T07:34:24.899Z"
        }
      }
    ]
  },
  "aggregations" : {
    "dup" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "98.183.164.124",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.142",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.166",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.172",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.232",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.25",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.27",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.28",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.29",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.35",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.36",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.37",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.38",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.39",
          "doc_count" : 2
        },
        {
          "key" : "98.183.164.58",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.21",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.22",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.23",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.24",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.25",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.27",
          "doc_count" : 2
        },
        {
          "key" : "98.183.27.28",
          "doc_count" : 2
        }
      ]
    }
  }
}

I don't want to keep one of the duplicate records =) I need to delete both of them.
My goal is to compare two IP lists, one from the ITSM (GLPI) and one from the IPAM, to extract all IP addresses that are not in GLPI.
Any ideas? :confused: And sorry for my bad English ^^

Hello Axel,

The general format for delete by query is to create a good search query and, once you have that, use it as the value of the "query" field in the request body, as described here. Having said this, I'm not certain of the exact syntax from there that would allow you to safely delete only duplicate entries. I would try different searches (using the search API) first to safely ensure you're only returning the correct records before proceeding.

Conventionally, people frequently recreate their indices without duplicates or leverage code-based utilities to remove duplicate documents.

Regards,
Aaron

Hello Aaron,
Thanks for your answer. I have already read this documentation, but I didn't know how to use my query in _delete_by_query :confused: I have tried many syntaxes, like:

POST /index-ip/_delete_by_query
{
 "query":{
   "must": [
    {"terms":{"field": "ip_commune.keyword", "min_doc_count": 2}}
    ]
 } 
}

but I got an error every time, for all the parameters taken from my search query :confused:

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.