Remove nested fields by query and/or by _reindex

I was using dynamic mapping to create .raw fields for all strings, but I want to remove those in favor of a .keyword field.
I'm trying to figure out the proper ctx._source syntax for _update_by_query or _reindex to remove these fields from all documents in the index.

What about using the reindex API to read the _source from the old index and send it to the new index, which has the new mapping?
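Something along these lines, assuming the target index (logs-000068.2 here is just an illustrative name) has already been created with the new keyword-based mapping:

POST _reindex?wait_for_completion=false
{
  "source": {
    "index": "logs-000068.1"
  },
  "dest": {
    "index": "logs-000068.2"
  }
}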

The new mapping still uses dynamic mapping, but with keyword fields instead of raw fields. So either way I would need something like ctx._source.remove("FIELD.raw").
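For reference, the dynamic template in the new mapping is roughly this shape (the template name and ignore_above value are illustrative; on 6.x it would sit under the type name inside mappings):

"dynamic_templates": [
  {
    "strings_as_text_and_keyword": {
      "match_mapping_type": "string",
      "mapping": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          }
        }
      }
    }
  }
]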

I've tried a few things like:

POST logs-000068.1/_update_by_query?wait_for_completion=false&conflicts=proceed
{
  "script": {
    "source": """ctx._source.remove("raw")""",
    "lang": "painless"
  },
  "query": {
    "bool": {
      "must": [
        {
          "exists": {
            "field": "raw"
          }
        }
      ]
    }
  }
}

and

POST logs-000068.1/_update_by_query
{
  "script": {
    "lang": "painless",
    "source": """
      // remove any top-level key ending in .raw
      ctx._source.keySet().stream()
        .filter(k -> k.endsWith(".raw"))
        .collect(Collectors.toList())
        .forEach(k -> ctx._source.remove(k));
    """
  }
}

The important thing is what the _source looks like.
IMHO you won't see any raw field in the _source.
So you don't need to remove what does not exist.
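That's because a raw multi-field only exists in the mapping and the index, never in the stored _source. For example, with a mapping like this (the field name is just for illustration):

"msg": {
  "type": "text",
  "fields": {
    "raw": {
      "type": "keyword"
    }
  }
}

the document still only contains "msg" in its _source; msg.raw is built at index time from that same value.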

Could you share a sample document?

You are correct, the raw fields aren't in the document itself, so this should be fine:

{
  "_index": "logs-000068.1",
  "_type": "_doc",
  "_id": "ARNv7WkB31eIUe7WLbyH",
  "_version": 1,
  "_seq_no": 44930422,
  "_primary_term": 1,
  "found": true,
  "_source": {
    "msg": "Received Tagging Service Request.",
    "entitled": "company",
    "apiKey": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXx",
    "activity": "TaggingService.Tag",
    "source": [
      "C:\\Tagging\\TaggingService.log",
      "Metadata.Services.Tagging.TaggingService"
    ],
    "sessionid": "be7ff00c08ce48f69428ca34789a60ff",
    "type": "feed_logs",
    "responseType": "application/rdf+xml",
    "apenv": "prod",
    "log_category": "feed_logs",
    "requestId": "c1ff4eca042f4d569a3975d5af4dbbe2",
    "beat": {
      "hostname": "win-dnr3cmptlrv",
      "name": "win-dnr3cmptlrv",
      "version": "5.6.4"
    },
    "host": "win-dnr3cmptlrv",
    "@version": "1",
    "elapsedTime_t": "2911",
    "app": "apms",
    "Status": "Success",
    "hashdata": "7f22d9020fc199f8c0184b8b4b53c2c0",
    "offset": 3414611,
    "level": "INFO",
    "input_type": "log",
    "message": "2019-04-05 12:17:57.4075|INFO|Metadata.Services.Tagging.TaggingService|_sessionid=\"be7ff00c08ce48f69428ca34789a60ff\" _subcategory=\"NONE\" _activity=\"TaggingService.Tag\" requestId=\"c1ff4eca042f4d569a3975d5af4dbbe2\" apiKey=\"XXXXXXXXXXXXXXXXXXXX\" entitled=\"company\" responseType=\"application/rdf+xml\" Status=\"Success\" _msg=\"Received Tagging Service Request.\" elapsedTime_t=\"2911\"",
    "tags": [
      "beats",
      "beats_input_codec_plain_applied",
      "priority_queue",
      "eastern"
    ],
    "site": "api",
    "@timestamp": "2019-04-05T16:17:57.407Z",
    "cacheKeys_test": null,
    "states_built_test": null,
    "subcategory": "NONE",
    "skipped_min_dates_test": null
  }
}

_reindex gives me issues sometimes. For example, here I had to rename a few fields:

{
  "completed" : true,
  "task" : {
    "node" : "_b8r5Ci0Qxa2UyiZJIFwrQ",
    "id" : 1432658604,
    "type" : "transport",
    "action" : "indices:data/write/reindex",
    "status" : {
      "total" : 269913131,
      "updated" : 1850000,
      "created" : 0,
      "deleted" : 0,
      "batches" : 185,
      "version_conflicts" : 0,
      "noops" : 0,
      "retries" : {
        "bulk" : 0,
        "search" : 0
      },
      "throttled_millis" : 0,
      "requests_per_second" : -1.0,
      "throttled_until_millis" : 0
    },
    "description" : "reindex from [logs-000070] updated with Script{type=inline, lang='painless', idOrCode='ctx._source['cacheKeys_test'] = ctx._source.remove('cacheKeys_t'); ctx._source['states_built_test'] = ctx._source.remove('states_built_t'); ctx._source['skipped_min_dates_test'] = ctx._source.remove('skipped_min_dates_t')', options={}, params={}} to [logs-000070.1][_doc]",
    "start_time_in_millis" : 1559239057119,
    "running_time_in_nanos" : 1152160768166,
    "cancellable" : true,
    "headers" : { }
  },
  "error" : {
    "type" : "search_phase_execution_exception",
    "reason" : "all shards failed",
    "phase" : "query",
    "grouped" : true,
    "failed_shards" : [
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [2638765]"
        }
      },
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [2287697]"
        }
      },
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [1369216]"
        }
      },
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [373423]"
        }
      },
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [190986]"
        }
      },
      {
        "shard" : -1,
        "index" : null,
        "reason" : {
          "type" : "search_context_missing_exception",
          "reason" : "No search context found for id [1369215]"
        }
      }
    ],
    "caused_by" : {
      "type" : "search_context_missing_exception",
      "reason" : "No search context found for id [1369215]"
    }
  }
}

Anything in the Elasticsearch logs that could give more details?
Any chance a node went down or was overloaded while you were doing that?
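If you retry, it can also help to watch the reindex task while it runs and see whether failures show up there:

GET _tasks?detailed=true&actions=*reindex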

I don't recall seeing anything in the logs, but as you can see from that task output, this was my second attempt ("updated" vs "created") and it had the same issues. So I don't think it has anything to do with the cluster being overloaded, but more with something going wrong in the _reindex itself. This was the 3rd index; I did 2 before it with no issues.
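For anyone hitting the same search_context_missing_exception: it typically means the scroll context behind the reindex expired before the next batch was fetched. A possible mitigation (a sketch; the scroll keep-alive and batch size values are just examples) is to lower the batch size and raise the scroll timeout:

POST _reindex?wait_for_completion=false&scroll=30m
{
  "source": {
    "index": "logs-000070",
    "size": 500
  },
  "dest": {
    "index": "logs-000070.1"
  }
}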
