Getting a version conflict even when updating sequentially using _update_by_query

I am trying to update a document's nested-type field using a Painless script via _update_by_query. The requests are executed sequentially, meaning the next _update_by_query is run only after the previous one has successfully returned. But I am still getting a version conflict when the two queries try to update the nested field with the Painless script. Also, there is no other process updating Elasticsearch.

The following is the first _update_by_query, which returns successfully:

{
  "index": [
    "user_39"
  ],
  "body": {
    "script": {
      "source": "boolean done = false;
for (int i = 0; i < ctx._source.user.length; ++i){
if(ctx._source.user[i].id == params.id){
ctx._source.user[i] = params.obj; 
done=true;
}
}
if(done==false){
ctx._source.user.add(params.obj)
}",
      "params": {
        "id": 7018947,
        "obj": {
          "id": 7018947,
          "user_id": 3750511,
          "name": "User_1",
          "created_at": "2019-07-03T06:32:02.000Z",
          "updated_at": "2019-07-03T06:32:02.000Z"
        }
      },
      "lang": "painless"
    },
    "query": {
      "terms": {
        "_id": [
          232323
        ]
      }
    }
  }
}

The following is the next _update_by_query, which throws a version conflict. Note that both queries are trying to update the same document and the same nested field array, but a different element, and they run one after the other.

{
      "index": [
        "user_39"
      ],
      "body": {
        "script": {
          "source": "boolean done = false;
    for (int i = 0; i < ctx._source.user.length; ++i){
    if(ctx._source.user[i].id == params.id){
    ctx._source.user[i] = params.obj; 
    done=true;
    }
    }
    if(done==false){
    ctx._source.user.add(params.obj)
    }",
          "params": {
            "id": 7018949,
            "obj": {
              "id": 7018949,
              "user_id": 3750511,
              "name": "User_2",
              "created_at": "2019-07-03T06:32:02.000Z",
              "updated_at": "2019-07-03T06:32:02.000Z"
            }
          },
          "lang": "painless"
        },
        "query": {
          "terms": {
            "_id": [
              232323
            ]
          }
        }
      }
    }

Here is the error object returned by the second query:

"body": {
            "took": 7,
            "timed_out": false,
            "total": 1,
            "updated": 0,
            "deleted": 0,
            "batches": 1,
            "version_conflicts": 1,
            "noops": 0,
            "retries": {
                "bulk": 0,
                "search": 0
            },
            "throttled_millis": 0,
            "requests_per_second": -1,
            "throttled_until_millis": 0,
            "failures": [
                {
                    "index": "user_39",
                    "type": "_doc",
                    "id": "232323",
                    "cause": {
                        "type": "version_conflict_engine_exception",
                        "reason": "[232323]: version conflict, required seqNo [1394834], primary term [1]. current document has seqNo [1394835] and primary term [1]",
                        "index_uuid": "SDOxthUJRw2h8vryIoZxqQ",
                        "shard": "0",
                        "index": "user_39"
                    },
                    "status": 409
                }
            ]
        },

I also noticed a pattern: only nested field updates throw a conflict. Other non-nested field updates with Painless and _update_by_query run one after the other without conflict. Does anyone have any idea?

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.