Elastic Cross Cluster Replication of Data Stream

Both clusters are running v8.9.1.

Hi, we recently upgraded from 7.16.3 to 8.9.1 and have just reconfigured CCR. This is our first time using CCR on data streams. We have previously used CCR on regular indices (metricbeat-*, filebeat-*, heartbeat-*, winlogbeat-*) without problems, but we are now getting errors on data streams (traces-*, metrics-*).

GET _ccr/stats

{
  "auto_follow_stats": {
    "number_of_failed_follow_indices": 119561,
    "number_of_failed_remote_cluster_state_requests": 0,
    "number_of_successful_follow_indices": 10,
    "recent_auto_follow_errors": [
      {
        "leader_index": "metrics:.ds-metrics-apm.internal-default-2023.10.31-000002",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "exception",
          "reason": "java.lang.IllegalArgumentException: cannot follow backing index [.ds-metrics-apm.internal-default-2023.10.31-000002], because local data stream [metrics-apm.internal-default] is no longer marked as replicated",
          "caused_by": {
            "type": "illegal_argument_exception",
            "reason": "cannot follow backing index [.ds-metrics-apm.internal-default-2023.10.31-000002], because local data stream [metrics-apm.internal-default] is no longer marked as replicated"
          }
        }
      },
      {
        "leader_index": "metrics:.ds-metrics-apm.service_destination.10m-default-2023.11.01-000002",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "snapshot_restore_exception",
          "reason": "[_ccr_ELK-PROD2-LN:_latest_/_latest_] cannot restore index [.ds-metrics-apm.service_destination.10m-default-2023.11.01-000002] because an open index with same name already exists in the cluster. Either close or delete the existing index or restore the index under a different name by providing a rename pattern and replacement name"
        }
      },
      {
        "leader_index": "metrics:.ds-metrics-apm.service_summary.10m-default-2023.11.01-000002",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "snapshot_restore_exception",
          "reason": "[_ccr_ELK-PROD2-LN:_latest_/_latest_] cannot restore index [.ds-metrics-apm.service_summary.10m-default-2023.11.01-000002] because an open index with same name already exists in the cluster. Either close or delete the existing index or restore the index under a different name by providing a rename pattern and replacement name"
        }
      },
      {
        "leader_index": "metrics:.ds-metrics-apm.service_transaction.10m-default-2023.11.01-000002",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "snapshot_restore_exception",
          "reason": "[_ccr_ELK-PROD2-LN:_latest_/_latest_] cannot restore index [.ds-metrics-apm.service_transaction.10m-default-2023.11.01-000002] because an open index with same name already exists in the cluster. Either close or delete the existing index or restore the index under a different name by providing a rename pattern and replacement name"
        }
      },
      {
        "leader_index": "metrics:.ds-metrics-apm.transaction.10m-default-2023.11.01-000002",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "snapshot_restore_exception",
          "reason": "[_ccr_ELK-PROD2-LN:_latest_/_latest_] cannot restore index [.ds-metrics-apm.transaction.10m-default-2023.11.01-000002] because an open index with same name already exists in the cluster. Either close or delete the existing index or restore the index under a different name by providing a rename pattern and replacement name"
        }
      },
      {
        "leader_index": "traces:.ds-traces-apm-default-2023.10.26-000002",
        "timestamp": 1698376533004,
        "auto_follow_exception": {
          "type": "exception",
          "reason": "java.lang.IllegalArgumentException: cannot follow backing index [.ds-traces-apm-default-2023.10.26-000002], because local data stream [traces-apm-default] is no longer marked as replicated",
          "caused_by": {
            "type": "illegal_argument_exception",
            "reason": "cannot follow backing index [.ds-traces-apm-default-2023.10.26-000002], because local data stream [traces-apm-default] is no longer marked as replicated"
          }
        }
      },
      {
        "leader_index": "traces:.ds-traces-apm-default-2023.10.31-000003",
        "timestamp": 1698823229449,
        "auto_follow_exception": {
          "type": "exception",
          "reason": "java.lang.IllegalArgumentException: cannot follow backing index [.ds-traces-apm-default-2023.10.31-000003], because local data stream [traces-apm-default] is no longer marked as replicated",
          "caused_by": {
            "type": "illegal_argument_exception",
            "reason": "cannot follow backing index [.ds-traces-apm-default-2023.10.31-000003], because local data stream [traces-apm-default] is no longer marked as replicated"
          }
        }
      }
    ],
    "auto_followed_clusters": [
      {
        "cluster_name": "ELK-PROD2-LN",
        "time_since_last_check_millis": 3401,
        "last_seen_metadata_version": 357732
      }
    ]
  },
  "follow_stats": {
    "indices": [
      {
        "index": "filebeat-all-000039",
        "shards": [
          {
            "remote_cluster": "ELK-PROD2-LN",
            "leader_index": "filebeat-all-000039",
            "follower_index": "filebeat-all-000039",
            "shard_id": 0,
            "leader_global_checkpoint": 37388,
            "leader_max_seq_no": 37388,
            "follower_global_checkpoint": 37388,
            "follower_max_seq_no": 37388,
            "last_requested_seq_no": 37388,
            "outstanding_read_requests": 1,
            "outstanding_write_requests": 0,
            "write_buffer_operation_count": 0,
            "write_buffer_size_in_bytes": 0,
            "follower_mapping_version": 7,
            "follower_settings_version": 2,
            "follower_aliases_version": 2,
            "total_read_time_millis": 108016853,
            "total_read_remote_exec_time_millis": 107872915,
            "successful_read_requests": 28849,
            "failed_read_requests": 0,
            "operations_read": 37389,
            "bytes_read": 105984042,
            "total_write_time_millis": 161282,
            "successful_write_requests": 28849,
            "failed_write_requests": 0,
            "operations_written": 37389,
            "read_exceptions": [],
            "time_since_last_read_millis": 3099
          }
        ]
      },
      {
        "index": "heartbeat-all-000038",
        "shards": [
          {
            "remote_cluster": "ELK-PROD2-LN",
            "leader_index": "heartbeat-all-000038",
            "follower_index": "heartbeat-all-000038",
            "shard_id": 0,
            "leader_global_checkpoint": 267800,
            "leader_max_seq_no": 267800,
            "follower_global_checkpoint": 267800,
            "follower_max_seq_no": 267800,
            "last_requested_seq_no": 267800,
            "outstanding_read_requests": 1,
            "outstanding_write_requests": 0,
            "write_buffer_operation_count": 0,
            "write_buffer_size_in_bytes": 0,
            "follower_mapping_version": 2,
            "follower_settings_version": 2,
            "follower_aliases_version": 2,
            "total_read_time_millis": 108082829,
            "total_read_remote_exec_time_millis": 107665584,
            "successful_read_requests": 90314,
            "failed_read_requests": 0,
            "operations_read": 267801,
            "bytes_read": 464193592,
            "total_write_time_millis": 903446,
            "successful_write_requests": 90289,
            "failed_write_requests": 0,
            "operations_written": 267801,
            "read_exceptions": [],
            "time_since_last_read_millis": 261
          }
        ]
      },
      {
        "index": "metricbeat-all-000666",
        "shards": [
          {
            "remote_cluster": "ELK-PROD2-LN",
            "leader_index": "metricbeat-all-000666",
            "follower_index": "metricbeat-all-000666",
            "shard_id": 0,
            "leader_global_checkpoint": 57780752,
            "leader_max_seq_no": 57780802,
            "follower_global_checkpoint": 57780752,
            "follower_max_seq_no": 57780752,
            "last_requested_seq_no": 57780752,
            "outstanding_read_requests": 1,
            "outstanding_write_requests": 0,
            "write_buffer_operation_count": 0,
            "write_buffer_size_in_bytes": 0,
            "follower_mapping_version": 15,
            "follower_settings_version": 2,
            "follower_aliases_version": 2,
            "total_read_time_millis": 43524157,
            "total_read_remote_exec_time_millis": 39235857,
            "successful_read_requests": 440310,
            "failed_read_requests": 0,
            "operations_read": 57780753,
            "bytes_read": 103986094824,
            "total_write_time_millis": 17372081,
            "successful_write_requests": 439898,
            "failed_write_requests": 0,
            "operations_written": 57780753,
            "read_exceptions": [],
            "time_since_last_read_millis": 33
          }
        ]
      },
      {
        "index": "winlogbeat-all-000001",
        "shards": [
          {
            "remote_cluster": "ELK-PROD2-LN",
            "leader_index": "winlogbeat-all-000001",
            "follower_index": "winlogbeat-all-000001",
            "shard_id": 0,
            "leader_global_checkpoint": 132085,
            "leader_max_seq_no": 132085,
            "follower_global_checkpoint": 132085,
            "follower_max_seq_no": 132085,
            "last_requested_seq_no": 132085,
            "outstanding_read_requests": 1,
            "outstanding_write_requests": 0,
            "write_buffer_operation_count": 0,
            "write_buffer_size_in_bytes": 0,
            "follower_mapping_version": 32,
            "follower_settings_version": 2,
            "follower_aliases_version": 1,
            "total_read_time_millis": 642184747,
            "total_read_remote_exec_time_millis": 641919345,
            "successful_read_requests": 52247,
            "failed_read_requests": 0,
            "operations_read": 132086,
            "bytes_read": 192964177,
            "total_write_time_millis": 539394,
            "successful_write_requests": 52247,
            "failed_write_requests": 0,
            "operations_written": 132086,
            "read_exceptions": [],
            "time_since_last_read_millis": 8235
          }
        ]
      }
    ]
  }
}

GET _data_stream/traces-apm-default

{
  "data_streams": [
    {
      "name": "traces-apm-default",
      "timestamp_field": {
        "name": "@timestamp"
      },
      "indices": [
        {
          "index_name": ".ds-traces-apm-default-2023.10.18-000001",
          "index_uuid": "yj9t_c7OTLq6iERtcF3Slw"
        },
        {
          "index_name": ".ds-traces-apm-default-2023.10.26-000002",
          "index_uuid": "lBdUuZg_TNyjUyuzQ9wlyQ"
        }
      ],
      "generation": 2,
      "_meta": {
        "package": {
          "name": "apm"
        },
        "managed_by": "fleet",
        "managed": true
      },
      "status": "GREEN",
      "template": "traces-apm",
      "ilm_policy": "traces-apm.traces-default_policy",
      "hidden": false,
      "system": false,
      "allow_custom_routing": false,
      "replicated": false
    }
  ]
}

It looks like the solution for us is to delete the data stream and do a rollover. Doing a snapshot restore first and then setting up CCR does not work for data streams; the order should be CCR first, then rollover, and then snapshot restore.

Another option that worked for us: if you have already done the snapshot restore, configure the auto-follow pattern so that the follower names are different, by adding a suffix to them.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.