Cluster State RED after opening a closed index

Hi, I have a managed cluster hosted by elastic.co. Here is the configuration:
|Platform => Amazon Web Services| |Region => ***| |Memory => 4 GB|
|Storage => 96 GB| |SSD => Yes | |High availability => Yes 2 data center|

Each index in this cluster contains the log data of exactly one day. The average index size is 15 MB and the average doc count is 15,000. The cluster is not under any kind of pressure (JVM, indexing and search time, and disk space are all well within comfortable limits).

When I opened a previously closed index, the cluster turned RED. Here are some metrics I found by querying Elasticsearch.

GET /_cluster/allocation/explain
{
  "index": "some_index_name",    # 1 Primary shard , 1 replica shard 
  "shard": 0,
  "primary": true
}

Response :

"unassigned_info": {
"reason": "ALLOCATION_FAILED",
"failed_allocation_attempts": 3,
"details": "failed recovery, failure RecoveryFailedException[[some_index_name][0]: Recovery failed on {instance-*****}{Hash}{HASH}{IP}{IP}{logical_availability_zone=zone-1, availability_zone=***, region=***}]; nested: IndexShardRecoveryException[failed to fetch index version after copying it over]; nested: IndexShardRecoveryException[shard allocated for local recovery (post api), should exist, but doesn't, current files: []]; nested: IndexNotFoundException[no segments* file found in store(mmapfs(/app/data/nodes/0/indices/MFIFAQO2R_ywstzqrfbY4w/0/index)): files: []]; ",
"last_allocation_status": "no_valid_shard_copy"
}, 
"can_allocate": "no_valid_shard_copy",
"allocate_explanation": "cannot allocate because all found copies of the shard are either stale or corrupt",
"node_allocation_decisions": [
  {
    "node_name": "instance-***",
    "node_decision": "no",
    "store": {
      "in_sync": false,
      "allocation_id": "RANDOM_HASH",
      "store_exception": {
        "type": "index_not_found_exception",
        "reason": "no segments* file found in SimpleFSDirectory@/app/data/nodes/0/indices/RANDOM_HASH/0/index lockFactory=org.apache.lucene.store.NativeFSLockFactory@346e1b99: files: []"
      }
  }
},
{
  "node_name": "instance-***",
  "node_attributes": {
    "logical_availability_zone": "zone-0"
  },
  "node_decision": "no",
  "store": {
    "found": false
  }
}

I've tried rerouting the shard to a node, even with the accept_data_loss flag set to true.

POST _cluster/reroute
{
  "commands" : [
  {"allocate_stale_primary" : {
  "index" : "some_index_name", "shard" : 0,
  "node" : "instance-***",
  "accept_data_loss" : true
    }
  }
  ]
}

Response:

"acknowledged": true,
"state": {
"version": 338190,
"state_uuid": "RANDOM_HASH",
"master_node": "RANDOM_HASH",
"blocks": {
  "indices": {
    "restored_**": {
      "4": {
        "description": "index closed",
        "retryable": false,
        "levels": [
          "read",
          "write"
        ]
      }
    },
    "restored_**": {
      "4": {
        "description": "index closed",
        "retryable": false,
        "levels": [
          "read",
          "write"
        ]
      }
    }
  }
},
"routing_table": {
  "indices": {
    "SOME_INDEX_NAME": {
      "shards": {
        "0": [
          {
            "state": "INITIALIZING",
            "primary": true,
            "relocating_node": null,
            "shard": 0,
            "index": "SOME_INDEX_NAME",
            "recovery_source": {
              "type": "EXISTING_STORE"
            },
            "allocation_id": {
              "id": "HASH"
            },
            "unassigned_info": {
              "reason": "ALLOCATION_FAILED",
              "failed_attempts": 4,
              "delayed": false,
              "details": "same as explanation above ^ ",
              "allocation_status": "no_valid_shard_copy"
            }
          },
          {
            "state": "UNASSIGNED",
            "primary": false,
            "node": null,
            "relocating_node": null,
            "shard": 0,
            "index": "some_index_name",
            "recovery_source": {
              "type": "PEER"
            },
            "unassigned_info": {
              "reason": "INDEX_REOPENED",
              "delayed": false,
              "allocation_status": "no_attempt"
            }
          }
        ]
      }
    },

Any suggestions are welcome. Thanks and regards.

Can you provide the full output of the cluster stats API?

The total cluster state is almost 120,000 lines long. Do you want me to post the complete output here, or should I truncate the unnecessary portions?

I am not asking for cluster state, but cluster stats.

@Christian_Dahlqvist My mistake. Here is the output.

{
"_nodes": {
    "total": 3,
    "successful": 3,
    "failed": 0
},
"cluster_name": "HASH",
"timestamp": 1519757464182,
"status": "red",
"indices": {
    "count": 676,
    "shards": {
        "total": 1912,
        "primaries": 956,
        "replication": 1,
        "index": {
            "shards": {
                "min": 2,
                "max": 10,
                "avg": 2.828402366863905
            },
            "primaries": {
                "min": 1,
                "max": 5,
                "avg": 1.4142011834319526
            },
            "replication": {
                "min": 1,
                "max": 1,
                "avg": 1
            }
        }
    },
    "docs": {
        "count": 74040398,
        "deleted": 2
    },
    "store": {
        "size": "15.4gb",
        "size_in_bytes": 16571645454,
        "throttle_time": "0s",
        "throttle_time_in_millis": 0
    },
    "fielddata": {
        "memory_size": "0b",
        "memory_size_in_bytes": 0,
        "evictions": 0
    },
    "query_cache": {
        "memory_size": "0b",
        "memory_size_in_bytes": 0,
        "total_count": 0,
        "hit_count": 0,
        "miss_count": 0,
        "cache_size": 0,
        "cache_count": 0,
        "evictions": 0
    },
    "completion": {
        "size": "0b",
        "size_in_bytes": 0
    },
    "segments": {
        "count": 9740,
        "memory": "145.6mb",
        "memory_in_bytes": 152751637,
        "terms_memory": "135.1mb",
        "terms_memory_in_bytes": 141664311,
        "stored_fields_memory": "7.7mb",
        "stored_fields_memory_in_bytes": 8142584,
        "term_vectors_memory": "0b",
        "term_vectors_memory_in_bytes": 0,
        "norms_memory": "1.2mb",
        "norms_memory_in_bytes": 1359872,
        "points_memory": "70.9kb",
        "points_memory_in_bytes": 72614,
        "doc_values_memory": "1.4mb",
        "doc_values_memory_in_bytes": 1512256,
        "index_writer_memory": "1mb",
        "index_writer_memory_in_bytes": 1127528,
        "version_map_memory": "389b",
        "version_map_memory_in_bytes": 389,
        "fixed_bit_set": "26.2kb",
        "fixed_bit_set_memory_in_bytes": 26832,
        "max_unsafe_auto_id_timestamp": 9223372036854776000,
        "file_sizes": {}
    }
},
"nodes": {
    "count": {
        "total": 3,
        "data": 2,
        "coordinating_only": 0,
        "master": 3,
        "ingest": 3
    },
    "versions": [
        "5.6.7"
    ],
    "os": {
        "available_processors": 68,
        "allocated_processors": 6,
        "names": [
            {
                "name": "Linux",
                "count": 3
            }
        ],
        "mem": {
            "total": "510.2gb",
            "total_in_bytes": 547907768320,
            "free": "4.5gb",
            "free_in_bytes": 4858716160,
            "used": "505.7gb",
            "used_in_bytes": 543049052160,
            "free_percent": 1,
            "used_percent": 99
        }
    },
    "process": {
        "cpu": {
            "percent": 5
        },
        "open_file_descriptors": {
            "min": 266,
            "max": 3307,
            "avg": 2005
        }
    },
    "jvm": {
        "max_uptime": "3.7h",
        "max_uptime_in_millis": 13414070,
        "versions": [
            {
                "version": "1.8.0_144",
                "vm_name": "Java HotSpot(TM) 64-Bit Server VM",
                "vm_version": "25.144-b01",
                "vm_vendor": "Oracle Corporation",
                "count": 3
            }
        ],
        "mem": {
            "heap_used": "2.3gb",
            "heap_used_in_bytes": 2520521952,
            "heap_max": "4.5gb",
            "heap_max_in_bytes": 4886495232
        },
        "threads": 153
    },
    "fs": {
        "total": "234gb",
        "total_in_bytes": 251255586816,
        "free": "213.9gb",
        "free_in_bytes": 229705080832,
        "available": "213.9gb",
        "available_in_bytes": 229705080832
    },
    "plugins": [
        {
            "name": "repository-s3",
            "version": "5.6.7",
            "description": "The S3 repository plugin adds S3 repositories",
            "classname": "org.elasticsearch.repositories.s3.S3RepositoryPlugin",
            "has_native_controller": false
        },
        {
            "name": "x-pack",
            "version": "5.6.7",
            "description": "Elasticsearch Expanded Pack Plugin",
            "classname": "org.elasticsearch.xpack.XPackPlugin",
            "has_native_controller": true
        },
        {
            "name": "found-elasticsearch",
            "version": "5.6.7",
            "description": "Elasticsearch plugin for Found",
            "classname": "org.elasticsearch.plugin.found.FoundPlugin",
            "has_native_controller": false
        }
    ],
    "network_types": {
        "transport_types": {},
        "http_types": {}
    }
  }
}

It looks like you have far too many shards for a cluster that size given how much data you have. Have a look at this blog post for some guidance.

Hello @Christian_Dahlqvist, I've removed some indices from ES (after snapshotting them, of course). Below is the current cluster stats output. In this state, when I open a closed index, the cluster turns RED again.

{
"_nodes": {
    "total": 3,
    "successful": 3,
    "failed": 0
},
"cluster_name": "cd06806eaf41c8e6cd9d137156ab3db9",
"timestamp": 1519796442764,
"status": "red",
"indices": {
    "count": 17,
    "shards": {
        "total": 82,
        "primaries": 41,
        "replication": 1,
        "index": {
            "shards": {
                "min": 2,
                "max": 10,
                "avg": 4.823529411764706
            },
            "primaries": {
                "min": 1,
                "max": 5,
                "avg": 2.411764705882353
            },
            "replication": {
                "min": 1,
                "max": 1,
                "avg": 1
            }
        }
    },
    "docs": {
        "count": 21886858,
        "deleted": 2
    },
    "store": {
        "size": "3.9gb",
        "size_in_bytes": 4231512086,
        "throttle_time": "0s",
        "throttle_time_in_millis": 0
    },
    "fielddata": {
        "memory_size": "0b",
        "memory_size_in_bytes": 0,
        "evictions": 0
    },
    "query_cache": {
        "memory_size": "0b",
        "memory_size_in_bytes": 0,
        "total_count": 0,
        "hit_count": 0,
        "miss_count": 0,
        "cache_size": 0,
        "cache_count": 0,
        "evictions": 0
    },
    "completion": {
        "size": "0b",
        "size_in_bytes": 0
    },
    "segments": {
        "count": 420,
        "memory": "19.3mb",
        "memory_in_bytes": 20267409,
        "terms_memory": "16.4mb",
        "terms_memory_in_bytes": 17216083,
        "stored_fields_memory": "2.6mb",
        "stored_fields_memory_in_bytes": 2731224,
        "term_vectors_memory": "0b",
        "term_vectors_memory_in_bytes": 0,
        "norms_memory": "60.2kb",
        "norms_memory_in_bytes": 61696,
        "points_memory": "11.2kb",
        "points_memory_in_bytes": 11558,
        "doc_values_memory": "241kb",
        "doc_values_memory_in_bytes": 246848,
        "index_writer_memory": "0b",
        "index_writer_memory_in_bytes": 0,
        "version_map_memory": "0b",
        "version_map_memory_in_bytes": 0,
        "fixed_bit_set": "0b",
        "fixed_bit_set_memory_in_bytes": 0,
        "max_unsafe_auto_id_timestamp": 9223372036854776000,
        "file_sizes": {}
    }
},
"nodes": {
    "count": {
        "total": 3,
        "data": 2,
        "coordinating_only": 0,
        "master": 3,
        "ingest": 3
    },
    "versions": [
        "5.6.7"
    ],
    "os": {
        "available_processors": 68,
        "allocated_processors": 6,
        "names": [
            {
                "name": "Linux",
                "count": 3
            }
        ],
        "mem": {
            "total": "510.2gb",
            "total_in_bytes": 547907768320,
            "free": "17.6gb",
            "free_in_bytes": 18947035136,
            "used": "492.6gb",
            "used_in_bytes": 528960733184,
            "free_percent": 3,
            "used_percent": 97
        }
    },
    "process": {
        "cpu": {
            "percent": 0
        },
        "open_file_descriptors": {
            "min": 266,
            "max": 587,
            "avg": 475
        }
    },
    "jvm": {
        "max_uptime": "14.5h",
        "max_uptime_in_millis": 52393050,
        "versions": [
            {
                "version": "1.8.0_144",
                "vm_name": "Java HotSpot(TM) 64-Bit Server VM",
                "vm_version": "25.144-b01",
                "vm_vendor": "Oracle Corporation",
                "count": 3
            }
        ],
        "mem": {
            "heap_used": "2.8gb",
            "heap_used_in_bytes": 3074184232,
            "heap_max": "4.5gb",
            "heap_max_in_bytes": 4886495232
        },
        "threads": 154
    },
    "fs": {
        "total": "234gb",
        "total_in_bytes": 251255586816,
        "free": "225.7gb",
        "free_in_bytes": 242430738432,
        "available": "225.7gb",
        "available_in_bytes": 242430738432
    },
    "plugins": [
        {
            "name": "repository-s3",
            "version": "5.6.7",
            "description": "The S3 repository plugin adds S3 repositories",
            "classname": "org.elasticsearch.repositories.s3.S3RepositoryPlugin",
            "has_native_controller": false
        },
        {
            "name": "x-pack",
            "version": "5.6.7",
            "description": "Elasticsearch Expanded Pack Plugin",
            "classname": "org.elasticsearch.xpack.XPackPlugin",
            "has_native_controller": true
        },
        {
            "name": "found-elasticsearch",
            "version": "5.6.7",
            "description": "Elasticsearch plugin for Found",
            "classname": "org.elasticsearch.plugin.found.FoundPlugin",
            "has_native_controller": false
        }
    ],
    "network_types": {
        "transport_types": {},
        "http_types": {}
    }
}
}

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.