Hi everyone!
I recently upgraded our servers to Elasticsearch 7.9, but I have been unable to keep our full dataset indexed ever since: every morning at exactly 4AM, all data gets deleted from that index, and only that index.
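To be clear, the index itself survives the night; it's the documents that vanish. A simple count along these lines (assuming the node listens on plain localhost:9200 without auth) still shows everything in the evening and reports 0 the next morning:

curl -s 'localhost:9200/asset_210820/_count'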
The logs make no mention of it at all; the usual scheduled ML maintenance and monitoring cleanup show up, but there is nothing anywhere near 4AM:
[2020-08-26T01:48:01,618][INFO ][o.e.x.m.MlDailyMaintenanceService] [elastic01] triggering scheduled [ML] maintenance tasks
[2020-08-26T01:48:01,618][INFO ][o.e.x.m.a.TransportDeleteExpiredDataAction] [elastic01] Deleting expired data
[2020-08-26T01:48:01,618][INFO ][o.e.x.m.a.TransportDeleteExpiredDataAction] [elastic01] Completed deletion of expired ML data
[2020-08-26T01:48:01,618][INFO ][o.e.x.m.MlDailyMaintenanceService] [elastic01] Successfully completed [ML] maintenance tasks
[2020-08-26T02:00:01,926][INFO ][o.e.c.m.MetadataCreateIndexService] [elastic01] [.monitoring-es-7-2020.08.26] creating index, cause [auto(bulk api)], templates [.monitoring-es], shards [1]/[0]
[2020-08-26T02:00:01,926][INFO ][o.e.c.r.a.AllocationService] [elastic01] updating number_of_replicas to [1] for indices [.monitoring-es-7-2020.08.26]
[2020-08-26T02:00:05,213][INFO ][o.e.c.m.MetadataCreateIndexService] [elastic01] [.monitoring-kibana-7-2020.08.26] creating index, cause [auto(bulk api)], templates [.monitoring-kibana], shards [1]/[0]
[2020-08-26T02:00:05,213][INFO ][o.e.c.r.a.AllocationService] [elastic01] updating number_of_replicas to [1] for indices [.monitoring-kibana-7-2020.08.26]
[2020-08-26T03:00:01,620][INFO ][o.e.x.m.e.l.LocalExporter] [elastic01] cleaning up [2] old indices
[2020-08-26T03:00:01,620][INFO ][o.e.c.m.MetadataDeleteIndexService] [elastic01] [.monitoring-kibana-7-2020.08.19/dH420jbvReqZM2IsQJPMtQ] deleting index
[2020-08-26T03:00:01,620][INFO ][o.e.c.m.MetadataDeleteIndexService] [elastic01] [.monitoring-es-7-2020.08.19/ofujZGO-QdyRRGtolkAmng] deleting index
[2020-08-26T03:30:01,611][INFO ][o.e.x.s.SnapshotRetentionTask] [elastic01] starting SLM retention snapshot cleanup task
[2020-08-26T03:30:01,611][INFO ][o.e.x.s.SnapshotRetentionTask] [elastic01] there are no repositories to fetch, SLM retention snapshot cleanup task complete
The index settings are nothing too special either.
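For reference, this is the output of the get-settings call with defaults included (analysis section trimmed for brevity), roughly:

curl -s 'localhost:9200/asset_210820/_settings?include_defaults=true'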
{
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"purpose": "search"
}
}
},
"refresh_interval": "240s",
"number_of_shards": "6",
"blocks": {
"read_only_allow_delete": "false"
},
"provided_name": "asset_210820",
"max_result_window": "1000000",
"creation_date": "1597991656927",
"analysis": {
...
},
"number_of_replicas": "0",
"uuid": "P3ez0isyQaKcXYBf2nQb7g",
"version": {
"created": "7080099",
"upgraded": "7090099"
}
}
},
"defaults": {
"index": {
"flush_after_merge": "512mb",
"final_pipeline": "_none",
"max_inner_result_window": "100",
"unassigned": {
"node_left": {
"delayed_timeout": "1m"
}
},
"max_terms_count": "65536",
"lifecycle": {
"name": "",
"parse_origination_date": "false",
"indexing_complete": "false",
"rollover_alias": "",
"origination_date": "-1"
},
"routing_partition_size": "1",
"force_memory_term_dictionary": "false",
"max_docvalue_fields_search": "100",
"merge": {
"scheduler": {
"max_thread_count": "2",
"auto_throttle": "true",
"max_merge_count": "7"
},
"policy": {
"reclaim_deletes_weight": "2.0",
"floor_segment": "2mb",
"max_merge_at_once_explicit": "30",
"max_merge_at_once": "10",
"max_merged_segment": "5gb",
"expunge_deletes_allowed": "10.0",
"segments_per_tier": "10.0",
"deletes_pct_allowed": "33.0"
}
},
"max_refresh_listeners": "1000",
"max_regex_length": "1000",
"load_fixed_bitset_filters_eagerly": "true",
"number_of_routing_shards": "1",
"write": {
"wait_for_active_shards": "1"
},
"verified_before_close": "false",
"mapping": {
"coerce": "false",
"nested_fields": {
"limit": "50"
},
"depth": {
"limit": "20"
},
"field_name_length": {
"limit": "9223372036854775807"
},
"total_fields": {
"limit": "1000"
},
"nested_objects": {
"limit": "10000"
},
"ignore_malformed": "false"
},
"source_only": "false",
"soft_deletes": {
"enabled": "false",
"retention": {
"operations": "0"
},
"retention_lease": {
"period": "12h"
}
},
"max_script_fields": "32",
"query": {
"default_field": [
"*"
],
"parse": {
"allow_unmapped_fields": "true"
}
},
"format": "0",
"frozen": "false",
"sort": {
"missing": [],
"mode": [],
"field": [],
"order": []
},
"priority": "1",
"codec": "default",
"max_rescore_window": "10000",
"max_adjacency_matrix_filters": "100",
"analyze": {
"max_token_count": "10000"
},
"gc_deletes": "60s",
"top_metrics_max_size": "10",
"optimize_auto_generated_id": "true",
"max_ngram_diff": "1",
"hidden": "false",
"translog": {
"generation_threshold_size": "64mb",
"flush_threshold_size": "512mb",
"sync_interval": "5s",
"retention": {
"size": "512MB",
"age": "12h"
},
"durability": "REQUEST"
},
"auto_expand_replicas": "false",
"mapper": {
"dynamic": "true"
},
"recovery": {
"type": ""
},
"requests": {
"cache": {
"enable": "true"
}
},
"data_path": "",
"highlight": {
"max_analyzed_offset": "1000000"
},
"routing": {
"rebalance": {
"enable": "all"
},
"allocation": {
"enable": "all",
"total_shards_per_node": "-1"
}
},
"search": {
"slowlog": {
"level": "TRACE",
"threshold": {
"fetch": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
},
"query": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
}
},
"idle": {
"after": "30s"
},
"throttled": "false"
},
"fielddata": {
"cache": "node"
},
"default_pipeline": "_none",
"max_slices_per_scroll": "1024",
"shard": {
"check_on_startup": "false"
},
"xpack": {
"watcher": {
"template": {
"version": ""
}
},
"version": "",
"ccr": {
"following_index": "false"
}
},
"percolator": {
"map_unmapped_fields_as_text": "false"
},
"allocation": {
"max_retries": "5",
"existing_shards_allocator": "gateway_allocator"
},
"indexing": {
"slowlog": {
"reformat": "true",
"threshold": {
"index": {
"warn": "-1",
"trace": "-1",
"debug": "-1",
"info": "-1"
}
},
"source": "1000",
"level": "TRACE"
}
},
"compound_format": "0.1",
"blocks": {
"metadata": "false",
"read": "false",
"read_only": "false",
"write": "false"
},
"store": {
"stats_refresh_interval": "10s",
"type": "",
"fs": {
"fs_lock": "native"
},
"preload": []
},
"queries": {
"cache": {
"enabled": "true"
}
},
"warmer": {
"enabled": "true"
},
"max_shingle_diff": "3",
"query_string": {
"lenient": "false"
}
}
}
}
I am absolutely stumped about where to even start looking. The cluster has three nodes; the index has 6 shards and, to speed up indexing, no replicas. In addition, since I read about attacks that wipe data from publicly reachable Elasticsearch clusters, ours is only reachable internally, and we're a relatively small company, so there's no chance of someone else fiddling with it.
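The only built-in schedulers I can think of that delete anything are ILM, SLM and Watcher. Judging by the settings above there is no ILM policy attached (lifecycle.name is empty and nothing overrides it), and the SLM log line says there are no snapshot repositories, so I believe I can rule those two out. Is there anywhere else I should look? These are the checks I had in mind (again assuming plain localhost:9200):

# is an ILM policy acting on the index?
curl -s 'localhost:9200/asset_210820/_ilm/explain'
# any snapshot lifecycle policies?
curl -s 'localhost:9200/_slm/policy'
# is Watcher doing anything?
curl -s 'localhost:9200/_watcher/stats'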
Since this is still a development setup, I could simply set everything up again from scratch, but I'd rather understand what is going on before I do.
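Before I rebuild, my plan is to try to catch the delete in the act: shortly before 4AM I would poll the tasks API for anything delete-related from a shell on one of the nodes, along these lines (a rough sketch, same localhost assumption as above):

# print every running task whose action matches *delete*, every few seconds
while true; do curl -s 'localhost:9200/_tasks?detailed=true&actions=*delete*'; echo; sleep 5; done

If the license allows, enabling the audit log (xpack.security.audit.enabled: true in elasticsearch.yml) might be even better, since that would also record where each request came from. Does that sound like a sensible way to narrow it down?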
Best regards,
Stefano