Repeating key impact on disk usage

Hi,
I am writing network device performance data to Elasticsearch. Each 24-hour index holds 250227929 documents, which adds up to 32 GB of disk space.
I am looking into ways to reduce disk usage.

Documents look like this:

{
    "took": 3,
    "timed_out": false,
    "_shards": {
        "total": 4,
        "successful": 4,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 714,
        "max_score": 16.683826,
        "hits": [
            {
                "_index": "telegraf-interface-2018.06.13",
                "_type": "metrics",
                "_id": "b0gK-WMBvxGYDnrPBB7n",
                "_score": 16.683826,
                "_source": {
                    "@timestamp": "2018-06-13T14:06:12+02:00",
                    "interface": {
                        "ifHCInOctets": 166294586509,
                        "ifHCInUcastPkts": 108997302,
                        "ifHCOutOctets": 77963521118,
                        "ifHCOutUcastPkts": 208360884,
                        "ifHighSpeed": 10000
                    },
                    "measurement_name": "interface",
                    "tag": {
                        "agent_host": "labrouter.local.net",
                        "ifAlias": "Uplink_To_switch",
                        "ifDescr": "TenGigabitEthernet1/50",
                        "platform_tag": "IOS"
                    }
                }
            },
            {
                "_index": "telegraf-interface-2018.06.13",
                "_type": "metrics",
                "_id": "RUoK-WMBvxGYDnrPRlTQ",
                "_score": 16.683826,
                "_source": {
                    "@timestamp": "2018-06-13T14:00:12+02:00",
                    "interface": {
                        "ifHCInOctets": 166294255372,
                        "ifHCInUcastPkts": 108996735,
                        "ifHCOutOctets": 77962950948,
                        "ifHCOutUcastPkts": 208360297,
                        "ifHighSpeed": 10000
                    },
                    "measurement_name": "interface",
                    "tag": {
                        "agent_host": "labrouter.local.net",
                        "ifAlias": "Uplink_To_switch",
                        "ifDescr": "TenGigabitEthernet1/50",
                        "platform_tag": "IOS"
                    }
                }
            }
        ]
    }
}

The interface part is not indexed; the tag part is indexed. The same pattern is repeated thousands of times.
So I have a few questions.

Are key names actually repeated in every document, and therefore consuming disk space? Or are repeated key names somehow compressed within the database?
Does key name length have an impact on disk usage? For example, if I shortened the key names, could I expect documents to consume less disk space?

Yes, key names are repeated in each document, but they are compressed.

Shortening them would probably help, but it is highly likely to be a waste of time for the benefit gained.

Have you tried changing the compression algorithm on the index to best_compression?

I have tried using best_compression and managed to reduce disk usage from ~32 GB to 23 GB. I think it is a good start, but I would like to reduce disk usage even more.

Is there anything else I can do to shrink the index size?

What does your mapping look like?

{
    "telegraf-interface-2018.06.14": {
        "aliases": {},
        "mappings": {
            "metrics": {
                "_all": {
                    "enabled": false
                },
                "dynamic_templates": [
                    {
                        "tags": {
                            "path_match": "tag.*",
                            "match_mapping_type": "string",
                            "mapping": {
                                "ignore_above": 512,
                                "type": "keyword"
                            }
                        }
                    },
                    {
                        "metrics_long": {
                            "match_mapping_type": "long",
                            "mapping": {
                                "index": false,
                                "type": "float"
                            }
                        }
                    },
                    {
                        "metrics_double": {
                            "match_mapping_type": "double",
                            "mapping": {
                                "index": false,
                                "type": "float"
                            }
                        }
                    },
                    {
                        "text_fields": {
                            "match": "*",
                            "mapping": {
                                "norms": false
                            }
                        }
                    }
                ],
                "properties": {
                    "@timestamp": {
                        "type": "date"
                    },
                    "interface": {
                        "properties": {
                            "ifAlias": {
                                "type": "text",
                                "norms": false
                            },
                            "ifErrCongestedPktsDrops": {
                                "type": "long",
                                "index": false
                            },
                            "ifErrCongestionLimitPktDrops": {
                                "type": "long",
                                "index": false
                            },
                            "ifErrRxNoBuffs": {
                                "type": "long",
                                "index": false
                            },
                            "ifHCInOctets": {
                                "type": "long"
                            },
                            "ifHCInUcastPkts": {
                                "type": "long",
                                "index": false
                            },
                            "ifHCOutOctets": {
                                "type": "long"
                            },
                            "ifHCOutUcastPkts": {
                                "type": "long",
                                "index": false
                            },
                            "ifHighSpeed": {
                                "type": "float",
                                "index": false
                            },
                            "ifInDiscards": {
                                "type": "long",
                                "index": false
                            },
                            "ifInErrors": {
                                "type": "long",
                                "index": false
                            },
                            "ifOutDiscards": {
                                "type": "long",
                                "index": false
                            },
                            "ifOutErrors": {
                                "type": "long",
                                "index": false
                            },
                            "ifTotXoffSent": {
                                "type": "long",
                                "index": false
                            },
                            "ifnicTxStalls": {
                                "type": "long",
                                "index": false
                            }
                        }
                    },
                    "measurement_name": {
                        "type": "keyword"
                    },
                    "tag": {
                        "properties": {
                            "agent_host": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "host": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "ifAlias": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "ifDescr": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "ifIndex": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "measurement_tag": {
                                "type": "keyword",
                                "ignore_above": 512
                            },
                            "platform_tag": {
                                "type": "keyword",
                                "ignore_above": 512
                            }
                        }
                    }
                }
            },
        "_default_": {
            "_all": {
                "enabled": false
            },
            "dynamic_templates": [
                {
                    "tags": {
                        "path_match": "tag.*",
                        "match_mapping_type": "string",
                        "mapping": {
                            "ignore_above": 512,
                            "type": "keyword"
                        }
                    }
                },
                {
                    "metrics_long": {
                        "match_mapping_type": "long",
                        "mapping": {
                            "index": false,
                            "type": "float"
                        }
                    }
                },
                {
                    "metrics_double": {
                        "match_mapping_type": "double",
                        "mapping": {
                            "index": false,
                            "type": "float"
                        }
                    }
                },
                {
                    "text_fields": {
                        "match": "*",
                        "mapping": {
                            "norms": false
                        }
                    }
                }
            ],
            "properties": {
                "@timestamp": {
                    "type": "date"
                },
                "measurement_name": {
                    "type": "keyword"
                }
            }
        }
    },
    "settings": {
        "index": {
            "codec": "best_compression",
            "mapping": {
                "total_fields": {
                    "limit": "5000"
                }
            },
            "refresh_interval": "10s",
            "number_of_shards": "4",
            "translog": {
                "flush_threshold_size": "2048mb"
            },
            "provided_name": "telegraf-interface-2018.06.14",
            "creation_date": "1528934410686",
            "number_of_replicas": "0",
            "uuid": "DUYIcSJdTzGuxDsphtpjew",
            "version": {
                "created": "6010199"
            }
        }
    }
}

}

I don't think there's much more you can do here.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.