Hi, I have old index files from Elasticsearch 1.7.6, which uses Lucene 4.10.4. I was able to get Lucene to open a directory that represents one of the three shards of my index, and I can pull the documents from this directory. However, the index contains compound fields (meaning properties that themselves have properties). I am not sure whether these are nested, flattened, or stored by ES in some other way, but they are absent from the documents when read using Lucene.
I understand that these properties are likely stored in sub-documents, and that those sub-documents should exist in the same shard. How would I find these sub-documents? When I print all documents in the index, they are all "parent" documents: they have the properties defined below, such as "action" and "addr", but are missing "location", "user", etc.
My pylucene code is like this:
def import_doc(doc):
    """Handle one document read from the index.

    Currently this just prints the document so it can be inspected.

    Args:
        doc: the field-name -> value mapping built by ``import_index``.
    """
    print(doc)
def _stored_value(field):
    """Return a stored field's value as text, falling back to its binary payload."""
    value = field.stringValue()
    if value is None:
        # stringValue() is null for binary stored fields. Elasticsearch keeps
        # the original JSON document in the binary ``_source`` field, and that
        # is where object properties (e.g. "location", "user") can be found.
        # NOTE(review): assumes the payload is plain UTF-8 JSON (not
        # compressed, SMILE or CBOR) -- confirm for this index.
        raw = field.binaryValue()
        if raw is not None:
            value = raw.utf8ToString()
    return value


def import_index(path):
    """Read every live document from a Lucene index directory and import it.

    Opens the directory at ``path``, walks all document IDs, turns each
    document's stored fields into a ``{field name: value}`` dict and hands it
    to ``import_doc``. The reader and directory are closed when done, even if
    an import fails part-way through.

    Args:
        path: filesystem path of a single Lucene index (one shard directory).
    """
    # Function-scope import so this block does not depend on the file header.
    from org.apache.lucene.index import MultiFields

    index = FSDirectory.open(File(path))
    try:
        reader = DirectoryReader.open(index)
        try:
            # None when the index has no deletions; otherwise a bit set where
            # a set bit means the document is still live.
            live_docs = MultiFields.getLiveDocs(reader)
            for i in range(reader.maxDoc()):
                # document(i) does not check for deletions itself, so skip
                # deleted IDs explicitly instead of importing stale data.
                if live_docs is not None and not live_docs.get(i):
                    continue
                stored = reader.document(i)
                # If a field name occurs more than once in a document, the
                # last occurrence wins (same as a plain dict comprehension).
                doc = {
                    f.name(): _stored_value(f)
                    for f in stored.getFields()
                }
                import_doc(doc)
        finally:
            reader.close()
    finally:
        index.close()
My index definition is like this:
{
"audit-01-31-2021" : {
"aliases" : {
"audit" : { }
},
"mappings" : {
"activity" : {
"_all" : {
"enabled" : true
},
"_routing" : {
"required" : true,
"path" : "site_id"
},
"properties" : {
"action" : {
"type" : "string",
"store" : true
},
"addr" : {
"type" : "string"
},
"connection_type" : {
"type" : "string",
"store" : true
},
"isdir" : {
"type" : "boolean",
"store" : true
},
"location" : {
"properties" : {
"country" : {
"type" : "string"
},
"ip" : {
"type" : "string"
}
}
},
"message" : {
"properties" : {
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
}
}
},
"result" : {
"type" : "string",
"store" : true
},
"rule" : {
"properties" : {
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
}
}
},
"seq_id" : {
"type" : "integer",
"store" : true
},
"signature" : {
"properties" : {
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
}
}
},
"site_id" : {
"type" : "long",
"store" : true
},
"size" : {
"type" : "long",
"store" : true
},
"target" : {
"properties" : {
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
},
"type" : {
"type" : "string"
}
}
},
"timestamp" : {
"type" : "date",
"store" : true,
"format" : "dateOptionalTime"
},
"type" : {
"type" : "string",
"store" : true
},
"uid" : {
"type" : "string",
"store" : true
},
"user" : {
"properties" : {
"id" : {
"type" : "long"
},
"name" : {
"type" : "string"
},
"username" : {
"type" : "string",
"analyzer" : "username_analyzer"
}
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1612069201197",
"analysis" : {
"filter" : {
"username_filter" : {
"type" : "word_delimiter",
"type_table" : [ "@ => ALPHA", ". => ALPHA", "# => ALPHA" ]
}
},
"analyzer" : {
"username_analyzer" : {
"filter" : [ "lowercase", "username_filter" ],
"type" : "custom",
"tokenizer" : "whitespace"
}
}
},
"number_of_shards" : "3",
"uuid" : "cSJzoUWgQFa-GLzb09hn3A",
"version" : {
"created" : "1070699"
},
"number_of_replicas" : "1"
}
},
"warmers" : { }
}
}