Hi,
New to the stuck...
I have a relatively nested JSON flowing into elastic.
I'm using kibana for visualisation and reports.
Some of the fields are number by originating system and nature, but as they converted and flow to elastic, the mapping is created as fields of type text, so can not use them for number types aggregations such as sum.
For what I have seen so far, I have the following options:
- Changing the mapping so field will have the proper type (i.e float) - but, as mapping create automatically (data in the specific segment is dynamic), so changing the mapping is an issue...
- Create a scripted field for the index pattern, but I understand this has more resource and performance impact (yet tried it - see the issue in the following).
- Create a pipline with a convert processor, this seems to be the best options for my need and current understanding - the data is flowing from an app using Nest and can just creat a config to index using the pipline.
Here is a "simple" doc:
{
"APP_INSTS" : {
"APP_INST_ID" : "819D000C293E80D911AA2D2F1CCFBF28",
"APP_INST_NAME" : "Expense Reimbursement App_Update Expense Reimbursement-2020-01-02T09:11:43:305",
"VERSION" : "1.01",
"STATUS" : "Completed",
"LAST_MODIFIED_DATE" : "2020-01-02T09:11:43.573",
"APPL_NAME" : "Expense Reimbursement App",
"APP_INITIATOR" : "someuser",
"APP_INITIATOR_LOC" : "?"
},
"APP_AND_FORM_ATTRS" : {
"?xml" : {
"@version" : "1.0",
"@encoding" : "utf-8"
},
"ArrayOfNameValue" : {
"@xmlns:xsd" : "http://www.w3.org/2001/XMLSchema",
"@xmlns:xsi" : "http://www.w3.org/2001/XMLSchema-instance",
"NameValue" : [
{
"Name" : "FormSubmittedXPaths",
"Value" : {
"@xsi:type" : "xsd:string",
"#text" : """[{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ReportDate__u}","Value":"2019-09-16T17:28:15.000Z"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:EmployeeID__u}","Value":"Test"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ExpenseStatus__u}","Value":"Pending"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:FullName__u}","Value":"Test"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ExpenseDate__u}","Value":"2019-09-16T17:28:00.000Z"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ExpenseType__u}","Value":"Food"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ExpenseDescription__u}","Value":"Lunch"},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:Amount__u}","Value":"7"},{"Name":"{/pd:AP/pd:formFields/pd:Receipt_MultiFile~}","Value":{"Receipt_MultiFile":{"Receipt":[{"fileName":"","fileId":"","itemId":"","url":"","size":"","uploadedBy":"","encodedURL":""}]}}},{"Name":"{/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ReceiptFileName__u}","Value":""},{"Name":"${/pd:AP/pd:dataSource/pd:APDataEntity/pd:Expense_Reimbursement_App/pd:ExpenseReimbursement__u/pd:ID}","Value":"ed609b98-9ad8-e911-8163-0800270d7b76"}]"""
}
},
{
"Name" : "//",
"Value" : {
"@xsi:type" : "xsd:string",
"?xml" : {
"@version" : "1.0",
"@encoding" : "utf-8"
},
"pd:AP" : {
"@xmlns:pd" : "http://www.source.com/a/XMLSchema",
"pd:appFields" : null,
"pd:formFields" : {
"pd:ExpenseReimbursement__uList_ListView" : {
"pd:ExpenseReimbursement__uList" : {
"pd:Amount" : null,
"pd:EmployeeID" : null,
"pd:ExpenseDate" : null,
"pd:ExpenseDescription" : null,
"pd:ExpenseStatus" : null,
"pd:ExpenseType" : null,
"pd:FullName" : null,
"pd:ID" : null,
"pd:Quantity" : null,
"pd:ReceiptFileName" : null,
"pd:ReportDate" : null,
"pd:RowID" : null
}
},
"pd:Receipt_MultiFile" : {
"pd:Receipt" : {
"pd:fileName" : null,
"pd:fileId" : null,
"pd:itemId" : null,
"pd:url" : null,
"pd:size" : null,
"pd:uploadedBy" : null,
"pd:encodedURL" : null
}
}
},
"pd:dataSource" : {
"pd:APDataEntity" : {
"pd:Expense_Reimbursement_App" : {
"pd:ExpenseReimbursement__u" : {
"pd:Amount__u" : "7",
"pd:EmployeeID__u" : "Test",
"pd:ExpenseDate__u" : "2019-09-16T17:28:00.000Z",
"pd:ExpenseDescription__u" : "Lunch",
"pd:ExpenseStatus__u" : "Pending",
"pd:ExpenseType__u" : "Food",
"pd:FullName__u" : "Test",
"pd:ID" : "ed609b98-9ad8-e911-8163-0800270d7b76",
"pd:Quantity__u" : null,
"pd:ReceiptFileName__u" : null,
"pd:ReportDate__u" : "2019-09-16T17:28:15.000Z",
"pd:RowID__u" : null
}
}
}
}
}
}
}
]
}
}
}
I want to change pd:Amount__u to float.
Checking current:
GET /test_ingest_index/_mapping/field/APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u
Return:
{
"test_ingest_index" : {
"mappings" : {
"APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u" : {
"full_name" : "APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u",
"mapping" : {
"pd:Amount__u" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
Create the following pipline:
PUT _ingest/pipeline/test_pipline
{
"description": "converts the content of numeric fields from text to number type",
"processors" : [
{
"convert" : {
"field" : "APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u",
"type": "float",
"ignore_missing": true,
"tag": "amoutprocessor"
}
}
]
}
_reindex using pipline:
POST _reindex
{
"source": {
"index": "test_ingest_index"
},
"dest": {
"index": "test_ingest_index_2",
"pipeline": "proc_inst_data_pipline"
}
}
Check again mapping for field in new index, type is still text:
{
"test_ingest_index_2" : {
"mappings" : {
"APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u" : {
"full_name" : "APP_AND_FORM_ATTRS.ArrayOfNameValue.NameValue.Value.pd:AP.pd:dataSource.pd:APDataEntity.pd:Expense_Reimbursement_App.pd:ExpenseReimbursement__u.pd:Amount__u",
"mapping" : {
"pd:Amount__u" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
Seems like there is an issue with the path to the field, may be because of the NameValue array which requires foreach?
Also, tried a script field for the index pattern using this path, does not work as well, guess for the same reason of the path...
Any ides what am I missing?