OpenSearch transform splitting array values into new events

Hi,

I'm transforming the following event, but the values inside the array fields are being split into separate events.

      {
        "_index": "heal_collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "LAUpboIBh6CUatILrsN3",
        "_score": 1,
        "_source": {
          "timeInGMT": 0,
          "kpiId": 0,
          "compInstanceIdentifier": "d0352b7d-0484-4714-bbc8-eb67cbb7be70",
          "agentIdentifier": "ComponentAgent-171",
          "kpiIdentifier": "PACKETS_DROPPED",
          "categoryIdentifier": "Network Utilization",
          "applicationIdentifier": null,
          "serviceIdentifiers": [
            "Supervisor_Controller Service",
            "Event_Detector Service",
            "UI_Service",
            "Redis",
            "CC_Service"
          ],
          "clusterIdentifiers": [
            "a5c57ef5-4018-41b8-b727-27c8f8376c0e"
          ],
          "collectionInterval": 60,
          "value": "0.0",
          "kpiType": "Core",
          "groupAttribute": "ALL",
          "groupIdentifier": null,
          "watcherValue": null,
          "errorCode": null,
          "clusterOperation": null,
          "aggLevelInMins": 1,
          "error": false,
          "kpiGroup": false,
          "discovery": false,
          "maintenanceExcluded": false,
          "@timestamp": "2022-05-01T01:32:00.000Z"
        }

The following is the transform job configuration.

curl -u admin:admin -XPUT "http://XXX.XXX.XX.XXX9201/_plugins/_transform/my-array-job-2" -H 'Content-type: application/json' -d'
{
    "transform": {
        "schedule": {
            "interval": {
                "start_time": 1659705000000,
                "period": 1,
                "unit": "Minutes"
            }
        },
        "metadata_id": null,
        "updated_at": 1659456180000,
        "enabled": true,
        "enabled_at": 1659457620000,
        "description": "",
        "source_index": "collated_txn_health_2022.05",
        "data_selection_query": {
            "match_all": {
                "boost": 1
            }
          },
        "target_index": "transform_collated_txn_health_2022.05",
        "page_size": 1000,
        "groups": [
            {
                "date_histogram": {
                    "fixed_interval": "1m",
                    "source_field": "@timestamp",
                    "target_field": "@timestamp",
                    "timezone": "Asia/Calcutta"
                }
            },
            {
                "terms": {
                    "source_field": "clusterIdentifiers",
                    "target_field": "clusterIdentifiers"
                }
            },
            {
                "terms": {
                    "source_field": "serviceIdentifiers",
                    "target_field": "serviceIdentifiers"
                }
            },
            {
                "terms": {
                    "source_field": "compInstanceIdentifier",
                    "target_field": "compInstanceIdentifier"
                }
            },
            {
                "terms": {
                    "source_field": "agentIdentifier",
                    "target_field": "agentIdentifier"
                }
            }
        ],
        "aggregations": {
            "count_@timestamp": {
                "value_count": {
                    "field": "@timestamp"
                }
            }
        }
    }
}'

The following are the events from the transform index.

      {
        "_index": "transform_heal_collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "ybK0McQ9NZrt9xdo9iWKbA",
        "_score": 1,
        "_source": {
          "transform._id": "my-array-job-2",
          "transform._doc_count": 2,
          "@timestamp": 1651365120000,
          "clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "serviceIdentifiers": "Redis",
          "compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "agentIdentifier": "ComponentAgent-170",
          "count_@timestamp": 2
        }
      },
      {
        "_index": "transform_heal_collated_txn_health_2022.05",
        "_type": "_doc",
        "_id": "Wf-4KwnFaYuw9bL-V-9WEQ",
        "_score": 1,
        "_source": {
          "transform._id": "my-array-job-2",
          "transform._doc_count": 2,
          "@timestamp": 1651365120000,
          "clusterIdentifiers": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "serviceIdentifiers": "Redis_Server Service",
          "compInstanceIdentifier": "a5c57ef5-4018-41b8-b727-27c8f8376c0e",
          "agentIdentifier": "ComponentAgent-170",
          "count_@timestamp": 2
        }

It would be a great help if somebody could suggest a solution.

OpenSearch/OpenDistro are AWS-run products and differ from the original Elasticsearch and Kibana products that Elastic builds and maintains. You may need to contact them directly for further assistance.

(This is an automated response from your friendly Elastic bot. Please report this post if you have any suggestions or concerns :elasticheart: )

1 Like

You will need to ask aws, their implementation of this is entirely different to Elasticsearch.

Thanks @warkolm for the reply.

The OpenSearch community is not as active as Elastic's. I have posted the same question there, but they have kept my post in a pending state.

So you should switch to the official elasticsearch. :wink:

@dadoonet

I've been using Elasticsearch for the last 2 years. :smiley:

It's time to explore new things.

By the way, I have solved the above issue with a Painless script; I will post it after a few tests.

Thanks.

I have solved the issue with the following Painless script, which helps to transform array fields in OpenSearch.

PUT _plugins/_transform/my-array-job-2
{
  
  "transform": {
        "schedule": {
            "interval": {
                "start_time": 1659705000000,
                "period": 1,
                "unit": "Minutes"
            }
        },
        "metadata_id": null,
        "updated_at": 1659456180000,
        "enabled": true,
        "enabled_at": 1659457620000,
        "description": "",
        "source_index": "heal_collated_txn_heal_health_2022.05_reindex",
        "target_index": "transform_heal_collated_txn_heal_health_2022.05",
        "page_size": 1000,
        "groups": [
            {
                "date_histogram": {
                    "fixed_interval": "1m",
                    "source_field": "@timestamp",
                    "target_field": "@timestamp",
                    "timezone": "Asia/Calcutta"
                }
            },    
            {
                "terms": {
                    "source_field": "kpiIdentifier",
                    "target_field": "kpiIdentifier"
                }
            },
            {
                "terms": {
                    "source_field": "clusterIdentifiers",
                    "target_field": "clusterIdentifiers"
                }
            }            
        ],
    "aggregations": { 
      "count_@timestamp": {
          "value_count": {
           "field": "@timestamp"
         }
       },
      "count_agentIdentifier": {
          "value_count": {
              "field": "agentIdentifier"
          }
      },
      "sum_value": {
          "sum": {
              "field": "value"
          }
      },
      "max_value": {
          "max": {
              "field": "value"
          }
      },
      "avg_value": {
          "avg": {
              "field": "value"
          }
      },
      "count_value": {
          "value_count": {
              "field": "value"
          }
      },
      "percentiles_value": {
          "percentiles": {
              "field": "value",
             "percents": [
                  95
              ],
              "keyed": true,
              "tdigest": {
                  "compression": 100
              }
          }
      },
      "serviceIdentifiers": {
        "scripted_metric": {
          "init_script": "state.docs = []", 
          "map_script": """ 
            Map span = [
              'url':doc['serviceIdentifiers']
            ];
            state.docs.add(span)
          """,
          "combine_script": "return state.docs;", 
          "reduce_script": """ 
            def all_docs = [];
            for (s in states) {
              for (span in s) {
                all_docs.add(span);
              }
            }
            def size = all_docs.size();
            def serviceIdentifiers_1 = all_docs[0]['url'];
            def ret = new HashMap();
            ret['serviceIdentifiers'] = serviceIdentifiers_1;
            return ret;
          """
        }
      }
    }
  }
}