ELK Painless: count unique distinct occurrences

Hi there
I'm using ELK stack version 7. What I need to do is to count the unique occorence of a value in my indexes.
My indexes are created by WSO2 Identity Server version 5.10 and they are so defined:

    {
      "login.wso2.node.ip-2021.03.11" : {
        "aliases" : {
          "alias_my_login" : { }
        },
        "mappings" : {
          "dynamic" : "true",
          "_meta" : { },
          "_source" : {
            "includes" : [ ],
            "excludes" : [ ]
          },
          "dynamic_date_formats" : [
            "strict_date_optional_time",
            "yyyy/MM/dd HH:mm:ss Z||yyyy/MM/dd Z"
          ],
          "dynamic_templates" : [ ],
          "date_detection" : true,
          "numeric_detection" : false,
          "properties" : {
            "@timestamp" : {
              "type" : "date",
              "format" : "strict_date_optional_time"
            },
            "@version" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "host" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "instance_IP" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "instance_name" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "java_class" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "level" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "log_message" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "message" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "path" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "rr" : {
              "type" : "text"
            },
            "tags" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "tenant_id" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "timestamp" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            },
            "type" : {
              "type" : "text",
              "fields" : {
                "keyword" : {
                  "type" : "keyword",
                  "ignore_above" : 256
                }
              }
            }
          }
        },
        "settings" : {
          "index" : {
            "creation_date" : "1615481578543",
            "number_of_shards" : "1",
            "number_of_replicas" : "1",
            "uuid" : "9o-UQnn-SKaj7LbhO8GYxQ",
            "version" : {
              "created" : "7070199"
            },
            "provided_name" : "login.wso2.node.ip-2021.03.11"
          }
        }
      }
    }

What I need to do is to check if in the message field I have a SAML2 Response XML and if so I need to access to one value of this XML and count the unique occurrences.
So far so good. The message field is multi mapping field. It is both text type and keyword type so I can use text type for full search and keyword type for aggregation, sorting and so on.
What I did is to write this painless script:

    GET login.wso2.node.ip-2021.03.11/_search
        {
        "query": {
          "bool": {
            "filter": [
              {
                "script": {
                  "script": {
                    "source": "doc['message.keyword'].value.contains('SAML_MESSAGES_LOGFILE') && doc['message.keyword'].value.contains('TINIT-')"
                  }
                }
              }
            ]
           }
          },
          "aggs": {
            "distinct_cf_count": {
              "scripted_metric": {
                "params": {
                  "fieldName":"message"
                },
                "init_script": "state.list = []",
                "map_script": """
                  //Controllo se c'è il campo message e se c'è fiscalnumber
                  //if(doc[params.fieldName] != null && doc[params.fieldName].size()==0  ){
                  //  def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
                    //if (matcher.find()) {
                    //  state.list.add(matcher.group(1));
                    //}
                    if(doc[params.fieldName] != null && doc[params.fieldName].size()==0 && doc[params.fieldName].value.indexOf('TINIT-') > -1 ){
                        def valore = doc[params.fieldName].value;
                        def startIdx = valore.indexOf('TINIT-')+'TINIT-'.length();
                        state.list.add(valore.substring(startIdx, 16));
                    }
                  """,
                "combine_script": "return state.list;",
                "reduce_script": """
                Map uniqueValueMap = new HashMap();
                int count = 0;
                for(shardList in states) {
                  if(shardList != null) {
                    for(key in shardList) {
                      if(!uniqueValueMap.containsKey(key)) {
                        count +=1;
                        uniqueValueMap.put(key, key);
                      }
                    }
                  }
                }
                return count;
                """
              }
            }
          }
        }

But I can't use regex because they are disabled and I should restart my ELK cluster in order to enable them. So I tried the contains and indexOf but I'm not able in counting the unique occorronces of this field.

Do you have any suggestion?

Thank you
Angelo

I gave a look. This check alwaus return 0 so it's like if message.keyword is always missing

        "map_script": """
          //Controllo se c'è il campo message e se c'è fiscalnumber
          //if(doc[params.fieldName] != null && doc[params.fieldName].size()==0  ){
          //  def matcher = /<saml2:Attribute FriendlyName="Codice Fiscale" Name="fiscalNumber"><saml2:AttributeValue xmlns:xs="http:\/\/www.w3.org\/2001\/XMLSchema" xmlns:xsi="http:\/\/www.w3.org\/2001\/XMLSchema-instance" xsi:type="xs:string">(.*)<\/saml2:AttributeValue><\/saml2:Attribute>/.matcher(doc[params.fieldName].value);
            //if (matcher.find()) {
            //  state.list.add(matcher.group(1));
            //}
            **if( doc[params.fieldName].size()==0 ){**
**              state.list.add(UUID.randomUUID().toString());**
**            }**
            //else{
            //    def valore = doc[params.fieldName].value;
            //    def cf = valore.splitOnToken('TINIT-')[1].substring(16);
            //    state.list.add(cf);
            //}
          """,

Do you have any suggestion? I'm really blocked here... at 1 step to the solution :frowning:

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.