Hi there,
over the last few days I have been trying to use a supervised model for DGA detection.
As usual, you prepare your index with an ingest pipeline and then use the inference processor to insert a prediction into the event.
So far so good.
After I deployed the necessary ingest pipelines, I noticed that nothing happens. I also don't get any error messages about this, either in the Beats log or in the Elasticsearch log.
So I tried to debug it in Kibana with a sample document in the ingest pipeline editing mode.
Here I found an error which I unfortunately could not interpret correctly. The ingest pipeline uses a stored script, and that script is failing. This is especially confusing because, in the pipeline, the script has to access "_source" rather than "fields" as shown for the document in the index.
Here is the Example Event:
{
"_index": "filebeat-7.13.2-2021.06.19-000001",
"_type": "_doc",
"_id": "yasaM3oBlkpHqeQqUQSz",
"_version": 1,
"_score": null,
"fields": {
"dns.type": [
"query"
],
"event.category": [
"network"
],
"dns.question.subdomain": [
"ec2messages.eu-west-1"
],
"host.os.name.text": [
"Amazon Linux"
],
"host.hostname": [
"ip-192-168-25-20.eu-west-1.compute.internal"
],
"suricata.eve.src_port": [
49944
],
"host.mac": [
"06:05:fa:83:5e:b3",
"02:42:a0:95:70:cd",
"02:42:38:5e:3d:58",
"02:42:ef:58:29:29",
"02:42:8b:79:60:b4",
"02:42:9c:04:14:00",
"02:42:a8:8c:ba:05",
"02:42:9e:24:c4:82",
"02:42:71:87:43:e9",
"02:42:f5:e3:7e:f1",
"b2:98:e5:23:0b:35",
"32:38:b5:78:79:ae",
"be:b3:cf:f4:e0:9c",
"16:fd:8a:78:e0:87",
"aa:39:43:a0:60:f7",
"a6:e1:ed:96:8a:f8",
"be:34:57:e3:d2:20"
],
"cloud.availability_zone": [
"eu-west-1a"
],
"service.type": [
"suricata"
],
"host.os.version": [
"2"
],
"host.os.name": [
"Amazon Linux"
],
"source.ip": [
"192.168.25.20"
],
"suricata.eve.event_type": [
"dns"
],
"destination.address": [
"192.168.0.2"
],
"agent.name": [
"ip-192-168-25-20.eu-west-1.compute.internal"
],
"network.community_id": [
"1:40SoyT+bC6YjlnddFWuj1OzWFP8="
],
"host.name": [
"ip-192-168-25-20.eu-west-1.compute.internal"
],
"event.kind": [
"event"
],
"suricata.eve.flow_id": [
"1762438499692745"
],
"event.original": [
"{\"timestamp\":\"2021-06-22T09:43:18.880841+0000\",\"flow_id\":1762438499692745,\"in_iface\":\"eth0\",\"event_type\":\"dns\",\"src_ip\":\"192.168.25.20\",\"src_port\":49944,\"dest_ip\":\"192.168.0.2\",\"dest_port\":53,\"proto\":\"UDP\",\"dns\":{\"type\":\"query\",\"id\":38051,\"rrname\":\"ec2messages.eu-west-1.amazonaws.com\",\"rrtype\":\"A\",\"tx_id\":0}}"
],
"cloud.region": [
"eu-west-1"
],
"suricata.eve.proto": [
"udp"
],
"host.os.type": [
"linux"
],
"fileset.name": [
"eve"
],
"input.type": [
"log"
],
"log.offset": [
308622947
],
"suricata.eve.in_iface": [
"eth0"
],
"agent.hostname": [
"ip-192-168-25-20.eu-west-1.compute.internal"
],
"tags": [
"suricata"
],
"dns.question.top_level_domain": [
"com"
],
"host.architecture": [
"x86_64"
],
"cloud.machine.type": [
"t3.medium"
],
"cloud.provider": [
"aws"
],
"agent.id": [
"3c709204-ae58-472d-87d1-2694d9cd3bd5"
],
"cloud.service.name": [
"EC2"
],
"source.port": [
49944
],
"host.containerized": [
false
],
"ecs.version": [
"1.9.0"
],
"event.created": [
"2021-06-22T09:43:19.679Z"
],
"suricata.eve.dns.rrname": [
"ec2messages.eu-west-1.amazonaws.com"
],
"agent.version": [
"7.13.2"
],
"host.os.family": [
"redhat"
],
"suricata.eve.dns.tx_id": [
0
],
"suricata.eve.src_ip": [
"192.168.25.20"
],
"destination.port": [
53
],
"source.address": [
"192.168.25.20"
],
"destination.geo.location": [
{
"coordinates": [
-6.2488,
53.3338
],
"type": "Point"
}
],
"host.ip": [
"192.168.25.20",
"fe80::405:faff:fe83:5eb3",
"172.17.0.1",
"fe80::42:a0ff:fe95:70cd",
"172.30.0.1",
"fe80::42:38ff:fe5e:3d58",
"172.31.0.1",
"fe80::42:efff:fe58:2929",
"192.168.32.1",
"fe80::42:8bff:fe79:60b4",
"192.168.48.1",
"fe80::42:9cff:fe04:1400",
"192.168.64.1",
"fe80::42:a8ff:fe8c:ba05",
"192.168.80.1",
"fe80::42:9eff:fe24:c482",
"192.168.96.1",
"fe80::42:71ff:fe87:43e9",
"192.168.112.1",
"fe80::42:f5ff:fee3:7ef1",
"fe80::b098:e5ff:fe23:b35",
"fe80::3038:b5ff:fe78:79ae",
"fe80::bcb3:cfff:fef4:e09c",
"fe80::14fd:8aff:fe78:e087",
"fe80::a839:43ff:fea0:60f7",
"fe80::a4e1:edff:fe96:8af8",
"fe80::bc34:57ff:fee3:d220"
],
"cloud.instance.id": [
"i-00641a1eb2f799ff3"
],
"suricata.eve.app_proto": [
"dns"
],
"agent.type": [
"filebeat"
],
"network.protocol": [
"dns"
],
"event.module": [
"suricata"
],
"related.ip": [
"192.168.25.20",
"192.168.0.2"
],
"host.os.kernel": [
"4.14.232-176.381.amzn2.x86_64"
],
"dns.question.registered_domain": [
"amazonaws.com"
],
"host.id": [
"ec239da046efe33c7665d4508a7d0a61"
],
"dns.question.name": [
"ec2messages.eu-west-1.amazonaws.com"
],
"suricata.eve.dns.id": [
38051
],
"dns.id": [
"38051"
],
"host.os.codename": [
"Karoo"
],
"dns.question.type": [
"A"
],
"destination.ip": [
"192.168.0.2"
],
"network.transport": [
"udp"
],
"suricata.eve.dest_ip": [
"192.168.0.2"
],
"cloud.image.id": [
"ami-0ac43988dfd31ab9a"
],
"event.ingested": [
"2021-06-22T09:43:20.748Z"
],
"@timestamp": [
"2021-06-22T09:43:18.880Z"
],
"suricata.eve.dns.rrtype": [
"A"
],
"cloud.account.id": [
"612565585842"
],
"host.os.platform": [
"amzn"
],
"suricata.eve.dest_port": [
53
],
"event.type": [
"protocol"
],
"log.file.path": [
"/data/suricata/log/eve.json"
],
"agent.ephemeral_id": [
"a026494a-441b-44f9-ad40-08ef81279537"
],
"suricata.eve.dns.type": [
"query"
],
"event.dataset": [
"suricata.eve"
]
},
"sort": [
1624354998880
]
}
Here is the ingest pipeline:
PUT _ingest/pipeline/dga_ngram_expansion_inference
{
  "description": "Expands a domain into unigrams, bigrams and trigrams and makes a prediction of maliciousness",
  "processors": [
    {
      "script": {
        "id": "ngram-extractor-packetbeat",
        "params": { "ngram_count": 1 }
      }
    },
    {
      "script": {
        "id": "ngram-extractor-packetbeat",
        "params": { "ngram_count": 2 }
      }
    },
    {
      "script": {
        "id": "ngram-extractor-packetbeat",
        "params": { "ngram_count": 3 }
      }
    },
    {
      "inference": {
        "model_id": "dga_detection-1624426349035",
        "target_field": "predicted_label",
        "field_map": {},
        "inference_config": {
          "classification": { "num_top_classes": 2 }
        }
      }
    },
    {
      "script": {
        "id": "ngram-remover-packetbeat",
        "params": { "ngram_count": 1 }
      }
    },
    {
      "script": {
        "id": "ngram-remover-packetbeat",
        "params": { "ngram_count": 2 }
      }
    },
    {
      "script": {
        "id": "ngram-remover-packetbeat",
        "params": { "ngram_count": 3 }
      }
    }
  ]
}
Here is the script used in the ingest pipeline:
POST _scripts/ngram-extractor-packetbeat
{
  "script": {
    "lang": "painless",
    "source": """
      /*
       * Returns the n-gram of length n that starts at offset fieldcount inside
       * the first dot-separated label of fulldomain, or '' when
       * fieldcount + n reaches or exceeds the length of that label.
       *
       * NOTE(review): because the comparison is '>=', the n-gram that ends
       * exactly at the end of the label is also returned as ''. This is kept
       * unchanged on purpose; presumably the trained model expects exactly
       * this feature layout -- confirm before changing the boundary.
       */
      String nGramAtPosition(String fulldomain, int fieldcount, int n) {
        String domain = fulldomain.splitOnToken('.')[0];
        if (fieldcount + n >= domain.length()) {
          return '';
        }
        return domain.substring(fieldcount, fieldcount + n);
      }

      // Null-safe lookup: events without dns.question.registered_domain
      // (for example non-DNS events, or a test document pasted in the
      // "fields" layout instead of "_source") would otherwise raise a
      // null-pointer error here and fail the whole pipeline.
      def registeredDomain = ctx.dns?.question?.registered_domain;

      // Only expand n-grams when the field is present and is a string;
      // any other document passes through this processor untouched.
      if (registeredDomain instanceof String) {
        String fullDomain = registeredDomain;

        // Target fields are named field_<ngram_count>_gram_<position>.
        String fieldPrefix = 'field_' + Integer.toString(params.ngram_count) + '_gram_';

        // One field per character position of the full registered domain;
        // positions past the end of the first label receive ''.
        for (int i = 0; i < fullDomain.length(); i++) {
          ctx[fieldPrefix + Integer.toString(i)] = nGramAtPosition(fullDomain, i, params.ngram_count);
        }
      }
    """
  }
}
Here is the Error from the Test Document:
Thank you in advance