[Elasticsearch] Rechercher dans des array

Bonjour !

Actuellement, j'indexe des documents de la sorte :

{
   "took": 6,
   "timed_out": false,
   "_shards": {
	  "total": 5,
	  "successful": 5,
	  "failed": 0
   },
   "hits": {
	  "total": 3,
	  "max_score": 1,
	  "hits": [
		 {
			"_index": "nmb2b-2017.03.02",
			"_type": "FlightListByAerodromeReply",
			"_id": "AVqOakryqXFr_vbJPqLG",
			"_score": 1,
			"_source": {
			   "@timestamp": "2017-03-02T09:46:53.902Z",
			   "port": 18523,
			   "@version": "1",
			   "host": "192.168.10.195",
			   "parsed": {
				  "xmlns:S": "http://schemas.xmlsoap.org/soap/envelope/",
				  "Body": {
					 "FlightListByAerodromeReply": {
						"xmlns:common": "eurocontrol/cfmu/b2b/CommonServices",
						"requestReceptionTime": "2017-03-02 09:58:55",
						"data": {
						   "effectiveTrafficWindow": {
							  "wef": "2017-03-02 08:00",
							  "unt": "2017-03-02 20:00"
						   },
						   "flights": [
							  {
								 "flight": {
									"flightId": {
									   "keys": {
										  "aircraftId": "BEL32C",
										  "aerodromeOfDestination": "EBBR",
										  "estimatedOffBlockTime": "2017-03-02 07:45",
										  "airFiled": "false",
										  "nonICAOAerodromeOfDestination": "false",
										  "nonICAOAerodromeOfDeparture": "false",
										  "aerodromeOfDeparture": "LFPG"
									   },
									   "id": "AT02676815"
									}
								 }
							  },
							  {
								 "flight": {
									"flightId": {
									   "keys": {
										  "aircraftId": "AFR1033",
										  "aerodromeOfDestination": "LFPG",
										  "estimatedOffBlockTime": "2017-03-02 04:30",
										  "airFiled": "false",
										  "nonICAOAerodromeOfDestination": "false",
										  "nonICAOAerodromeOfDeparture": "false",
										  "aerodromeOfDeparture": "LGAV"
									   },
									   "id": "AT02680179"
									}
								 }
							  }
						   ]
						},
						"xmlns:flight": "eurocontrol/cfmu/b2b/FlightServices",
						"requestId": "B2B_CUR:2419467",
						"xmlns:flow": "eurocontrol/cfmu/b2b/FlowServices",
						"xmlns:airspace": "eurocontrol/cfmu/b2b/AirspaceServices",
						"sendTime": "2017-03-02 09:59:42",
						"status": "OK"
					 }
				  }
			   },
			   "type": "FlightListByAerodromeReply",
			   "tags": []
			}
		 }
	  ]
   }
}

Comme vous pouvez le constater, je dispose d'un objet JSON 'flights' de type 'Array' qui peut contenir N 'flight':

"flights": [
  {
	 "flight": {
		"flightId": {
		   "keys": {
			  "aircraftId": "BEL32C",
			  "aerodromeOfDestination": "EBBR",
			  "estimatedOffBlockTime": "2017-03-02 07:45",
			  "airFiled": "false",
			  "nonICAOAerodromeOfDestination": "false",
			  "nonICAOAerodromeOfDeparture": "false",
			  "aerodromeOfDeparture": "LFPG"
		   },
		   "id": "AT02676815"
		}
	 }
  },

Or, dans certains cas, ces 'flight' peuvent ne pas contenir de champ 'id' :

"flights": [
  {
	 "flight": {
		"flightId": {
		   "keys": {
			  "aircraftId": "FRA34Z",
			  "aerodromeOfDestination": "EBBR",
			  "estimatedOffBlockTime": "2017-03-02 07:45",
			  "airFiled": "false",
			  "nonICAOAerodromeOfDestination": "false",
			  "nonICAOAerodromeOfDeparture": "false",
			  "aerodromeOfDeparture": "LFPG"
		   }
		}
	 }
  },

Je souhaiterais donc effectuer une requête ES qui me permettrait de retourner tous les 'flight' qui contiennent bien un champ 'id'.

Étant novice dans les requêtes ES, pensez-vous que cela soit possible ? Si oui, comment faire ?

Merci d'avance ! :slight_smile:

J'en profite pour mettre le mapping:

{
  "template":   "nmb2b-*",
  "settings": { "number_of_shards": 5 },
  "mappings": {
		"FlightListByAerodromeReply": {
			"properties": {
			   "@timestamp": {
				  "type": "date",
				  "format": "strict_date_optional_time||epoch_millis"
			   },
			   "@version": {
				  "type": "string"
			   },
			   "host": {
				  "type": "string"
			   },
			   "parsed": {
				  "properties": {
					 "Body": {
						"properties": {
						   "FlightListByAerodromeReply": {
							  "properties": {
								 "data": {
									"properties": {
									   "effectiveTrafficWindow": {
										  "properties": {
											 "unt": {
												"type": "string"
											 },
											 "wef": {
												"type": "string"
											 }
										  }
									   },
									   "flights": {
										  "properties": {
											 "flight": {
												"properties": {
												   "flightId": {
													  "properties": {
														 "id": {
															"type": "string"
														 },
														 "keys": {
															"properties": {
															   "aerodromeOfDeparture": {
																  "type": "string"
															   },
															   "aerodromeOfDestination": {
																  "type": "string"
															   },
															   "airFiled": {
																  "type": "string"
															   },
															   "aircraftId": {
																  "type": "string"
															   },
															   "estimatedOffBlockTime": {
																  "type": "string"
															   },
															   "nonICAOAerodromeOfDeparture": {
																  "type": "string"
															   },
															   "nonICAOAerodromeOfDestination": {
																  "type": "string"
															   }
															}
														 }
													  }
												   }
												}
											 }
										  }
									   }
									}
								 },
								 "requestId": {
									"type": "string"
								 },
								 "requestReceptionTime": {
									"type": "string"
								 },
								 "sendTime": {
									"type": "string"
								 },
								 "status": {
									"type": "string"
								 },
								 "xmlns:airspace": {
									"type": "string"
								 },
								 "xmlns:common": {
									"type": "string"
								 },
								 "xmlns:flight": {
									"type": "string"
								 },
								 "xmlns:flow": {
									"type": "string"
								 }
							  }
						   }
						}
					 },
					 "xmlns:S": {
						"type": "string"
					 }
				  }
			   },
			   "port": {
				  "type": "long"
			   },
			   "query": {
				  "properties": {
					 "query_string": {
						"properties": {
						   "query": {
							  "type": "string"
						   }
						}
					 }
				  }
			   },
			   "type": {
				  "type": "string"
			   }
			}
		 }
  }
}

Je pense que tu devrais indexer les flights indépendamment.
Ça peut se faire en déclarant ton champ flights avec le type nested. Ainsi, chaque flight sera indexé indépendamment des autres dans Lucene.

Ce qui devrait te permettre de faire une nested query avec une exists query sur ce champ id. Puis en demandant les inner hits, tu devrais pouvoir t'en sortir je pense.

Mais peut-être devrais-tu réfléchir à ton modèle : si ton besoin est de chercher des flights un par un et non des ensembles de flights regroupés, peut-être devrais-tu dénormaliser et indexer chaque flight individuellement ?

Merci pour ta réponse.

Alors, j'ai modifié mon mapping en y ajoutant un type nested pour le champ flights:

{
  "order": 0,
  "template": "nmb2b-*",
  "settings": {
	"index": {
	  "number_of_shards": "5"
	}
  },
  "mappings": {
	"FlightListByAerodromeReply": {
	  "properties": {
		"@timestamp": {
		  "format": "strict_date_optional_time||epoch_millis",
		  "type": "date"
		},
		"port": {
		  "type": "long"
		},
		"query": {
		  "properties": {
			"query_string": {
			  "properties": {
				"query": {
				  "type": "string"
				}
			  }
			}
		  }
		},
		"@version": {
		  "type": "string"
		},
		"host": {
		  "type": "string"
		},
		"parsed": {
		  "properties": {
			"xmlns:S": {
			  "type": "string"
			},
			"Body": {
			  "properties": {
				"FlightListByAerodromeReply": {
				  "properties": {
					"xmlns:common": {
					  "type": "string"
					},
					"requestReceptionTime": {
					  "type": "string"
					},
					"data": {
					  "properties": {
						"effectiveTrafficWindow": {
						  "properties": {
							"wef": {
							  "type": "string"
							},
							"unt": {
							  "type": "string"
							}
						  }
						},
						"flights": {
						  "type": "nested",  <== Ajout type 'nested'
						  "properties": {
							"flight": {
							  "properties": {
								"flightId": {
								  "properties": {
									"keys": {
									  "properties": {
										"aircraftId": {
										  "type": "string"
										},
										"aerodromeOfDestination": {
										  "type": "string"
										},
										"estimatedOffBlockTime": {
										  "type": "string"
										},
										"airFiled": {
										  "type": "string"
										},
										"nonICAOAerodromeOfDestination": {
										  "type": "string"
										},
										"nonICAOAerodromeOfDeparture": {
										  "type": "string"
										},
										"aerodromeOfDeparture": {
										  "type": "string"
										}
									  }
									},
									"id": {
									  "type": "string"
									}
								  }
								}
							  }
							}
						  }
						}
					  }
					},
					"xmlns:flight": {
					  "type": "string"
					},
					"requestId": {
					  "type": "string"
					},
					"xmlns:flow": {
					  "type": "string"
					},
					"xmlns:airspace": {
					  "type": "string"
					},
					"sendTime": {
					  "type": "string"
					},
					"status": {
					  "type": "string"
					}
				  }
				}
			  }
			}
		  }
		},
		"type": {
		  "type": "string"
		}
	  }
	}
  },
  "aliases": {}
}

J'ai ensuite essayé de faire une recherche, dans un premier temps, sans tester si le champ id existe afin de voir si les modifications fonctionnent. Voici la requête que j'essaye d'effectuer qui me permettrait de retrouver un flight en fonction de son id :

POST /nmb2b-2017.03.03/FlightListByAerodromeReply/_search
{
	"query": {
		"nested": {
			"path": "flights",
			"query": {
				"match": {
					"flights.flight.flightId.id": "AT02698286"
				}
			},
			"inner_hits": {}
		}
	}
}

Mais cette dernière me retourne l'erreur suivante:

{
   "error": {
	  "root_cause": [
		 {
			"type": "query_parsing_exception",
			"reason": "[nested] failed to find nested object under path [flights]",
			"index": "nmb2b-2017.03.03",
			"line": 4,
			"col": 21
		 }
	  ],
	  "type": "search_phase_execution_exception",
	  "reason": "all shards failed",
	  "phase": "query",
	  "grouped": true,
	  "failed_shards": [
		 {
			"shard": 0,
			"index": "nmb2b-2017.03.03",
			"node": "m7ZiycvQSeWaEslyS4QRWA",
			"reason": {
			   "type": "query_parsing_exception",
			   "reason": "[nested] failed to find nested object under path [flights]",
			   "index": "nmb2b-2017.03.03",
			   "line": 4,
			   "col": 21
			}
		 }
	  ]
   },
   "status": 400
}

Faut-il aussi typer les objets enfants de flights avec du nested ?

Tu as modifié un template, apparemment. As-tu supprimé puis recréé l'index ?

Quel est le mapping de l'index?

Idéalement, essaye d'envoyer des exemples beaucoup plus simples de façon à ce que cela soit plus facile à lire.

Après avoir ajouté un type nested aux champs enfants de flights, j'arrive à requêter :

POST /nmb2b-2017.03.03/FlightListByAerodromeReply/_search
{
	"query": {
		"nested": {
			"path": "parsed.Body.FlightListByAerodromeReply.data.flights",
			"query": {
				"nested": {
				   "path": "parsed.Body.FlightListByAerodromeReply.data.flights.flight",
				   "query": {
					   "nested": {
						  "path": "parsed.Body.FlightListByAerodromeReply.data.flights.flight.flightId",
						  "query": {
							  "match": {
								 "parsed.Body.FlightListByAerodromeReply.data.flights.flight.flightId.id": "AT02698286"
							}
						  }
					   }
				   }
				}
			},
			"inner_hits": {}
		}
	}
}

Mais le résultat que j'obtiens ne correspond pas à mes attentes.
Comme tu l'as indiqué plus tôt, il serait plus judicieux de créer un document par flight.
Si jamais cela peut intéresser quelqu'un, voilà comment j'ai procédé.

À l'aide de Logstash, je parse le document XML et je le découpe (split) en plusieurs documents :

input {
		tcp {
				port => "7001"
				type => "FlightListByAerodromeReply"
		}
}
filter {
		if [type] == "FlightListByAerodromeReply" {
				xml {
						source => "message"
						#store_xml => false
						target => "parsed"
						#force_array => false
				}
				split {
						field => "parsed[Body]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data][effectiveTrafficWindow]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data][flights]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data][flights][flight]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId]"
				}
				split {
						field => "parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys]"
				}
				mutate {
						add_field => { flightId => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][id]}" }
						add_field => { aircraftId => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][aircraftId]}" }
						add_field => { aerodromeOfDestination => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][aerodromeOfDestination]}" }
						add_field => { estimatedOffBlockTime => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][estimatedOffBlockTime]}" }
						add_field => { airFiled => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][airFiled]}" }
						add_field => { nonICAOAerodromeOfDestination => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][nonICAOAerodromeOfDestination]}" }
						add_field => { nonICAOAerodromeOfDeparture => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][nonICAOAerodromeOfDeparture]}" }
						add_field => { aerodromeOfDeparture => "%{parsed[Body][FlightListByAerodromeReply][data][flights][flight][flightId][keys][aerodromeOfDeparture]}" }
						add_field => { wef => "%{parsed[Body][FlightListByAerodromeReply][data][effectiveTrafficWindow][wef]}" }
						add_field => { unt => "%{parsed[Body][FlightListByAerodromeReply][data][effectiveTrafficWindow][unt]}" }
						add_field => { requestReceptionTime => "%{parsed[Body][FlightListByAerodromeReply][requestReceptionTime]}" }
						add_field => { sendTime => "%{parsed[Body][FlightListByAerodromeReply][sendTime]}" }
						add_field => { status => "%{parsed[Body][FlightListByAerodromeReply][status]}" }
						remove_field => [ "message" ]
						remove_field => [ "parsed" ]
				}
		}
}

Ce qui me permet d'obtenir, une fois le tout indexé:

POST /nmb2b-2017.03.03/FlightListByAerodromeReply/_search

	{
   "took": 3,
   "timed_out": false,
   "_shards": {
	  "total": 5,
	  "successful": 5,
	  "failed": 0
   },
   "hits": {
	  "total": 18863,
	  "max_score": 1,
	  "hits": [
		 {
			"_index": "nmb2b-2017.03.03",
			"_type": "FlightListByAerodromeReply",
			"_id": "AVqU5s__qXFr_vbJPt1p",
			"_score": 1,
			"_source": {
			   "requestReceptionTime": "2017-03-03 16:12:34",
			   "flightId": "AT02698316",
			   "airFiled": "false",
			   "type": "FlightListByAerodromeReply",
			   "tags": [],
			   "sendTime": "2017-03-03 16:12:35",
			   "aircraftId": "TAY123G",
			   "@timestamp": "2017-03-03T16:00:32.479Z",
			   "port": 36157,
			   "aerodromeOfDestination": "LFBO",
			   "estimatedOffBlockTime": "2017-03-03 00:15",
			   "@version": "1",
			   "host": "192.168.10.195",
			   "wef": "2017-03-03 00:00",
			   "unt": "2017-03-03 23:59",
			   "nonICAOAerodromeOfDestination": "false",
			   "nonICAOAerodromeOfDeparture": "false",
			   "aerodromeOfDeparture": "LFPG",
			   "status": "OK"
			}
		 },
		 {
			"_index": "nmb2b-2017.03.03",
			"_type": "FlightListByAerodromeReply",
			"_id": "AVqU5s__qXFr_vbJPt1q",
			"_score": 1,
			"_source": {
			   "requestReceptionTime": "2017-03-03 16:12:34",
			   "flightId": "AT02697997",
			   "airFiled": "false",
			   "type": "FlightListByAerodromeReply",
			   "tags": [],
			   "sendTime": "2017-03-03 16:12:35",
			   "aircraftId": "FPO701",
			   "@timestamp": "2017-03-03T16:00:32.479Z",
			   "port": 36157,
			   "aerodromeOfDestination": "LFML",
			   "estimatedOffBlockTime": "2017-03-03 00:35",
			   "@version": "1",
			   "host": "192.168.10.195",
			   "wef": "2017-03-03 00:00",
			   "unt": "2017-03-03 23:59",
			   "nonICAOAerodromeOfDestination": "false",
			   "nonICAOAerodromeOfDeparture": "false",
			   "aerodromeOfDeparture": "LFPG",
			   "status": "OK"
			}
		 }

Merci pour ton aide et à bientôt !
Cordialement, Benjamin

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.