Bonjour Dadoonet, je vais essayer d'être plus clair.
L’utilisateur saisit « 61 RUE DU PARADIS » et on souhaite ce résultat.
Mon CPU est très vite saturé à 100 % car il manipule 130 millions de données pour les trier par la suite, alors que je ne lui demande que les 30 premières lignes ; mon CPU n'aime pas trop ça.
Voici ma requête à optimiser.
Le champ chainerecherche est un « copy_to » de plusieurs champs : par exemple libelle_voie, libelle_commune, libelle_ligne_5, ligne3, etc. ; voir le schéma ci-dessous (ce champ semble y apparaître sous le nom « querystring »).
Requête :
{
  "from": 0,
  "size": 30,
  "query": {
    "query_string": {
      "query": "(chainerecherche:/.61./ )AND( chainerecherche:/.RUE./ )AND( chainerecherche:/.DU./ )AND( chainerecherche:/.PARADIS./) AND (type: ligne3 OR type: pdi OR type: voie OR type: commune ) AND actif:1 AND flag_diffusable:1",
      "fields": [],
      "type": "best_fields",
      "default_operator": "and",
      "max_determinized_states": 10000,
      "enable_position_increments": true,
      "fuzziness": "AUTO",
      "fuzzy_prefix_length": 0,
      "fuzzy_max_expansions": 50,
      "phrase_slop": 0,
      "escape": false,
      "auto_generate_synonyms_phrase_query": true,
      "fuzzy_transpositions": true,
      "boost": 1
    }
  },
  "version": true,
  "track_scores": true,
  "highlight": {
    "pre_tags": [
      "<span style='color:"
    ],
    "post_tags": [
      ""
    ],
    "require_field_match": false,
    "fields": {
      "ligne3": {},
      "numero": {},
      "ext_courte": {},
      "ext_longue": {},
      "libelle_voie": {},
      "libelle_voie_syn": {},
      "libelle_ligne_5": {},
      "libelle_acheminement": {},
      "code_postal": {}
    }
  }
}
Voici les infos infra :
Mon index contient toutes les rues de France:
• 25 462 993 entrées
• Taille de l’index : 10 Go
Nous avons 3 shards primaires et 1 réplica sur un cluster de 6 serveurs (32 Go de RAM / 8 CPU chacun).
Nous avons appliqué toutes les recommandations elastic.co « tune-for-search-speed »
La configuration Java est la suivante :
root@ELK001:~# ps aux | grep java
elastic+ 45085 170 61.9 31052544 20460004 ? SLsl 03:18 860:27 /bin/java -Xms16g -Xmx16g -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=75 -XX:+UseCMSInitiatingOccupancyOnly -Des.networkaddress.cache.ttl=60 -Des.networkaddress.cache.negative.ttl=10 -XX:+AlwaysPreTouch -Xss1m -Djava.awt.headless=true -Dfile.encoding=UTF-8 -Djna.nosys=true -XX:-OmitStackTraceInFastThrow -Dio.netty.noUnsafe=true -Dio.netty.noKeySetOptimization=true -Dio.netty.recycler.maxCapacityPerThread=0 -Dlog4j.shutdownHookEnabled=false -Dlog4j2.disable.jmx=true -Djava.io.tmpdir=/tmp/elasticsearch-3573362637254362332 -XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log -Des.path.home=/usr/share/elasticsearch -Des.path.conf=/etc/elasticsearch -Des.distribution.flavor=default -Des.distribution.type=rpm -cp /usr/share/elasticsearch/lib/* org.elasticsearch.bootstrap.Elasticsearch -p /var/run/elasticsearch/elasticsearch.pid
Les requêtes les plus lentes sont celles qui utilisent des termes très fréquents dans l’index, comme « 61 » ou « rue » (« street » en anglais) : « rue » est présent 13 287 097 fois sur 25 462 993 enregistrements.
Lorsque j’utilise le profileur, on voit que le temps est principalement consacré aux mots-clés les plus courants de l’index — « 1 » (numéro dans la rue), « rue » (« street » en anglais) et « du » (« of the » en anglais) — et qu’il est passé dans la phase « build_scorer ».
Type | Temps propre (self time) | Temps total (total time) | % du temps
BooleanQuery +querystring:/parad.*/ +querystring... 0.5ms 116.6ms 99.25%
MultiTermQueryConstantScoreWrapper querystring:/1.*/ 52.0ms 52.0ms 44.25%
MultiTermQueryConstantScoreWrapper querystring:/rue.*/ 38.1ms 38.1ms 32.45%
MultiTermQueryConstantScoreWrapper querystring:/du.*/ 24.7ms 24.7ms 21.03%
MultiTermQueryConstantScoreWrapper querystring:/parad.*/ 1.3ms 1.3ms 1.07%
Détail des temps (timing breakdown) :
build_scorer 24,6 ms 99,7 %
advance 60,4 μs 0,2 %
score 24,6 μs 0,1 %
create_weight 110,0 ns 0,0 %
match 0,0 ns 0,0 %
next_doc 0,0 ns 0,0 %
Mapping de l’index :
{
  "mapping": {
    "proficDocument": {
      "properties": {
        "cea": { "type": "keyword" },
        "querystring": { "type": "text" },
        "querystring_ligne4": { "type": "text" },
        "querystring_ligne6": { "type": "text" },
        "code_afnor": { "type": "keyword", "index": false },
        "code_cedex": {
          "type": "keyword",
          "copy_to": ["querystring"]
        },
        "code_insee": { "type": "keyword" },
        "code_insee_ancienne_commune": { "type": "keyword" },
        "code_postal": { "type": "keyword" },
        "commentaires": { "type": "text" },
        "coordonnees": { "type": "geo_point" },
        "dateExport": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "dateRef": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "desc_voie": { "type": "text", "index": false },
        "desc_voie_syn": { "type": "text", "index": false },
        "ext_courte": {
          "type": "keyword",
          "copy_to": ["querystring"]
        },
        "ext_longue": {
          "type": "keyword",
          "copy_to": ["querystring", "querystring_ligne4"]
        },
        "id": { "type": "alias", "path": "_id" },
        "idza": { "type": "keyword", "index": false },
        "libelle_acheminement": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring", "querystring_ligne6", "libelle_acheminement_str"]
        },
        "libelle_acheminement_cedex": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "libelle_acheminement_str": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          }
        },
        "libelle_commune": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "libelle_ligne_5": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "libelle_pays": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "libelle_pays_syn": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "libelle_projection": { "type": "keyword", "index": false },
        "libelle_raison": { "type": "keyword", "index": false },
        "libelle_voie": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring", "querystring_ligne4"]
        },
        "libelle_voie_syn": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "ligne1": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "ligne2": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "ligne3": {
          "type": "text",
          "fields": {
            "keyword": { "type": "keyword", "ignore_above": 256 }
          },
          "copy_to": ["querystring"]
        },
        "matvoie": { "type": "keyword", "index": false },
        "matvoie_syn": { "type": "keyword", "index": false },
        "mention_speciale": { "type": "text" },
        "mot_directeur": { "type": "keyword", "index": false },
        "mot_directeur_syn": { "type": "keyword", "index": false },
        "new_cea": { "type": "keyword" },
        "numero": {
          "type": "integer",
          "copy_to": ["querystring", "querystring_ligne4", "numero_str"]
        },
        "numero_mention_speciale": { "type": "text" },
        "numero_str": { "type": "keyword" },
        "type": { "type": "keyword" },
        "type_pays": { "type": "keyword", "index": false },
        "type_projection": { "type": "integer", "index": false },
        "type_raison": { "type": "keyword", "index": false },
        "type_synonyme": { "type": "keyword", "index": false },
        "type_voie": { "type": "keyword", "index": false },
        "type_voie_syn": { "type": "keyword", "index": false },
        "x": { "type": "float" },
        "y": { "type": "float" }
      }
    }
  }
}