Searching with & without spaces


(iluvcode) #1

I am using Elasticsearch 5.3 and would like to get the same results whether the search terms are written with or without spaces.
Example: some of the documents have "SM 58" (with a space) and some have "SM58" (without a space). When I search for either "SM58" or "SM 58", I would like to get both sets of records. Is it possible to do this with Elasticsearch?


(Lars van der Spek) #2

You could type "SM 58" OR "SM58" in the search bar. The same query can be applied to a filter if you like.


(iluvcode) #3

@lars.vanderspek, what filter are you referring to?


(Lars van der Spek) #4

I made the assumption you are using Kibana. If you are, you can use a filter to query data in a certain field. If you look for your text in the _id field, for example, it would look like this:

image


(iluvcode) #5

I am not using Kibana. This is for a public-facing website. There are multiple scenarios where we have words with/without spaces. Is there any analyzer that I can use?


(Lars van der Spek) #6

How are you accessing your data now? Have you formulated any other queries through your current method?


(iluvcode) #7

This is how I created the mapping. The Shortdescription & Metawords fields contain words with or without spaces, but the meaning is the same. So when a customer searches for SM 58 or SM58, I would like to return all matching documents regardless of whitespace.
{
  "settings": {
    "analysis": {
      "analyzer": {
        "pluralwordsAnalyzer": {
          "type": "snowball",
          "language": "English"
        },
        "htmlanalyzer": {
          "type": "custom",
          "char_filter": ["html_strip"],
          "tokenizer": "standard"
        },
        "trigram": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "shingle"]
        },
        "reverse": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "reverse"]
        }
      },
      "filter": {
        "shingle": {
          "type": "shingle",
          "min_shingle_size": 2,
          "max_shingle_size": 20
        }
      },
      "normalizer": {
        "lowercasenormalizer": {
          "type": "custom",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "smartsiteitemsearchentry": {
      "properties": {
        "itemId": {
          "type": "integer"
        },
        "shortDescription": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "pluralwordsAnalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        },
        "itemSnapLongDesc": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "longDescription": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "htmlanalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        },
        "itemType": {
          "type": "integer"
        },
        "searchDesc": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "keyWords": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "pluralwordsAnalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        },
        "specification": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "htmlanalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        },
        "itemGridSpecifications": {
          "type": "object",
          "properties": {
            "itemId": {
              "type": "integer"
            },
            "gridItemId": {
              "type": "integer"
            },
            "valueId": {
              "type": "integer"
            },
            "shortDescriptionSpecification": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "description": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "displayInSearchFilter": {
              "type": "boolean"
            },
            "value": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "seq": {
              "type": "integer"
            }
          }
        },
        "itemAttributes": {
          "type": "object",
          "properties": {
            "itemId": {
              "type": "integer"
            },
            "shortDescriptionAttribute": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "description": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "attributeValueId": {
              "type": "integer"
            },
            "value": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "displayInSearch": {
              "type": "boolean"
            },
            "hasRange": {
              "type": "boolean"
            },
            "sequence": {
              "type": "integer"
            }
          }
        },
        "id": {
          "type": "long"
        },
        "hasMoreRecord": {
          "type": "keyword"
        },
        "contentTypeId": {
          "type": "keyword"
        },
        "token": {
          "type": "keyword"
        },
        "itemAttributes.shortDescriptionAttribute": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "htmlanalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        },
        "itemGridSpecifications.shortDescriptionSpecification": {
          "type": "text",
          "index": true,
          "fields": {
            "raw": {
              "type": "keyword",
              "normalizer": "lowercasenormalizer"
            },
            "snowball": {
              "type": "text",
              "analyzer": "htmlanalyzer",
              "search_analyzer": "pluralwordsAnalyzer"
            }
          }
        }
      }
    }
  }
}

(system) #8

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.