Trying to search with synonyms for Thai words


(Kittapas Jewjaitham) #1

I'm using HPE CM with Elasticsearch 5.6.3 and am trying to configure Elasticsearch to support synonym search.

This is the command I used to create the index:

PUT /hpecm_dev
{
  "settings": {
    "analysis": {
      "filter": {
        "drm_synonyms": {
          "type": "synonym",
          "synonyms_path": "synonyms.txt",
          "tokenizer": "icu_tokenizer"
        }
      },
      "analyzer": {
        "thai_tokenizer": {
          "type": "custom",
          "tokenizer": "icu_tokenizer",
          "filter": ["lowercase", "drm_synonyms"]
        }
      }
    }
  },
  "mappings": {
    "user": {
      "properties": {
        "LoginDetails": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "SecurityKeys": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        }
      }
    },
    "record": {
      "properties": {
        "Category": {
          "type": "text",
          "analyzer": "thai_tokenizer",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "Classification": {
          "properties": {
            "Name": {
              "type": "text",
              "analyzer": "thai_tokenizer"
            },
            "uri": {
              "type": "long"
            }
          }
        },
        "Contacts": {
          "properties": {
            "Name": {
              "type": "text",
              "analyzer": "thai_tokenizer"
            },
            "uri": {
              "type": "long"
            }
          }
        },
        "Container": {
          "properties": {
            "Name": {
              "type": "text",
              "analyzer": "thai_tokenizer"
            },
            "uri": {
              "type": "long"
            }
          }
        },
        "Contents": {
          "properties": {
            "Document": {
              "properties": {
                "Content": {
                  "type": "text",
                  "fields": {
                    "exact": {
                      "type": "text",
                      "analyzer": "standard"
                    }
                  },
                  "analyzer": "thai_tokenizer"
                },
                "Path": {
                  "type": "text",
                  "fields": {
                    "keyword": {
                      "type": "keyword",
                      "ignore_above": 256
                    }
                  }
                }
              }
            }
          }
        },
        "ConversationId": {
          "type": "text"
        },
        "DatabaseId": {
          "type": "text"
        },
        "DateCreated": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss"
        },
        "DateRegistered": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss"
        },
        "DeleteThisRecordAfter50Days": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "Extension": {
          "type": "text"
        },
        "ExternalReference": {
          "type": "text"
        },
        "FolderID": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "Notes": {
          "type": "text",
          "analyzer": "thai_tokenizer"
        },
        "Number": {
          "type": "text"
        },
        "OriginatedFrom": {
          "properties": {
            "Name": {
              "type": "text",
              "analyzer": "thai_tokenizer"
            },
            "uri": {
              "type": "long"
            }
          }
        },
        "OwnerLocation": {
          "properties": {
            "Name": {
              "type": "text",
              "analyzer": "thai_tokenizer"
            },
            "uri": {
              "type": "long"
            }
          }
        },
        "PartID": {
          "type": "text",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "SecurityLocks": {
          "type": "text"
        },
        "Title": {
          "type": "text",
          "analyzer": "thai_tokenizer"
        },
        "uri": {
          "type": "long"
        }
      }
    }
  }
}

I have already placed "synonyms.txt" in D:\Program Files\Elastic\Elasticsearch\config. Example synonym entries from that .txt file are as follows:

ธนาคารแห่งประเทศไทย,ธปท.,Bank of Thailand,BOT,แบงค์ชาติ
สกุลเงินดิจิทัล,Cryptocurrency,Cryptocurrencies,Cryptocurrencies,คริปโตเคอเรนซี

Please help, or suggest a solution.

Thanks a lot


(Christoph) #2

What might your problem be?