Hi!
I'm in the process of indexing a couple of million documents, and the
requirements for the search are:
- Be able to search for documents in a case-insensitive way
- Be able to search for documents in a case-sensitive way
- Be able to search using wildcards (also in phrases)
As far as I understand, to support both case-sensitive and case-insensitive
search, I should have two indexes: one lower-cased, and one where everything
is stored as it is.
So far, I've tried the following code:
#!/usr/bin/env perl
use strict;
use warnings;
use Data::Dumper;
use utf8;
use ElasticSearch;
# Client handle used by every call below; talks to a single node through the
# 'httplite' transport.  The options are kept as an ordered list so they are
# passed to the constructor exactly as before.
my @client_options = (
    servers   => ['foobar.com:9200'],
    transport => 'httplite',
);
my $ES = ElasticSearch->new(@client_options);
# Start from a clean slate: drop the 'foobar' index, passing ignore_missing
# so the call is also made safely on a first run when the index may not exist.
my @drop_options = (
    index          => 'foobar',
    ignore_missing => 1,
);
$ES->delete_index(@drop_options);
# Create the 'foobar' index (one shard, no replicas) with a custom analyzer
# named 'complex' and an explicit mapping for the documents indexed below.
$ES->create_index(
    index    => 'foobar',
    settings => {
        number_of_shards   => 1,
        number_of_replicas => 0,
        analysis           => {
            analyzer => {
                # NOTE(review): the 'keyword' tokenizer emits the whole field
                # value as a single token, so individual words inside 'body'
                # are not searchable with this analyzer.  No lowercasing
                # filter is configured either, so matching is case-sensitive.
                # Confirm this is what the search requirements need.
                complex => {
                    type        => 'custom',
                    tokenizer   => 'keyword',
                    filter      => ['trim'],
                    char_filter => ['html_strip'],
                    stopwords   => [],
                },
            },
        },
    },
    mappings => {
        # The mapping type has to be the same type the script indexes and
        # searches ('document').  It was previously declared as 'article',
        # so the 'complex' analyzer was never applied to the stored data.
        document => {
            properties => {
                id   => { type => 'integer' },
                body => { type => 'string', analyzer => 'complex' },
            },
        },
    },
);
# Index one sample document (id 1) into 'foobar' and refresh immediately so
# that it is visible to the search issued right afterwards.
$ES->index(
    index   => 'foobar',
    type    => 'document',
    refresh => 1,
    id      => 1,
    data    => {
        id => 1,

        # The text contains an apostrophe ("Mexico's"), which terminated the
        # former single-quoted literal early and made the script fail to
        # compile.  q{} quoting avoids that.  The line breaks of the original
        # paste are replaced by single spaces.
        body => join(
            q{ },
            q{Mexico's military has notched an important success for},
            q{President Felipe Calderon with the arrest of Jorge Eduardo Costilla},
            q{Sanchez, boss of the powerful Gulf drug cartel.},
        ),
    },
);
# Run a query-string search against the 'body' field of the 'document' type
# and dump the raw response for inspection.
# NOTE(review): the wildcard sits inside a quoted phrase; query-string syntax
# may treat it as a literal character there rather than expanding it -- confirm
# against the Elasticsearch documentation.
my @search_options = (
    index            => 'foobar',
    type             => 'document',
    fields           => ['body'],
    q                => '"Felipe Calder*"',
    analyze_wildcard => 1,
    explain          => 1,
    analyzer         => 'complex',
);
my $res = $ES->searchqs(@search_options);

print Dumper($res);
...but this doesn't work. Any idea how I should proceed?
Thanks in advance!
--