How to create indexes in bulk using elasticsearch_dsl?

Hi,

I was able to create a single index using the code below:
Single index code

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Index,Search,Document, analyzer, tokenizer,\
                           connections,Mapping, Nested, Text, Keyword, InnerDoc
from elasticsearch_dsl.field import Text

# Open the connection to the Elasticsearch node on localhost.
connections.create_connection(hosts=['localhost'])

# Custom analyzer: tokenize on whitespace, then lowercase every token.
my_analyzer = analyzer(
    "my_analyzer",
    type="custom",
    tokenizer=tokenizer("whitespace"),
    filter=['lowercase'],
)

# Attach the analyzer to 'myindex' and create the index.
index1 = Index('myindex')
index1.analyzer(my_analyzer)
index1.create()

class File(InnerDoc):
    """Inner document describing a file; holds only its name."""
    # Keyword field declared with store=True.
    filename = Keyword(store=True)

class ResumeIndex(Document):
    """Document stored in 'myindex': analyzed text content plus nested file info."""
    # Full-text field analyzed with the custom whitespace/lowercase analyzer.
    content = Text(analyzer=my_analyzer)
    # Pass the File inner document so the nested field carries the
    # `filename` sub-field that appears in the mapping output; a bare
    # Nested() declares no sub-fields.
    file = Nested(File)

    class Index:
        name = "myindex"

# Initialise the ResumeIndex document type against its index.
ResumeIndex.init()

# Run a sample phrase through 'my_analyzer' and print what comes back.
response = ResumeIndex._index.analyze(
    body={
        'analyzer': 'my_analyzer',
        'text': 'c# developer',
    },
)
print(response)

The mapping output for single index code is: (BEFORE REINDEXING USING FSCRAWLER)

{
  "myindex" : {
    "mappings" : {
      "properties" : {
        "content" : {
          "type" : "text",
          "analyzer" : "my_analyzer"
        },
        "file" : {
          "type" : "nested",
          "properties" : {
            "filename" : {
              "type" : "keyword",
              "store" : true
            }
          }
        }
      }
    }
  }
}

After this, I re-index using FSCrawler.
But I need to create multiple indexes for different resources.

I am trying the code below, but when I check the mapping, the index is not quite the same as the one produced by the single-index code.

from elasticsearch import Elasticsearch
from elasticsearch_dsl import Index,Search,Document, analyzer, tokenizer,\
                           connections,Mapping, Nested, Text, Keyword, InnerDoc
from elasticsearch_dsl.field import Text
from pprint import pprint

# Open the connection to the Elasticsearch node on localhost.
connections.create_connection(hosts=['localhost'])

# Custom analyzer: tokenize on whitespace, then lowercase every token.
my_analyzer = analyzer(
    "my_analyzer",
    type="custom",
    tokenizer=tokenizer("whitespace"),
    filter=['lowercase'],
)

# Names of the indexes that createIndex() iterates over.
index = [
    "index_trial1",
    "index_trial2",
]

class File(InnerDoc):
    """Inner document describing a file; holds only its name."""
    # Keyword field declared with store=True.
    filename = Keyword(store=True)

class ResumeIndex(Document):
    """Document with analyzed text content and a nested File sub-document."""
    # Full-text field analyzed with the custom whitespace/lowercase analyzer.
    content = Text(analyzer = my_analyzer)
    # Nested field whose sub-fields come from the File inner document.
    file = Nested(File)

    class Index:
        # Index name declared for this document class.
        name = "myindex"

def createIndex():
    """Create each index named in ``index`` and try to apply the ResumeIndex mapping.

    For every name, an Index carrying ``my_analyzer`` is created; then a
    ResumeIndex instance has its ``Index.name`` reassigned to that name and
    ``init()`` is called on it.

    NOTE(review): this post reports that the indexes created here end up with
    empty mappings, so reassigning ``R.Index.name`` apparently does not change
    the index that ``init()`` writes to. Presumably the target index is fixed
    when the class is defined; ``Document.init`` accepts an ``index=``
    argument that may be the intended way to retarget it -- confirm against
    the elasticsearch_dsl documentation.
    """
    # Never read; only referenced by the commented-out debug lines below.
    indeces = []
    #global my_analyzer
    for ind in index:
        # Create the bare index with the custom analyzer in its settings.
        index_dummy = Index(ind)
        index_dummy.analyzer(my_analyzer)
        index_dummy.create()

        # Attempt to point the document class at the new index before init().
        R = ResumeIndex()
        R.Index.name = ind
        R.init()
        #indeces.append(R.Index.name)
        #print("indeces", indeces)
        #response = R._index.analyze(body={'analyzer':'my_analyzer', 'text': 'c# developer'})
        #print(response)

# Run the index creation only when this file is executed as a script.
if __name__ == "__main__":
    createIndex()

It creates index_trial1 and index_trial2, but the mappings are empty.
The mapping output for the multiple-index code (BEFORE REINDEXING):

`GET /index_trial1/_mapping` 
{
  "index_trial1" : {
    "mappings" : { }
  }
}

Could anyone tell me how to create multiple indexes?

-Lisa

1 Like

Hello,

The code below worked for me!

    from elasticsearch import Elasticsearch
    from elasticsearch_dsl import Index, Search, Document, analyzer, tokenizer, \
    connections, Mapping, Nested, Text, Keyword, InnerDoc
    from elasticsearch_dsl.field import Text
    from pprint import pprint

    # Keep the client returned by create_connection; createIndex() uses it
    # for the existence check.
    es = connections.create_connection(hosts=['localhost'])

    # Custom analyzer: tokenize on whitespace, then lowercase every token.
    my_analyzer = analyzer(
        "my_analyzer",
        type="custom",
        tokenizer=tokenizer("whitespace"),
        filter=['lowercase'],
    )

    # Names of the indexes to create.
    index = [
        "index_trial1",
        "index_trial2",
        "index_trial3",
        "index_trial4",
    ]

    def createIndex():
        """Save one shared mapping to every index named in ``index``.

        The mapping has a ``content`` text field analyzed with ``my_analyzer``
        and a nested ``file`` field containing a stored ``filename`` keyword.
        An index that already exists is reported and left untouched; for any
        other name the mapping is saved to that index.
        """
        # Build the mapping once; it is identical for every index.
        m = Mapping()
        m.field('content', 'text', analyzer=my_analyzer)
        file_field = Nested(properties={'filename': Keyword(store=True)})
        m.field('file', file_field)

        for ind in index:
            if es.indices.exists(index=ind):
                print(str(ind)+" exists already!!")
            else:
                m.save(ind)

    # Run the index creation only when this file is executed as a script.
    if __name__ == "__main__":
        createIndex()

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.