Can someone suggest a strategy for importing JSON results from the AntWeb.org API (https://www.antweb.org/api.do) into Elasticsearch?
Example JSON (one record out of 630,000):
"specimens": [
{
"antwebTaxonName": "myrmicinaetetramorium densopilosum",
"basisOfRecord": "preserved specimen",
"biogeographicregion": "Palearctic",
"bioregion": "Palearctic",
"caste": "queen",
"classVal": "insecta",
"code": "antweb1008964",
"collectedby": "A. Radchenko",
"collectionCode": "ANTWEB",
"country": "Armenia",
"dateCollected": "1986-06-14",
"dateIdentified": null,
"dctermsModified": "Thu, 22 Mar 2018 13:03:33 GMT",
"decimalLatitude": null,
"decimalLongitude": null,
"family": "formicidae",
"fieldNotes": null,
"fieldNumber": "ANTC43083",
"fossil": false,
"genus": "Tetramorium",
"georeferenceRemarks": null,
"habitat": null,
"habitats": null,
"higherClassification": "animalia;arthropoda;insecta;hymenoptera;hymenoptera;formicidae;myrmicinae",
"identifiedBy": null,
"institutionCode": "CAS",
"intraspecificEpithet": null,
"kingdom": "animalia",
"locality": "Khosrov reserve",
"locatedat": "SIZK",
"locationRemark": null,
"microhabitat": null,
"minimumEleationInMeters": null,
"museum": null,
"nomenclaturalCode": "ICZN",
"occurrenceId": "CAS:ANTWEB:antweb1008964",
"occurrenceRemarks": "237-86",
"order": "hymenoptera",
"ownedby": "SIZK, Kiev, Ukraine",
"ownerInstitutionCode": "SIZK, Kiev, Ukraine",
"phylum": "arthropoda",
"preparations": "pin",
"recordedBy": "A. Radchenko",
"samplingProtocol": null,
"scientificName": "tetramorium densopilosum",
"sex": "1 dealate queen",
"specificEpithet": "densopilosum",
"stateProvince": null,
"status": "valid",
"subfamily": "myrmicinae",
"subgenus": null,
"type": "paratype of Tetramorium densopilosum",
"verbatimEventDate": "14 Jun 1986"
},
Note that the API results are not in NDJSON format. Should I first reformat them with jq and then use curl, or would you suggest using a Python script? I also realize I need to define a mapping for the index, and I'm not sure how that should fit into the workflow from the API to Elasticsearch.