|
|
I've been having difficulty creating custom ES indices with the Java API. At this point, I'm going to write off the Java API and just use the REST endpoints for index management. Please let me know if you see anything wrong with my process below:
We create the index and its settings, where client is a TransportClient: CreateIndexRequestBuilder builder = client.admin().indices().prepareCreate("files"); builder.setSettings(settings); CreateIndexResponse response = builder.execute().actionGet();
settings is a String variable that contains this: { "settings" : { "analysis" : { "analyzer" : { "filename_search" : { "tokenizer" : "filename", "filter" : ["lowercase"] }, "filename_index" : { "tokenizer" : "filename", "filter" : ["lowercase","edge_ngram"] } }, "tokenizer" : { "filename" : { "pattern" : "[^\\p{L}\\d]+", "type" : "pattern" } }, "filter" : { "edge_ngram" : { "side" : "front", "max_gram" : 20, "min_gram" : 1, "type" : "edgeNGram" } } } } }
After executing this Java code, I want to verify that the index is created with the above settings: curl -XGET localhost:9200/files/_settings?pretty=true { "files" : { "settings" : { "index.settings.analysis.filter.edge_ngram.side" : "front", "index.settings.analysis.filter.edge_ngram.max_gram" : "20", "index.settings.analysis.filter.edge_ngram.type" : "edgeNGram", "index.settings.analysis.tokenizer.filename.pattern" : "[^\\p{L}\\d]+", "index.settings.analysis.analyzer.filename_index.filter.1" : "edge_ngram", "index.settings.analysis.analyzer.filename_index.filter.0" : "lowercase", "index.settings.analysis.analyzer.filename_index.tokenizer" : "filename", "index.settings.analysis.analyzer.filename_search.tokenizer" : "filename", "index.settings.analysis.analyzer.filename_search.filter.0" : "lowercase", "index.settings.analysis.filter.edge_ngram.min_gram" : "1", "index.settings.analysis.tokenizer.filename.type" : "pattern", "index.number_of_shards" : "5", "index.number_of_replicas" : "1", "index.version.created" : "200499" } } }
Looks good, right? Let's test the analyzer: curl -XGET 'http://localhost:9200/files/_analyze?pretty=1&text=My_first_file_2012.01.13.doc&analyzer=filename_search' { "error" : "ElasticSearchIllegalArgumentException[failed to find analyzer [filename_search]]", "status" : 400 }
Hmm, didn't work. Let's blow this index away and recreate it using the REST api. curl -XPUT 'http://localhost:9200/files/?pretty=1' -d ' > { > "settings" : { > "analysis" : { > "analyzer" : { > "filename_search" : { > "tokenizer" : "filename", > "filter" : ["lowercase"] > }, > "filename_index" : { > "tokenizer" : "filename", > "filter" : ["lowercase","edge_ngram"] > } > }, > "tokenizer" : { > "filename" : { > "pattern" : "[^\\p{L}\\d]+", > "type" : "pattern" > } > }, > "filter" : { > "edge_ngram" : { > "side" : "front", > "max_gram" : 20, > "min_gram" : 1, > "type" : "edgeNGram" > } > } > } > } > } > ' { "ok" : true, "acknowledged" : true }
And we'll verify that the settings look the same as they did when we used the Java API, except for some values in a different order: curl -XGET localhost:9200/files/_settings?pretty=true { "files" : { "settings" : { "index.analysis.analyzer.filename_search.tokenizer" : "filename", "index.analysis.filter.edge_ngram.side" : "front", "index.analysis.filter.edge_ngram.type" : "edgeNGram", "index.analysis.analyzer.filename_index.filter.0" : "lowercase", "index.analysis.analyzer.filename_search.filter.0" : "lowercase", "index.analysis.analyzer.filename_index.filter.1" : "edge_ngram", "index.analysis.analyzer.filename_index.tokenizer" : "filename", "index.analysis.filter.edge_ngram.max_gram" : "20", "index.analysis.filter.edge_ngram.min_gram" : "1", "index.analysis.tokenizer.filename.type" : "pattern", "index.analysis.tokenizer.filename.pattern" : "[^\\p{L}\\d]+", "index.number_of_shards" : "5", "index.number_of_replicas" : "1", "index.version.created" : "200499" } } }
Finally, we see the expected result after recreating the index with the REST API:
curl -XGET 'http://localhost:9200/files/_analyze?pretty=1&text=My_first_file_2012.01.13.doc&analyzer=filename_search' { "tokens" : [ { "token" : "my", "start_offset" : 0, "end_offset" : 2, "type" : "word", "position" : 1 }, { "token" : "first", "start_offset" : 3, "end_offset" : 8, "type" : "word", "position" : 2 }, { "token" : "file", "start_offset" : 9, "end_offset" : 13, "type" : "word", "position" : 3 }, { "token" : "2012", "start_offset" : 14, "end_offset" : 18, "type" : "word", "position" : 4 }, { "token" : "01", "start_offset" : 19, "end_offset" : 21, "type" : "word", "position" : 5 }, { "token" : "13", "start_offset" : 22, "end_offset" : 24, "type" : "word", "position" : 6 }, { "token" : "doc", "start_offset" : 25, "end_offset" : 28, "type" : "word", "position" : 7 } ] }
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [hidden email].
For more options, visit https://groups.google.com/groups/opt_out.
|