Quantcast

[0.20.4] Java API for index creation (CreateIndexRequestBuilder) doesn't work?

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[0.20.4] Java API for index creation (CreateIndexRequestBuilder) doesn't work?

Ross Martin
I've been having difficulty creating custom ES indices with the Java API. At this point, I'm going to write off the Java API and just use the REST endpoints for index management. Please let me know if you see anything wrong with my process below:


We create the index + settings where client is a TransportClient:
CreateIndexRequestBuilder builder = client.admin().indices().prepareCreate("files");
builder
.setSettings(settings);
CreateIndexResponse response =  builder.execute().actionGet();


settings is a String variable that contains this:
{   "settings" : {
     
"analysis" : {
         
"analyzer" : {
           
"filename_search" : {
               
"tokenizer" : "filename",
               
"filter" : ["lowercase"]
           
},
           
"filename_index" : {
               
"tokenizer" : "filename",
               
"filter" : ["lowercase","edge_ngram"]
           
}
         
},
         
"tokenizer" : {
           
"filename" : {
               
"pattern" : "[^\\p{L}\\d]+",
               
"type" : "pattern"
           
}
         
},
         
"filter" : {
           
"edge_ngram" : {
               
"side" : "front",
               
"max_gram" : 20,
               
"min_gram" : 1,
               
"type" : "edgeNGram"
           
}
         
}
     
}
   
}
}


After executing this Java code, I want to verify that the index is created with the above settings:
 curl -XGET localhost:9200/files/_settings?pretty=true
{
 
"files" : {
   
"settings" : {
     
"index.settings.analysis.filter.edge_ngram.side" : "front",
     
"index.settings.analysis.filter.edge_ngram.max_gram" : "20",
     
"index.settings.analysis.filter.edge_ngram.type" : "edgeNGram",
     
"index.settings.analysis.tokenizer.filename.pattern" : "[^\\p{L}\\d]+",
     
"index.settings.analysis.analyzer.filename_index.filter.1" : "edge_ngram",
     
"index.settings.analysis.analyzer.filename_index.filter.0" : "lowercase",
     
"index.settings.analysis.analyzer.filename_index.tokenizer" : "filename",
     
"index.settings.analysis.analyzer.filename_search.tokenizer" : "filename",
     
"index.settings.analysis.analyzer.filename_search.filter.0" : "lowercase",
     
"index.settings.analysis.filter.edge_ngram.min_gram" : "1",
     
"index.settings.analysis.tokenizer.filename.type" : "pattern",
     
"index.number_of_shards" : "5",
     
"index.number_of_replicas" : "1",
     
"index.version.created" : "200499"
   
}
 
}
}



Looks good, right? Let's test the analyzer:
curl -XGET 'http://localhost:9200/files/_analyze?pretty=1&text=My_first_file_2012.01.13.doc&analyzer=filename_search'
{
 
"error" : "ElasticSearchIllegalArgumentException[failed to find analyzer [filename_search]]",
 
"status" : 400
}



Hmm, didn't work. Let's blow this index away and recreate it using the REST api.
curl -XPUT 'http://localhost:9200/files/?pretty=1'  -d '

> {
>    "settings" : {
>       "analysis" : {
>          "analyzer" : {
>             "filename_search" : {
>                "tokenizer" : "filename",
>                "filter" : ["lowercase"]
>             },
>             "filename_index" : {
>                "tokenizer" : "filename",
>                "filter" : ["lowercase","edge_ngram"]
>             }
>          },
>          "tokenizer" : {
>             "filename" : {
>                "pattern" : "[^\\p{L}\\d]+",
>                "type" : "pattern"
>             }
>          },
>          "filter" : {
>             "edge_ngram" : {
>                "side" : "front",
>                "max_gram" : 20,
>                "min_gram" : 1,
>                "type" : "edgeNGram"
>             }
>          }
>       }
>    }
> }
> '
{
 
"ok" : true,
 
"acknowledged" : true
}



And we'll verify the settings. Note that they are not actually the same as with the Java API: the keys are now prefixed "index.analysis.*" instead of "index.settings.analysis.*" — the extra "settings" level that appeared when the wrapped JSON was passed to setSettings() is gone:
 curl -XGET localhost:9200/files/_settings?pretty=true
{
 
"files" : {
   
"settings" : {
     
"index.analysis.analyzer.filename_search.tokenizer" : "filename",
     
"index.analysis.filter.edge_ngram.side" : "front",
     
"index.analysis.filter.edge_ngram.type" : "edgeNGram",
     
"index.analysis.analyzer.filename_index.filter.0" : "lowercase",
     
"index.analysis.analyzer.filename_search.filter.0" : "lowercase",
     
"index.analysis.analyzer.filename_index.filter.1" : "edge_ngram",
     
"index.analysis.analyzer.filename_index.tokenizer" : "filename",
     
"index.analysis.filter.edge_ngram.max_gram" : "20",
     
"index.analysis.filter.edge_ngram.min_gram" : "1",
     
"index.analysis.tokenizer.filename.type" : "pattern",
     
"index.analysis.tokenizer.filename.pattern" : "[^\\p{L}\\d]+",
     
"index.number_of_shards" : "5",
     
"index.number_of_replicas" : "1",
     
"index.version.created" : "200499"
   
}
 
}
}



Finally, we see the expected result after recreating the index with the REST api:

curl -XGET 'http://localhost:9200/files/_analyze?pretty=1&text=My_first_file_2012.01.13.doc&analyzer=filename_search'
{
 
"tokens" : [ {
   
"token" : "my",
   
"start_offset" : 0,
   
"end_offset" : 2,
   
"type" : "word",
   
"position" : 1
 
}, {
   
"token" : "first",
   
"start_offset" : 3,
   
"end_offset" : 8,
   
"type" : "word",
   
"position" : 2
 
}, {
   
"token" : "file",
   
"start_offset" : 9,
   
"end_offset" : 13,
   
"type" : "word",
   
"position" : 3
 
}, {
   
"token" : "2012",
   
"start_offset" : 14,
   
"end_offset" : 18,
   
"type" : "word",
   
"position" : 4
 
}, {
   
"token" : "01",
   
"start_offset" : 19,
   
"end_offset" : 21,
   
"type" : "word",
   
"position" : 5
 
}, {
   
"token" : "13",
   
"start_offset" : 22,
   
"end_offset" : 24,
   
"type" : "word",
   
"position" : 6
 
}, {
   
"token" : "doc",
   
"start_offset" : 25,
   
"end_offset" : 28,
   
"type" : "word",
   
"position" : 7
 
} ]
}


--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [hidden email].
For more options, visit https://groups.google.com/groups/opt_out.
 
 
Loading...