Bulk Uploading


prashant5375
Hi there,
I have just started using ElasticSearch. My problem is that I have to insert 50 million rows of data into it, and I am using the following code in a loop:
public static void main(String[] args) throws Exception {
    // Start a local node with one shard and one replica.
    Node node = nodeBuilder().local(true)
            .settings(ImmutableSettings.settingsBuilder()
                    .put("index.number_of_shards", 1)
                    .put("index.number_of_replicas", 1)
                    .build())
            .build().start();
    Client client = node.client();

    // Map "location" as a geo_point with lat/lon stored.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
            .startObject("properties").startObject("location")
                .field("type", "geo_point").field("lat_lon", true)
            .endObject().endObject()
            .endObject().endObject().string();

    client.admin().indices().prepareCreate("test")
            .addMapping("type1", mapping)
            .setSettings(settingsBuilder().put("number_of_shards", "1"))
            .execute().actionGet();

    BulkRequestBuilder brb = client.prepareBulk();

    FileInputStream fstream = new FileInputStream("e:\\yp_CA.txt");
    DataInputStream in = new DataInputStream(fstream);
    BufferedReader br = new BufferedReader(new InputStreamReader(in));
    String strLine;
    long start = System.currentTimeMillis();
    int i = 0;
    // Read the file line by line.
    try {
        XContentBuilder person = null;
        while ((strLine = br.readLine()) != null) {
            String[] tmp = strLine.replaceAll("\"", "").split("~\\^\\~");
            String[] mergeField = tmp[4].split("\\|");
            String lati = mergeField[5];
            String longi = mergeField[6];
            i++;
            brb.add(client.prepareIndex("test", "type1", i + "")
                    .setSource(jsonBuilder().startObject()
                            .field("MAPPED_FSN", tmp[0].replaceAll("\"", ""))
                            .startObject("location")
                                .field("lat", Double.parseDouble(lati))
                                .field("lon", Double.parseDouble(longi))
                            .endObject()
                            .endObject()));

            person = jsonBuilder().startObject()
                    .field("gender", tmp[4]) // aleaList(genders) returns "M" or "F"
                    // [....] creation of a random person with some attributes
                    .endObject();

            brb.add(client.prepareIndex("toto", "tata")
                    .setRefresh(false)
                    .setReplicationType(ReplicationType.ASYNC)
                    .setSource(person));

            // Flush the accumulated bulk request every 100,000 records.
            if (i == 100000) {
                i = 0;
                brb.execute().actionGet();
            }
        }
        brb.execute().actionGet();
    } catch (Exception e) {
        brb.execute().actionGet();
    }
    brb.execute().actionGet();

    System.out.println("done!!!");
}


After every 100,000 records I call "brb.execute().actionGet();" to get the data inserted into the ElasticSearch index.
But after the first 100,000 records are inserted, the attempt to insert the next 100,000 fails with an OutOfMemory (heap size) error.
My question is: where am I going wrong? And can anyone share a complete Java example for inserting large amounts of data into ES?
Thanks in advance
Regards
Prashant


Re: Bulk Uploading

Craig Brown
I'd probably try inserting something less than 100K records at a time. We usually do blocks of 10K. Even with multiple threads running, I haven't seen any problems across many millions of records.
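
For reference, the pattern looks roughly like the sketch below (a sketch against the 0.18-era Java API used in your code; the method name, batch size, and field name are illustrative, not drop-ins). One thing worth double-checking: executing a BulkRequestBuilder does not empty it, so if you keep adding to the same builder, every batch resends everything that came before and the builder grows until the heap is exhausted. Create a fresh builder for each batch:

import java.io.BufferedReader;

import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

public static void indexInBatches(Client client, BufferedReader reader, int batchSize) throws Exception {
    BulkRequestBuilder bulk = client.prepareBulk();
    int count = 0;
    String line;
    while ((line = reader.readLine()) != null) {
        // No explicit id: letting ES generate ids avoids accidental overwrites.
        bulk.add(client.prepareIndex("test", "type1")
                .setSource(jsonBuilder().startObject().field("line", line).endObject()));
        if (++count % batchSize == 0) {
            BulkResponse response = bulk.execute().actionGet();
            if (response.hasFailures()) {
                System.err.println("bulk failures in batch ending at record " + count);
            }
            bulk = client.prepareBulk(); // fresh builder; an executed one is not cleared
        }
    }
    if (count % batchSize != 0) {
        bulk.execute().actionGet(); // flush the final partial batch
    }
}

With batchSize around 10000, this keeps the heap flat no matter how many records go through.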

  - Craig

--
CRAIG BROWN
chief architect
youwho, Inc.
www.youwho.com

T: 801.855.0921
M: 801.913.0939


Re: Bulk Uploading

prashant5375
Thanks for the reply. But I have another problem: when I call "brb.execute().actionGet();" again, I lose the data I inserted previously.
That is, suppose I loop over 10K records and call "brb.execute().actionGet();", and then try to insert another 10K and repeat the same call; at that point I lose the earlier data.
It would be a great help if you could send me some sample Java code.
Thanks in advance.
Regards
Prashant


Re: Bulk Uploading

Berkay Mollamustafaoglu-2
What do you mean by "lose previous data"? Do you mean you don't see it in the index? How do you check that; do you run a query? It may take a while for the docs to be indexed, depending on the size of your docs and the resources available.

Even 10K is a lot; you don't need such large chunks with ES. Try starting with 100 docs and see how the performance is.

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype



Re: Bulk Uploading

Craig Brown
In reply to this post by prashant5375
Strange. Let me look through what you have and compare it to what we're doing. Ours runs very stably, and we insert as many as 10K records/sec for hours at a time.

  - Craig


Re: Bulk Uploading

prashant5375
In reply to this post by Berkay Mollamustafaoglu-2
I mean that when I do the first insert, I see the index size change to 13 MB; then after the second insert it changes to around 12 MB or so. By that logic the size should instead grow to 25 MB or more, and it should keep growing.
Regards
Prashant



Re: Bulk Uploading

Berkay Mollamustafaoglu-2
You can check the number of documents via the API, or by using BigDesk, to get a better indication of how many docs are actually indexed.
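
For example (a sketch against the 0.18-era Java API used earlier in the thread; note that the response accessor was renamed in later releases, where count() became getCount()):

// Refresh first so recently indexed documents become visible to searches and counts.
client.admin().indices().prepareRefresh("test").execute().actionGet();
// Count all documents in the index.
long docs = client.prepareCount("test")
        .setQuery(org.elasticsearch.index.query.QueryBuilders.matchAllQuery())
        .execute().actionGet().count();
System.out.println("documents in 'test': " + docs);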

Regards,
Berkay Mollamustafaoglu
mberkay on yahoo, google and skype



Re: Bulk Uploading

prashant5375
I know what you're trying to say, but I am trying to insert 2 GB of data into ES; it should not be possible for the index size to stay at only 13 MB.
I think I am doing something wrong, but I don't know what. It is as if I am not adding data in append mode; it replaces the old data.
Regards
Prashant
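
That is in fact what is happening. ElasticSearch replaces a document whenever a new one is indexed under an id that already exists, and the loop above resets i to 0 after every batch, so the same ids 1 through 100000 are reused and each batch overwrites the previous one. A minimal illustration (assuming the same client as in the code above; the id and field values are made up):

// Two index calls with the same id leave exactly one document in the index.
client.prepareIndex("test", "type1", "42").setSource("{\"v\":1}").execute().actionGet();
client.prepareIndex("test", "type1", "42").setSource("{\"v\":2}").execute().actionGet(); // replaces {"v":1}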


Re: Bulk Uploading

prashant5375
If you run the code below, you will see that it is not adding documents to the index:

import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import org.elasticsearch.client.Client;
import org.elasticsearch.client.action.bulk.BulkRequestBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;

public class NewTest2 {

    public static void main(String[] args) throws Exception {
        Node node = nodeBuilder().local(true)
                .settings(ImmutableSettings.settingsBuilder()
                        .put("index.number_of_shards", 1)
                        .put("index.number_of_replicas", 1)
                        .build())
                .build().start();
        Client client = node.client();

        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
                .startObject("properties").startObject("location")
                    .field("type", "geo_point").field("lat_lon", true)
                .endObject().endObject()
                .endObject().endObject().string();

        client.admin().indices().prepareCreate("test")
                .addMapping("type1", mapping)
                .setSettings(settingsBuilder().put("number_of_shards", "1"))
                .execute().actionGet();

        BulkRequestBuilder brb = client.prepareBulk();
        long start = System.currentTimeMillis();
        int i = 0;
        try {
            for (int j = 0; j < 999999999; j++) {
                i++;
                brb.add(client.prepareIndex("test", "type1", "1")
                        .setSource(jsonBuilder().startObject()
                                .field("MAPPED_FSN", "Just a tem data" + j)
                                .startObject("location")
                                    .field("lat", Double.parseDouble("-117.40"))
                                    .field("lon", Double.parseDouble("32.00"))
                                .endObject()
                                .endObject()));

                // Flush every 1,000 records.
                if (i == 1000) {
                    i = 0;
                    System.out.println("in 1000");
                    brb.execute().actionGet();
                    System.out.println("committed..." + (System.currentTimeMillis() - start));
                }
            }
        } catch (Exception e) {
        }

        System.out.println("done!!!");
    }
}
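
Two things stand out in this test. First, every document is indexed under the constant id "1", so each bulk request just overwrites that single document; the earlier file-reading version had the same problem in a subtler form, since i was reset to 0 after every batch and the ids 1 through 100000 were reused. Second, the same BulkRequestBuilder is reused after execute(), which resends all the accumulated requests and grows without bound, which explains the OutOfMemory error. A corrected sketch of the loop (same 0.18-era API; the placeholder values are from the test above, and note that -117.40 is outside the valid latitude range of [-90, 90], so the coordinates may be swapped in the source data):

BulkRequestBuilder brb = client.prepareBulk();
for (int j = 0; j < 1000000; j++) {
    brb.add(client.prepareIndex("test", "type1", Integer.toString(j)) // unique id per document
            .setSource(jsonBuilder().startObject()
                    .field("MAPPED_FSN", "Just a tem data" + j)
                    .startObject("location")
                        .field("lat", -117.40) // outside [-90, 90]; lat/lon look swapped
                        .field("lon", 32.00)
                    .endObject()
                    .endObject()));
    if ((j + 1) % 1000 == 0) {
        brb.execute().actionGet();
        brb = client.prepareBulk(); // start a fresh builder; an executed one is not cleared
    }
}
if (brb.numberOfActions() > 0) {
    brb.execute().actionGet(); // flush any remaining requests
}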


Re: Bulk Uploading

Ivan Brusic
First, please gist your code since it is quite long. https://gist.github.com/

Pay attention to the prepareIndex method signature:
https://github.com/elasticsearch/elasticsearch/blob/master/src/main/java/org/elasticsearch/client/Client.java#L185

IndexRequestBuilder prepareIndex(String index, String type, @Nullable String id);

The last param is the unique document id. If you call prepareIndex with the
exact same id, the new document overwrites the existing one rather than
being added alongside it. The size of the index can even shrink, because the
index gets optimized: old versions of the re-added documents are purged when
segments merge.
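
You can verify the overwrite behavior with a quick test (an untested sketch
against the client and index set up in your code; the id "42" and the "name"
field are just placeholders):

// Index two documents under the SAME id: the second replaces the first.
client.prepareIndex("test", "type1", "42")
        .setSource(jsonBuilder().startObject().field("name", "first").endObject())
        .execute().actionGet();
client.prepareIndex("test", "type1", "42")
        .setSource(jsonBuilder().startObject().field("name", "second").endObject())
        .execute().actionGet();

// Refresh so the docs are visible to search, then count them.
client.admin().indices().prepareRefresh("test").execute().actionGet();
long total = client.prepareSearch("test")
        .execute().actionGet().getHits().getTotalHits();
System.out.println(total); // prints 1, not 2: only the latest doc with id "42" survives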

Your original code has
brb.add(client.prepareIndex("test", "type1", i+"")
...
if(i==100000) {
  i=0;
}

You are overwriting the documents during the next pass: once i is reset to 0,
the next 100000 lines reuse ids 1 through 100000. Keep i growing and flush on
a multiple instead:
if( i % 100000 == 0)
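
Put together, the indexing loop could look roughly like this (an untested
sketch, reusing the client, the br reader, and the static imports from your
code; the "line" field is just a placeholder for your real columns). A fresh
BulkRequestBuilder is created after each flush rather than reusing the old
one, so already-sent requests are not kept around and re-sent, which would
also explain the earlier OutOfMemory errors:

BulkRequestBuilder brb = client.prepareBulk();
int i = 0;
String strLine;
while ((strLine = br.readLine()) != null) {
        i++; // never reset: i doubles as the unique document id
        brb.add(client.prepareIndex("test", "type1", Integer.toString(i))
                .setSource(jsonBuilder().startObject()
                        .field("line", strLine) // placeholder, parse your real fields here
                        .endObject()));
        if (i % 10000 == 0) { // smaller batches, as suggested earlier in the thread
                brb.execute().actionGet();
                brb = client.prepareBulk(); // start a fresh builder for the next batch
        }
}
if (brb.numberOfActions() > 0) { // flush the final partial batch
        brb.execute().actionGet();
}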

--
Ivan

On Fri, Jan 27, 2012 at 9:28 AM, BeyondLimit <[hidden email]> wrote:

> If you run the given code you will see its not adding index.

Re: Bulk Uploading

prashant5375
Hi Ivan,
Thanks a lot for the help. It's working now.
Regards
Prashant

