1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.client;
20
21 import org.apache.hadoop.hbase.classification.InterfaceAudience;
22 import org.apache.hadoop.hbase.HRegionLocation;
23
24 import java.io.IOException;
25 import java.io.InterruptedIOException;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.List;
29 import java.util.Map;
30
31 /**
32 * Utility class for HTable.
33 *
34 * @deprecated since 1.0
35 */
36 @InterfaceAudience.Private
37 @Deprecated
38 public class HTableUtil {
39
40 private static final int INITIAL_LIST_SIZE = 250;
41
42 /**
43 * Processes a List of Puts and writes them to an HTable instance in RegionServer buckets via the htable.put method.
44 * This will utilize the writeBuffer, thus the writeBuffer flush frequency may be tuned accordingly via htable.setWriteBufferSize.
45 * <br><br>
46 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs in each flush.
47 * <br><br>
48 * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the same region,
49 * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this.
50 * <br>
51 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem.
52 * <br>
53 * Assumption #3: That the input list of Puts is big enough to be useful (in the thousands or more). The intent of this
54 * method is to process larger chunks of data.
55 * <br>
56 * Assumption #4: htable.setAutoFlush(false) has been set. This is a requirement to use the writeBuffer.
57 * <br><br>
58 * @param htable HTable instance for target HBase table
59 * @param puts List of Put instances
60 * @throws IOException if a remote or network exception occurs
61 *
62 */
63 public static void bucketRsPut(HTable htable, List<Put> puts) throws IOException {
64
65 Map<String, List<Put>> putMap = createRsPutMap(htable.getRegionLocator(), puts);
66 for (List<Put> rsPuts: putMap.values()) {
67 htable.put( rsPuts );
68 }
69 htable.flushCommits();
70 }
71
72 /**
73 * Processes a List of Rows (Put, Delete) and writes them to an HTable instance in RegionServer buckets via the htable.batch method.
74 * <br><br>
75 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs, thus this will
76 * produce one RPC of Puts per RegionServer.
77 * <br><br>
78 * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the same region,
79 * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this.
80 * <br>
81 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem.
82 * <br>
83 * Assumption #3: That the input list of Rows is big enough to be useful (in the thousands or more). The intent of this
84 * method is to process larger chunks of data.
85 * <br><br>
86 * This method accepts a list of Row objects because the underlying .batch method accepts a list of Row objects.
87 * <br><br>
88 * @param htable HTable instance for target HBase table
89 * @param rows List of Row instances
90 * @throws IOException if a remote or network exception occurs
91 */
92 public static void bucketRsBatch(HTable htable, List<Row> rows) throws IOException {
93
94 try {
95 Map<String, List<Row>> rowMap = createRsRowMap(htable.getRegionLocator(), rows);
96 for (List<Row> rsRows: rowMap.values()) {
97 htable.batch( rsRows );
98 }
99 } catch (InterruptedException e) {
100 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
101 }
102
103 }
104
105 private static Map<String,List<Put>> createRsPutMap(RegionLocator htable, List<Put> puts) throws IOException {
106
107 Map<String, List<Put>> putMap = new HashMap<String, List<Put>>();
108 for (Put put: puts) {
109 HRegionLocation rl = htable.getRegionLocation( put.getRow() );
110 String hostname = rl.getHostname();
111 List<Put> recs = putMap.get( hostname);
112 if (recs == null) {
113 recs = new ArrayList<Put>(INITIAL_LIST_SIZE);
114 putMap.put( hostname, recs);
115 }
116 recs.add(put);
117 }
118 return putMap;
119 }
120
121 private static Map<String,List<Row>> createRsRowMap(RegionLocator htable, List<Row> rows) throws IOException {
122
123 Map<String, List<Row>> rowMap = new HashMap<String, List<Row>>();
124 for (Row row: rows) {
125 HRegionLocation rl = htable.getRegionLocation( row.getRow() );
126 String hostname = rl.getHostname();
127 List<Row> recs = rowMap.get( hostname);
128 if (recs == null) {
129 recs = new ArrayList<Row>(INITIAL_LIST_SIZE);
130 rowMap.put( hostname, recs);
131 }
132 recs.add(row);
133 }
134 return rowMap;
135 }
136
137 }