1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.List;
23 import java.util.Map;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.classification.InterfaceStability;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.hbase.HTableDescriptor;
31 import org.apache.hadoop.hbase.KeyValue;
32 import org.apache.hadoop.hbase.client.HTable;
33 import org.apache.hadoop.hbase.client.Table;
34 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
35 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
36 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
37 import org.apache.hadoop.hbase.regionserver.BloomType;
38 import org.apache.hadoop.mapreduce.Job;
39 import org.apache.hadoop.mapreduce.RecordWriter;
40 import org.apache.hadoop.mapreduce.TaskAttemptContext;
41 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
42
43 import com.google.common.annotations.VisibleForTesting;
44
45 /**
46 * Writes HFiles. Passed KeyValues must arrive in order.
47 * Writes current time as the sequence id for the file. Sets the major compacted
48 * attribute on created hfiles. Calling write(null,null) will forcibly roll
49 * all HFiles being written.
50 * <p>
51 * Using this class as part of a MapReduce job is best done
52 * using {@link #configureIncrementalLoad(Job, HTable)}.
53 * @see KeyValueSortReducer
54 * @deprecated use {@link HFileOutputFormat2} instead.
55 */
56 @Deprecated
57 @InterfaceAudience.Public
58 @InterfaceStability.Stable
59 public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
60 static Log LOG = LogFactory.getLog(HFileOutputFormat.class);
61
62 // This constant is public since the client can modify this when setting
63 // up their conf object and thus refer to this symbol.
64 // It is present for backwards compatibility reasons. Use it only to
65 // override the auto-detection of datablock encoding.
66 public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
67 HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
68
69 @Override
70 public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
71 final TaskAttemptContext context) throws IOException, InterruptedException {
72 return HFileOutputFormat2.createRecordWriter(context);
73 }
74
75 /**
76 * Configure a MapReduce Job to perform an incremental load into the given
77 * table. This
78 * <ul>
79 * <li>Inspects the table to configure a total order partitioner</li>
80 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
81 * <li>Sets the number of reduce tasks to match the current number of regions</li>
82 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
83 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
84 * PutSortReducer)</li>
85 * </ul>
86 * The user should be sure to set the map output value class to either KeyValue or Put before
87 * running this function.
88 */
89 public static void configureIncrementalLoad(Job job, HTable table)
90 throws IOException {
91 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
92 table.getRegionLocator());
93 }
94
95 /**
96 * Runs inside the task to deserialize column family to compression algorithm
97 * map from the configuration.
98 *
99 * @param conf to read the serialized values from
100 * @return a map from column family to the configured compression algorithm
101 */
102 @VisibleForTesting
103 static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
104 conf) {
105 return HFileOutputFormat2.createFamilyCompressionMap(conf);
106 }
107
108 /**
109 * Runs inside the task to deserialize column family to bloom filter type
110 * map from the configuration.
111 *
112 * @param conf to read the serialized values from
113 * @return a map from column family to the the configured bloom filter type
114 */
115 @VisibleForTesting
116 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
117 return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
118 }
119
120 /**
121 * Runs inside the task to deserialize column family to block size
122 * map from the configuration.
123 *
124 * @param conf to read the serialized values from
125 * @return a map from column family to the configured block size
126 */
127 @VisibleForTesting
128 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
129 return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
130 }
131
132 /**
133 * Runs inside the task to deserialize column family to data block encoding
134 * type map from the configuration.
135 *
136 * @param conf to read the serialized values from
137 * @return a map from column family to HFileDataBlockEncoder for the
138 * configured data block type for the family
139 */
140 @VisibleForTesting
141 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
142 Configuration conf) {
143 return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
144 }
145
146 /**
147 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
148 * <code>splitPoints</code>. Cleans up the partitions file after job exists.
149 */
150 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
151 throws IOException {
152 HFileOutputFormat2.configurePartitioner(job, splitPoints);
153 }
154
155 static void configureCompression(Table table, Configuration conf) throws IOException {
156 HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
157 }
158
159 /**
160 * Serialize column family to block size map to configuration.
161 * Invoked while configuring the MR job for incremental load.
162 *
163 * @param table to read the properties from
164 * @param conf to persist serialized values into
165 * @throws IOException
166 * on failure to read column family descriptors
167 */
168 @VisibleForTesting
169 static void configureBlockSize(Table table, Configuration conf) throws IOException {
170 HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
171 }
172
173 /**
174 * Serialize column family to bloom type map to configuration.
175 * Invoked while configuring the MR job for incremental load.
176 *
177 * @param table to read the properties from
178 * @param conf to persist serialized values into
179 * @throws IOException
180 * on failure to read column family descriptors
181 */
182 @VisibleForTesting
183 static void configureBloomType(Table table, Configuration conf) throws IOException {
184 HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
185 }
186
187 /**
188 * Serialize column family to data block encoding map to configuration.
189 * Invoked while configuring the MR job for incremental load.
190 *
191 * @param table to read the properties from
192 * @param conf to persist serialized values into
193 * @throws IOException
194 * on failure to read column family descriptors
195 */
196 @VisibleForTesting
197 static void configureDataBlockEncoding(Table table,
198 Configuration conf) throws IOException {
199 HTableDescriptor tableDescriptor = table.getTableDescriptor();
200 HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
201 }
202 }