1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.List;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.classification.InterfaceStability;
26 import org.apache.hadoop.conf.Configurable;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.hbase.client.Scan;
29
30 /**
31 * Convert HBase tabular data from multiple scanners into a format that
32 * is consumable by Map/Reduce.
33 *
34 * <p>
35 * Usage example
36 * </p>
37 *
38 * <pre>
39 * List<Scan> scans = new ArrayList<Scan>();
40 *
41 * Scan scan1 = new Scan();
42 * scan1.setStartRow(firstRow1);
43 * scan1.setStopRow(lastRow1);
44 * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table1);
45 * scans.add(scan1);
46 *
47 * Scan scan2 = new Scan();
48 * scan2.setStartRow(firstRow2);
49 * scan2.setStopRow(lastRow2);
50 * scan1.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, table2);
51 * scans.add(scan2);
52 *
53 * TableMapReduceUtil.initTableMapperJob(scans, TableMapper.class, Text.class,
54 * IntWritable.class, job);
55 * </pre>
56 */
57 @InterfaceAudience.Public
58 @InterfaceStability.Evolving
59 public class MultiTableInputFormat extends MultiTableInputFormatBase implements
60 Configurable {
61
62 /** Job parameter that specifies the scan list. */
63 public static final String SCANS = "hbase.mapreduce.scans";
64
65 /** The configuration. */
66 private Configuration conf = null;
67
68 /**
69 * Returns the current configuration.
70 *
71 * @return The current configuration.
72 * @see org.apache.hadoop.conf.Configurable#getConf()
73 */
74 @Override
75 public Configuration getConf() {
76 return conf;
77 }
78
79 /**
80 * Sets the configuration. This is used to set the details for the tables to
81 * be scanned.
82 *
83 * @param configuration The configuration to set.
84 * @see org.apache.hadoop.conf.Configurable#setConf(
85 * org.apache.hadoop.conf.Configuration)
86 */
87 @Override
88 public void setConf(Configuration configuration) {
89 this.conf = configuration;
90 String[] rawScans = conf.getStrings(SCANS);
91 if (rawScans.length <= 0) {
92 throw new IllegalArgumentException("There must be at least 1 scan configuration set to : "
93 + SCANS);
94 }
95 List<Scan> scans = new ArrayList<Scan>();
96
97 for (int i = 0; i < rawScans.length; i++) {
98 try {
99 scans.add(TableMapReduceUtil.convertStringToScan(rawScans[i]));
100 } catch (IOException e) {
101 throw new RuntimeException("Failed to convert Scan : " + rawScans[i] + " to string", e);
102 }
103 }
104 this.setScans(scans);
105 }
106 }