1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.Cell;
26 import org.apache.hadoop.hbase.KeyValue.KVComparator;
27 import org.apache.hadoop.hbase.io.hfile.HFileContext;
28
29 /**
30 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
31 * <ul>
32 * <li>the KeyValues are stored sorted by key</li>
33 * <li>we know the structure of KeyValue</li>
34 * <li>the values are always iterated forward from beginning of block</li>
35 * <li>knowledge of Key Value format</li>
36 * </ul>
37 * It is designed to work fast enough to be feasible as in memory compression.
38 */
39 @InterfaceAudience.Private
40 public interface DataBlockEncoder {
41
42 /**
43 * Starts encoding for a block of KeyValues. Call
44 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
45 * encoding of a block.
46 * @param encodingCtx
47 * @param out
48 * @throws IOException
49 */
50 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
51 throws IOException;
52
53 /**
54 * Encodes a KeyValue.
55 * @param cell
56 * @param encodingCtx
57 * @param out
58 * @return unencoded kv size written
59 * @throws IOException
60 */
61 int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
62 throws IOException;
63
64 /**
65 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
66 * stuff for the encoded block. It must be called at the end of block encoding.
67 * @param encodingCtx
68 * @param out
69 * @param uncompressedBytesWithHeader
70 * @throws IOException
71 */
72 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
73 byte[] uncompressedBytesWithHeader) throws IOException;
74
75 /**
76 * Decode.
77 * @param source Compressed stream of KeyValues.
78 * @param decodingCtx
79 * @return Uncompressed block of KeyValues.
80 * @throws IOException If there is an error in source.
81 */
82 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
83 throws IOException;
84
85 /**
86 * Return first key in block. Useful for indexing. Typically does not make
87 * a deep copy but returns a buffer wrapping a segment of the actual block's
88 * byte array. This is because the first key in block is usually stored
89 * unencoded.
90 * @param block encoded block we want index, the position will not change
91 * @return First key in block.
92 */
93 ByteBuffer getFirstKeyInBlock(ByteBuffer block);
94
95 /**
96 * Create a HFileBlock seeker which find KeyValues within a block.
97 * @param comparator what kind of comparison should be used
98 * @param decodingCtx
99 * @return A newly created seeker.
100 */
101 EncodedSeeker createSeeker(KVComparator comparator,
102 HFileBlockDecodingContext decodingCtx);
103
104 /**
105 * Creates a encoder specific encoding context
106 *
107 * @param encoding
108 * encoding strategy used
109 * @param headerBytes
110 * header bytes to be written, put a dummy header here if the header
111 * is unknown
112 * @param meta
113 * HFile meta data
114 * @return a newly created encoding context
115 */
116 HFileBlockEncodingContext newDataBlockEncodingContext(
117 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
118
119 /**
120 * Creates an encoder specific decoding context, which will prepare the data
121 * before actual decoding
122 *
123 * @param meta
124 * HFile meta data
125 * @return a newly created decoding context
126 */
127 HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
128
129 /**
130 * An interface which enable to seek while underlying data is encoded.
131 *
132 * It works on one HFileBlock, but it is reusable. See
133 * {@link #setCurrentBuffer(ByteBuffer)}.
134 */
135 interface EncodedSeeker {
136 /**
137 * Set on which buffer there will be done seeking.
138 * @param buffer Used for seeking.
139 */
140 void setCurrentBuffer(ByteBuffer buffer);
141
142 /**
143 * Does a deep copy of the key at the current position. A deep copy is
144 * necessary because buffers are reused in the decoder.
145 * @return key at current position
146 */
147 ByteBuffer getKeyDeepCopy();
148
149 /**
150 * Does a shallow copy of the value at the current position. A shallow
151 * copy is possible because the returned buffer refers to the backing array
152 * of the original encoded buffer.
153 * @return value at current position
154 */
155 ByteBuffer getValueShallowCopy();
156
157 //TODO : to be removed - currently used in testcases only
158 /** @return a key value buffer with the position set at the beginning of the buffer */
159 ByteBuffer getKeyValueBuffer();
160
161 /**
162 * @return the KeyValue object at the current position. Includes memstore
163 * timestamp.
164 */
165 Cell getKeyValue();
166
167 /** Set position to beginning of given block */
168 void rewind();
169
170 /**
171 * Move to next position
172 * @return true on success, false if there is no more positions.
173 */
174 boolean next();
175
176 /**
177 * Moves the seeker position within the current block to:
178 * <ul>
179 * <li>the last key that that is less than or equal to the given key if
180 * <code>seekBefore</code> is false</li>
181 * <li>the last key that is strictly less than the given key if <code>
182 * seekBefore</code> is true. The caller is responsible for loading the
183 * previous block if the requested key turns out to be the first key of the
184 * current block.</li>
185 * </ul>
186 * @param key byte array containing the key
187 * @param offset key position the array
188 * @param length key length in bytes
189 * @param seekBefore find the key strictly less than the given key in case
190 * of an exact match. Does not matter in case of an inexact match.
191 * @return 0 on exact match, 1 on inexact match.
192 */
193 @Deprecated
194 int seekToKeyInBlock(
195 byte[] key, int offset, int length, boolean seekBefore
196 );
197 /**
198 * Moves the seeker position within the current block to:
199 * <ul>
200 * <li>the last key that that is less than or equal to the given key if
201 * <code>seekBefore</code> is false</li>
202 * <li>the last key that is strictly less than the given key if <code>
203 * seekBefore</code> is true. The caller is responsible for loading the
204 * previous block if the requested key turns out to be the first key of the
205 * current block.</li>
206 * </ul>
207 * @param key - Cell to which the seek should happen
208 * @param seekBefore find the key strictly less than the given key in case
209 * of an exact match. Does not matter in case of an inexact match.
210 * @return 0 on exact match, 1 on inexact match.
211 */
212 int seekToKeyInBlock(Cell key, boolean seekBefore);
213
214 /**
215 * Compare the given key against the current key
216 * @param comparator
217 * @param key
218 * @param offset
219 * @param length
220 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
221 */
222 public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
223
224 public int compareKey(KVComparator comparator, Cell key);
225 }
226 }