1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.util;
19
20 import static org.apache.hadoop.hbase.util.Order.ASCENDING;
21 import static org.apache.hadoop.hbase.util.Order.DESCENDING;
22
23 import java.math.BigDecimal;
24 import java.math.BigInteger;
25 import java.math.MathContext;
26 import java.math.RoundingMode;
27 import java.nio.charset.Charset;
28
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.hbase.classification.InterfaceStability;
31
32 import com.google.common.annotations.VisibleForTesting;
33
34 /**
35 * Utility class that handles ordered byte arrays. That is, unlike
36 * {@link Bytes}, these methods produce byte arrays which maintain the sort
37 * order of the original values.
38 * <h3>Encoding Format summary</h3>
39 * <p>
40 * Each value is encoded as one or more bytes. The first byte of the encoding,
41 * its meaning, and a terse description of the bytes that follow is given by
42 * the following table:
43 * <table>
44 * <tr><th>Content Type</th><th>Encoding</th></tr>
45 * <tr><td>NULL</td><td>0x05</td></tr>
46 * <tr><td>negative infinity</td><td>0x07</td></tr>
47 * <tr><td>negative large</td><td>0x08, ~E, ~M</td></tr>
48 * <tr><td>negative medium</td><td>0x13-E, ~M</td></tr>
49 * <tr><td>negative small</td><td>0x14, -E, ~M</td></tr>
50 * <tr><td>zero</td><td>0x15</td></tr>
51 * <tr><td>positive small</td><td>0x16, ~-E, M</td></tr>
52 * <tr><td>positive medium</td><td>0x17+E, M</td></tr>
53 * <tr><td>positive large</td><td>0x22, E, M</td></tr>
54 * <tr><td>positive infinity</td><td>0x23</td></tr>
55 * <tr><td>NaN</td><td>0x25</td></tr>
56 * <tr><td>fixed-length 32-bit integer</td><td>0x27, I</td></tr>
57 * <tr><td>fixed-length 64-bit integer</td><td>0x28, I</td></tr>
58 * <tr><td>fixed-length 8-bit integer</td><td>0x29</td></tr>
59 * <tr><td>fixed-length 16-bit integer</td><td>0x2a</td></tr>
60 * <tr><td>fixed-length 32-bit float</td><td>0x30, F</td></tr>
61 * <tr><td>fixed-length 64-bit float</td><td>0x31, F</td></tr>
62 * <tr><td>TEXT</td><td>0x33, T</td></tr>
63 * <tr><td>variable length BLOB</td><td>0x35, B</td></tr>
64 * <tr><td>byte-for-byte BLOB</td><td>0x36, X</td></tr>
65 * </table>
66 * </p>
67 *
68 * <h3>Null Encoding</h3>
69 * <p>
70 * Each value that is a NULL encodes as a single byte of 0x05. Since every
71 * other value encoding begins with a byte greater than 0x05, this forces NULL
72 * values to sort first.
73 * </p>
74 * <h3>Text Encoding</h3>
75 * <p>
76 * Each text value begins with a single byte of 0x33 and ends with a single
77 * byte of 0x00. There are zero or more intervening bytes that encode the text
78 * value. The intervening bytes are chosen so that the encoding will sort in
79 * the desired collating order. The intervening bytes may not contain a 0x00
80 * character; the only 0x00 byte allowed in a text encoding is the final byte.
81 * </p>
82 * <p>
83 * The text encoding ends in 0x00 in order to ensure that when there are two
84 * strings where one is a prefix of the other that the shorter string will
85 * sort first.
86 * </p>
87 * <h3>Binary Encoding</h3>
88 * <p>
89 * There are two encoding strategies for binary fields, referred to as
90 * "BlobVar" and "BlobCopy". BlobVar is less efficient in both space and
91 * encoding time. It has no limitations on the range of encoded values.
92 * BlobCopy is a byte-for-byte copy of the input data followed by a
93 * termination byte. It is extremely fast to encode and decode. It carries the
94 * restriction of not allowing a 0x00 value in the input byte[] as this value
95 * is used as the termination byte.
96 * </p>
97 * <h4>BlobVar</h4>
98 * <p>
99 * "BlobVar" encodes the input byte[] in a manner similar to a variable length
100 * integer encoding. As with the other {@code OrderedBytes} encodings,
101 * the first encoded byte is used to indicate what kind of value follows. This
102 * header byte is 0x37 for BlobVar encoded values. As with the traditional
103 * varint encoding, the most significant bit of each subsequent encoded
104 * {@code byte} is used as a continuation marker. The 7 remaining bits
105 * contain the 7 most significant bits of the first unencoded byte. The next
106 * encoded byte starts with a continuation marker in the MSB. The least
107 * significant bit from the first unencoded byte follows, and the remaining 6
108 * bits contain the 6 MSBs of the second unencoded byte. The encoding
109 * continues, encoding 7 bytes on to 8 encoded bytes. The MSB of the final
110 * encoded byte contains a termination marker rather than a continuation
111 * marker, and any remaining bits from the final input byte. Any trailing bits
112 * in the final encoded byte are zeros.
113 * </p>
114 * <h4>BlobCopy</h4>
115 * <p>
116 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38
117 * as the header byte, and is terminated by 0x00 in the DESCENDING case. This
118 * alternative encoding is faster and more space-efficient, but it cannot
119 * accept values containing a 0x00 byte in DESCENDING order.
120 * </p>
121 * <h3>Variable-length Numeric Encoding</h3>
122 * <p>
123 * Numeric values must be coded so as to sort in numeric order. We assume that
124 * numeric values can be both integer and floating point values. Clients must
125 * be careful to use inspection methods for encoded values (such as
126 * {@link #isNumericInfinite(PositionedByteRange)} and
127 * {@link #isNumericNaN(PositionedByteRange)} to protect against decoding
128 * values into object which do not support these numeric concepts (such as
129 * {@link Long} and {@link BigDecimal}).
130 * </p>
131 * <p>
132 * Simplest cases first: If the numeric value is a NaN, then the encoding is a
133 * single byte of 0x25. This causes NaN values to sort after every other
134 * numeric value.
135 * </p>
136 * <p>
137 * If the numeric value is a negative infinity then the encoding is a single
138 * byte of 0x07. Since every other numeric value except NaN has a larger
139 * initial byte, this encoding ensures that negative infinity will sort prior
140 * to every other numeric value other than NaN.
141 * </p>
142 * <p>
143 * If the numeric value is a positive infinity then the encoding is a single
144 * byte of 0x23. Every other numeric value encoding begins with a smaller
145 * byte, ensuring that positive infinity always sorts last among numeric
146 * values. 0x23 is also smaller than 0x33, the initial byte of a text value,
147 * ensuring that every numeric value sorts before every text value.
148 * </p>
149 * <p>
150 * If the numeric value is exactly zero then it is encoded as a single byte of
151 * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14
152 * and finite positive values will have initial bytes of 0x16 through 0x22.
153 * </p>
154 * <p>
155 * For all numeric values, we compute a mantissa M and an exponent E. The
156 * mantissa is a base-100 representation of the value. The exponent E
157 * determines where to put the decimal point.
158 * </p>
159 * <p>
160 * Each centimal digit of the mantissa is stored in a byte. If the value of
161 * the centimal digit is X (hence X≥0 and X≤99) then the byte value will
162 * be 2*X+1 for every byte of the mantissa, except for the last byte which
163 * will be 2*X+0. The mantissa must be the minimum number of bytes necessary
164 * to represent the value; trailing X==0 digits are omitted. This means that
165 * the mantissa will never contain a byte with the value 0x00.
166 * </p>
167 * <p>
168 * If we assume all digits of the mantissa occur to the right of the decimal
169 * point, then the exponent E is the power of one hundred by which one must
170 * multiply the mantissa to recover the original value.
171 * </p>
172 * <p>
173 * Values are classified as large, medium, or small according to the value of
174 * E. If E is 11 or more, the value is large. For E between 0 and 10, the
175 * value is medium. For E less than zero, the value is small.
176 * </p>
177 * <p>
178 * Large positive values are encoded as a single byte 0x22 followed by E as a
179 * varint and then M. Medium positive values are a single byte of 0x17+E
180 * followed by M. Small positive values are encoded as a single byte 0x16
181 * followed by the ones-complement of the varint for -E followed by M.
182 * </p>
183 * <p>
184 * Small negative values are encoded as a single byte 0x14 followed by -E as a
185 * varint and then the ones-complement of M. Medium negative values are
186 * encoded as a byte 0x13-E followed by the ones-complement of M. Large
187 * negative values consist of the single byte 0x08 followed by the
188 * ones-complement of the varint encoding of E followed by the ones-complement
189 * of M.
190 * </p>
191 * <h3>Fixed-length Integer Encoding</h3>
192 * <p>
193 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte
194 * format. All 8-byte integers are serialized to the equivelant 9-byte format.
195 * Serialization is performed by writing a header byte, inverting the integer
196 * sign bit and writing the resulting bytes to the byte array in big endian
197 * order.
198 * </p>
199 * <h3>Fixed-length Floating Point Encoding</h3>
200 * <p>
201 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte
202 * encoding format, respectively. The format is identical, save for the
203 * precision respected in each step of the operation.
204 * <p>
205 * This format ensures the following total ordering of floating point values:
206 * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... <
207 * -Float.MIN_VALUE < -0.0 < +0.0; < Float.MIN_VALUE < ... <
208 * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN
209 * </p>
210 * <p>
211 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit
212 * single precision float consists of a sign bit, 8-bit unsigned exponent
213 * encoded in offset-127 notation, and a 23-bit significand. The format is
214 * described further in the <a
215 * href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision
216 * Floating Point Wikipedia page</a>
217 * </p>
218 * <p>
219 * The value of a normal float is -1 <sup>sign bit</sup> ×
220 * 2<sup>exponent - 127</sup> × 1.significand
221 * </p>
222 * <p>
223 * The IEE754 floating point format already preserves sort ordering for
224 * positive floating point numbers when the raw bytes are compared in most
225 * significant byte order. This is discussed further at <a href=
226 * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm">
227 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a>
228 * </p>
229 * <p>
230 * Thus, we need only ensure that negative numbers sort in the the exact
231 * opposite order as positive numbers (so that say, negative infinity is less
232 * than negative 1), and that all negative numbers compare less than any
233 * positive number. To accomplish this, we invert the sign bit of all floating
234 * point numbers, and we also invert the exponent and significand bits if the
235 * floating point number was negative.
236 * </p>
237 * <p>
238 * More specifically, we first store the floating point bits into a 32-bit int
239 * {@code j} using {@link Float#floatToIntBits}. This method collapses
240 * all NaNs into a single, canonical NaN value but otherwise leaves the bits
241 * unchanged. We then compute
242 * </p>
243 *
244 * <pre>
245 * j ˆ= (j >> (Integer.SIZE - 1)) | Integer.MIN_SIZE
246 * </pre>
247 * <p>
248 * which inverts the sign bit and XOR's all other bits with the sign bit
249 * itself. Comparing the raw bytes of {@code j} in most significant byte
250 * order is equivalent to performing a single precision floating point
251 * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't
252 * compare equal to anything when performing floating point comparisons).
253 * </p>
254 * <p>
255 * The resulting integer is then converted into a byte array by serializing
256 * the integer one byte at a time in most significant byte order. The
257 * serialized integer is prefixed by a single header byte. All serialized
258 * values are 5 bytes in length.
259 * </p>
260 * <p>
261 * {@code OrderedBytes} encodings are heavily influenced by the <a href="
262 * http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key
263 * Encoding</a>. Slight deviations are make in the interest of order
264 * correctness and user extensibility. Fixed-width {@code Long} and
265 * {@link Double} encodings are based on implementations from the now defunct
266 * Orderly library.
267 * </p>
268 */
269 @InterfaceAudience.Public
270 @InterfaceStability.Evolving
271 public class OrderedBytes {
272
273 /*
274 * These constants define header bytes used to identify encoded values. Note
275 * that the values here are not exhaustive as the Numeric format encodes
276 * portions of its value within the header byte. The values listed here are
277 * directly applied to persisted data -- DO NOT modify the values specified
278 * here. Instead, gaps are placed intentionally between values so that new
279 * implementations can be inserted into the total ordering enforced here.
280 */
281 private static final byte NULL = 0x05;
282 // room for 1 expansion type
283 private static final byte NEG_INF = 0x07;
284 private static final byte NEG_LARGE = 0x08;
285 private static final byte NEG_MED_MIN = 0x09;
286 private static final byte NEG_MED_MAX = 0x13;
287 private static final byte NEG_SMALL = 0x14;
288 private static final byte ZERO = 0x15;
289 private static final byte POS_SMALL = 0x16;
290 private static final byte POS_MED_MIN = 0x17;
291 private static final byte POS_MED_MAX = 0x21;
292 private static final byte POS_LARGE = 0x22;
293 private static final byte POS_INF = 0x23;
294 // room for 2 expansion type
295 private static final byte NAN = 0x26;
296 // room for 2 expansion types
297 private static final byte FIXED_INT8 = 0x29;
298 private static final byte FIXED_INT16 = 0x2a;
299 private static final byte FIXED_INT32 = 0x2b;
300 private static final byte FIXED_INT64 = 0x2c;
301 // room for 3 expansion types
302 private static final byte FIXED_FLOAT32 = 0x30;
303 private static final byte FIXED_FLOAT64 = 0x31;
304 // room for 2 expansion type
305 private static final byte TEXT = 0x34;
306 // room for 2 expansion type
307 private static final byte BLOB_VAR = 0x37;
308 private static final byte BLOB_COPY = 0x38;
309
310 /*
311 * The following constant values are used by encoding implementations
312 */
313
314 public static final Charset UTF8 = Charset.forName("UTF-8");
315 private static final byte TERM = 0x00;
316 private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
317 private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
318 private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
319 private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
320
321 /**
322 * Max precision guaranteed to fit into a {@code long}.
323 */
324 public static final int MAX_PRECISION = 31;
325
326 /**
327 * The context used to normalize {@link BigDecimal} values.
328 */
329 public static final MathContext DEFAULT_MATH_CONTEXT =
330 new MathContext(MAX_PRECISION, RoundingMode.HALF_UP);
331
332 /**
333 * Creates the standard exception when the encoded header byte is unexpected for the decoding
334 * context.
335 * @param header value used in error message.
336 */
337 private static IllegalArgumentException unexpectedHeader(byte header) {
338 throw new IllegalArgumentException("unexpected value in first byte: 0x"
339 + Long.toHexString(header));
340 }
341
342 /**
343 * Perform unsigned comparison between two long values. Conforms to the same interface as
344 * {@link Comparator#compare(Object, Object)}.
345 */
346 private static int unsignedCmp(long x1, long x2) {
347 int cmp;
348 if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0;
349 // invert the result when either value is negative
350 if ((x1 < 0) != (x2 < 0)) return -cmp;
351 return cmp;
352 }
353
354 /**
355 * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes.
356 * @return number of bytes written.
357 */
358 private static int putUint32(PositionedByteRange dst, int val) {
359 dst.put((byte) (val >>> 24))
360 .put((byte) (val >>> 16))
361 .put((byte) (val >>> 8))
362 .put((byte) val);
363 return 4;
364 }
365
366 /**
367 * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}.
368 * @param dst The destination to which encoded bytes are written.
369 * @param val The value to write.
370 * @param comp Compliment the encoded value when {@code comp} is true.
371 * @return number of bytes written.
372 */
373 @VisibleForTesting
374 static int putVaruint64(PositionedByteRange dst, long val, boolean comp) {
375 int w, y, len = 0;
376 final int offset = dst.getOffset(), start = dst.getPosition();
377 byte[] a = dst.getBytes();
378 Order ord = comp ? DESCENDING : ASCENDING;
379 if (-1 == unsignedCmp(val, 241L)) {
380 dst.put((byte) val);
381 len = dst.getPosition() - start;
382 ord.apply(a, offset + start, len);
383 return len;
384 }
385 if (-1 == unsignedCmp(val, 2288L)) {
386 y = (int) (val - 240);
387 dst.put((byte) (y / 256 + 241))
388 .put((byte) (y % 256));
389 len = dst.getPosition() - start;
390 ord.apply(a, offset + start, len);
391 return len;
392 }
393 if (-1 == unsignedCmp(val, 67824L)) {
394 y = (int) (val - 2288);
395 dst.put((byte) 249)
396 .put((byte) (y / 256))
397 .put((byte) (y % 256));
398 len = dst.getPosition() - start;
399 ord.apply(a, offset + start, len);
400 return len;
401 }
402 y = (int) val;
403 w = (int) (val >>> 32);
404 if (w == 0) {
405 if (-1 == unsignedCmp(y, 16777216L)) {
406 dst.put((byte) 250)
407 .put((byte) (y >>> 16))
408 .put((byte) (y >>> 8))
409 .put((byte) y);
410 len = dst.getPosition() - start;
411 ord.apply(a, offset + start, len);
412 return len;
413 }
414 dst.put((byte) 251);
415 putUint32(dst, y);
416 len = dst.getPosition() - start;
417 ord.apply(a, offset + start, len);
418 return len;
419 }
420 if (-1 == unsignedCmp(w, 256L)) {
421 dst.put((byte) 252)
422 .put((byte) w);
423 putUint32(dst, y);
424 len = dst.getPosition() - start;
425 ord.apply(a, offset + start, len);
426 return len;
427 }
428 if (-1 == unsignedCmp(w, 65536L)) {
429 dst.put((byte) 253)
430 .put((byte) (w >>> 8))
431 .put((byte) w);
432 putUint32(dst, y);
433 len = dst.getPosition() - start;
434 ord.apply(a, offset + start, len);
435 return len;
436 }
437 if (-1 == unsignedCmp(w, 16777216L)) {
438 dst.put((byte) 254)
439 .put((byte) (w >>> 16))
440 .put((byte) (w >>> 8))
441 .put((byte) w);
442 putUint32(dst, y);
443 len = dst.getPosition() - start;
444 ord.apply(a, offset + start, len);
445 return len;
446 }
447 dst.put((byte) 255);
448 putUint32(dst, w);
449 putUint32(dst, y);
450 len = dst.getPosition() - start;
451 ord.apply(a, offset + start, len);
452 return len;
453 }
454
455 /**
456 * Inspect {@code src} for an encoded varuint64 for its length in bytes.
457 * Preserves the state of {@code src}.
458 * @param src source buffer
459 * @param comp if true, parse the compliment of the value.
460 * @return the number of bytes consumed by this value.
461 */
462 @VisibleForTesting
463 static int lengthVaruint64(PositionedByteRange src, boolean comp) {
464 int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff;
465 if (a0 <= 240) return 1;
466 if (a0 >= 241 && a0 <= 248) return 2;
467 if (a0 == 249) return 3;
468 if (a0 == 250) return 4;
469 if (a0 == 251) return 5;
470 if (a0 == 252) return 6;
471 if (a0 == 253) return 7;
472 if (a0 == 254) return 8;
473 if (a0 == 255) return 9;
474 throw unexpectedHeader(src.peek());
475 }
476
477 /**
478 * Skip {@code src} over the encoded varuint64.
479 * @param src source buffer
480 * @param cmp if true, parse the compliment of the value.
481 * @return the number of bytes skipped.
482 */
483 @VisibleForTesting
484 static int skipVaruint64(PositionedByteRange src, boolean cmp) {
485 final int len = lengthVaruint64(src, cmp);
486 src.setPosition(src.getPosition() + len);
487 return len;
488 }
489
490 /**
491 * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the
492 * encoded value when {@code comp} is true.
493 * @return the decoded value.
494 */
495 @VisibleForTesting
496 static long getVaruint64(PositionedByteRange src, boolean comp) {
497 assert src.getRemaining() >= lengthVaruint64(src, comp);
498 final long ret;
499 Order ord = comp ? DESCENDING : ASCENDING;
500 byte x = src.get();
501 final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8;
502 if (-1 == unsignedCmp(a0, 241)) {
503 return a0;
504 }
505 x = src.get();
506 a1 = ord.apply(x) & 0xff;
507 if (-1 == unsignedCmp(a0, 249)) {
508 return (a0 - 241) * 256 + a1 + 240;
509 }
510 x = src.get();
511 a2 = ord.apply(x) & 0xff;
512 if (a0 == 249) {
513 return 2288 + 256 * a1 + a2;
514 }
515 x = src.get();
516 a3 = ord.apply(x) & 0xff;
517 if (a0 == 250) {
518 return (a1 << 16) | (a2 << 8) | a3;
519 }
520 x = src.get();
521 a4 = ord.apply(x) & 0xff;
522 ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4;
523 if (a0 == 251) {
524 return ret;
525 }
526 x = src.get();
527 a5 = ord.apply(x) & 0xff;
528 if (a0 == 252) {
529 return (ret << 8) | a5;
530 }
531 x = src.get();
532 a6 = ord.apply(x) & 0xff;
533 if (a0 == 253) {
534 return (ret << 16) | (a5 << 8) | a6;
535 }
536 x = src.get();
537 a7 = ord.apply(x) & 0xff;
538 if (a0 == 254) {
539 return (ret << 24) | (a5 << 16) | (a6 << 8) | a7;
540 }
541 x = src.get();
542 a8 = ord.apply(x) & 0xff;
543 return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8;
544 }
545
546 /**
547 * Strip all trailing zeros to ensure that no digit will be zero and round
548 * using our default context to ensure precision doesn't exceed max allowed.
549 * From Phoenix's {@code NumberUtil}.
550 * @return new {@link BigDecimal} instance
551 */
552 @VisibleForTesting
553 static BigDecimal normalize(BigDecimal val) {
554 return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT);
555 }
556
557 /**
558 * Read significand digits from {@code src} according to the magnitude
559 * of {@code e}.
560 * @param src The source from which to read encoded digits.
561 * @param e The magnitude of the first digit read.
562 * @param comp Treat encoded bytes as compliments when {@code comp} is true.
563 * @return The decoded value.
564 * @throws IllegalArgumentException when read exceeds the remaining length
565 * of {@code src}.
566 */
567 private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) {
568 // TODO: can this be made faster?
569 byte[] a = src.getBytes();
570 final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining();
571 Order ord = comp ? DESCENDING : ASCENDING;
572 BigDecimal m = BigDecimal.ZERO;
573 e--;
574 for (int i = 0;; i++) {
575 if (i > remaining) {
576 // we've exceeded this range's window
577 src.setPosition(start);
578 throw new IllegalArgumentException(
579 "Read exceeds range before termination byte found. offset: " + offset + " position: "
580 + (start + i));
581 }
582 // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
583 m = m.add( // m +=
584 new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit]
585 BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2)));
586 e--;
587 // detect termination digit
588 if ((ord.apply(a[offset + start + i]) & 1) == 0) {
589 src.setPosition(start + i + 1);
590 break;
591 }
592 }
593 return normalize(m);
594 }
595
596 /**
597 * Skip {@code src} over the significand bytes.
598 * @param src The source from which to read encoded digits.
599 * @param comp Treat encoded bytes as compliments when {@code comp} is true.
600 * @return the number of bytes skipped.
601 */
602 private static int skipSignificand(PositionedByteRange src, boolean comp) {
603 byte[] a = src.getBytes();
604 final int offset = src.getOffset(), start = src.getPosition();
605 int i = src.getPosition();
606 while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0)
607 ;
608 src.setPosition(i);
609 return i - start;
610 }
611
612 /**
613 * <p>
614 * Encode the small magnitude floating point number {@code val} using the
615 * key encoding. The caller guarantees that 1.0 > abs(val) > 0.0.
616 * </p>
617 * <p>
618 * A floating point value is encoded as an integer exponent {@code E} and a
619 * mantissa {@code M}. The original value is equal to {@code (M * 100^E)}.
620 * {@code E} is set to the smallest value possible without making {@code M}
621 * greater than or equal to 1.0.
622 * </p>
623 * <p>
624 * For this routine, {@code E} will always be zero or negative, since the
625 * original value is less than one. The encoding written by this routine is
626 * the ones-complement of the varint of the negative of {@code E} followed
627 * by the mantissa:
628 * <pre>
629 * Encoding: ~-E M
630 * </pre>
631 * </p>
632 * @param dst The destination to which encoded digits are written.
633 * @param val The value to encode.
634 * @return the number of bytes written.
635 */
636 private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
637 // TODO: this can be done faster?
638 // assert 1.0 > abs(val) > 0.0
639 BigDecimal abs = val.abs();
640 assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0;
641 byte[] a = dst.getBytes();
642 boolean isNeg = val.signum() == -1;
643 final int offset = dst.getOffset(), start = dst.getPosition();
644 int e = 0, d, startM;
645
646 if (isNeg) { /* Small negative number: 0x14, -E, ~M */
647 dst.put(NEG_SMALL);
648 } else { /* Small positive number: 0x16, ~-E, M */
649 dst.put(POS_SMALL);
650 }
651
652 // normalize abs(val) to determine E
653 while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
654 while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
655
656 putVaruint64(dst, e, !isNeg); // encode appropriate E value.
657
658 // encode M by peeling off centimal digits, encoding x as 2x+1
659 startM = dst.getPosition();
660 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
661 // numeric scale.
662 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
663 abs = abs.movePointRight(2);
664 d = abs.intValue();
665 dst.put((byte) ((2 * d + 1) & 0xff));
666 abs = abs.subtract(BigDecimal.valueOf(d));
667 }
668 a[offset + dst.getPosition() - 1] &= 0xfe; // terminal digit should be 2x
669 if (isNeg) {
670 // negative values encoded as ~M
671 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
672 }
673 return dst.getPosition() - start;
674 }
675
676 /**
677 * Encode the large magnitude floating point number {@code val} using
678 * the key encoding. The caller guarantees that {@code val} will be
679 * finite and abs(val) >= 1.0.
680 * <p>
681 * A floating point value is encoded as an integer exponent {@code E}
682 * and a mantissa {@code M}. The original value is equal to
683 * {@code (M * 100^E)}. {@code E} is set to the smallest value
684 * possible without making {@code M} greater than or equal to 1.0.
685 * </p>
686 * <p>
687 * Each centimal digit of the mantissa is stored in a byte. If the value of
688 * the centimal digit is {@code X} (hence {@code X>=0} and
689 * {@code X<=99}) then the byte value will be {@code 2*X+1} for
690 * every byte of the mantissa, except for the last byte which will be
691 * {@code 2*X+0}. The mantissa must be the minimum number of bytes
692 * necessary to represent the value; trailing {@code X==0} digits are
693 * omitted. This means that the mantissa will never contain a byte with the
694 * value {@code 0x00}.
695 * </p>
696 * <p>
697 * If {@code E > 10}, then this routine writes of {@code E} as a
698 * varint followed by the mantissa as described above. Otherwise, if
699 * {@code E <= 10}, this routine only writes the mantissa and leaves
700 * the {@code E} value to be encoded as part of the opening byte of the
701 * field by the calling function.
702 *
703 * <pre>
704 * Encoding: M (if E<=10)
705 * E M (if E>10)
706 * </pre>
707 * </p>
708 * @param dst The destination to which encoded digits are written.
709 * @param val The value to encode.
710 * @return the number of bytes written.
711 */
712 private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
713 // TODO: this can be done faster
714 BigDecimal abs = val.abs();
715 byte[] a = dst.getBytes();
716 boolean isNeg = val.signum() == -1;
717 final int start = dst.getPosition(), offset = dst.getOffset();
718 int e = 0, d, startM;
719
720 if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
721 dst.put(NEG_LARGE);
722 } else { /* Large positive number: 0x22, E, M */
723 dst.put(POS_LARGE);
724 }
725
726 // normalize abs(val) to determine E
727 while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
728 while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
729 while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
730
731 // encode appropriate header byte and/or E value.
732 if (e > 10) { /* large number, write out {~,}E */
733 putVaruint64(dst, e, isNeg);
734 } else {
735 if (isNeg) { /* Medium negative number: 0x13-E, ~M */
736 dst.put(start, (byte) (NEG_MED_MAX - e));
737 } else { /* Medium positive number: 0x17+E, M */
738 dst.put(start, (byte) (POS_MED_MIN + e));
739 }
740 }
741
742 // encode M by peeling off centimal digits, encoding x as 2x+1
743 startM = dst.getPosition();
744 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
745 // numeric scale.
746 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
747 abs = abs.movePointRight(2);
748 d = abs.intValue();
749 dst.put((byte) (2 * d + 1));
750 abs = abs.subtract(BigDecimal.valueOf(d));
751 }
752
753 a[offset + dst.getPosition() - 1] &= 0xfe; // terminal digit should be 2x
754 if (isNeg) {
755 // negative values encoded as ~M
756 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
757 }
758 return dst.getPosition() - start;
759 }
760
761 /**
762 * Encode a numerical value using the variable-length encoding.
763 * @param dst The destination to which encoded digits are written.
764 * @param val The value to encode.
765 * @param ord The {@link Order} to respect while encoding {@code val}.
766 * @return the number of bytes written.
767 */
768 public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) {
769 return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
770 }
771
772 /**
773 * Encode a numerical value using the variable-length encoding.
774 * @param dst The destination to which encoded digits are written.
775 * @param val The value to encode.
776 * @param ord The {@link Order} to respect while encoding {@code val}.
777 * @return the number of bytes written.
778 */
779 public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) {
780 if (val == 0.0) {
781 dst.put(ord.apply(ZERO));
782 return 1;
783 }
784 if (Double.isNaN(val)) {
785 dst.put(ord.apply(NAN));
786 return 1;
787 }
788 if (val == Double.NEGATIVE_INFINITY) {
789 dst.put(ord.apply(NEG_INF));
790 return 1;
791 }
792 if (val == Double.POSITIVE_INFINITY) {
793 dst.put(ord.apply(POS_INF));
794 return 1;
795 }
796 return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
797 }
798
799 /**
800 * Encode a numerical value using the variable-length encoding.
801 * @param dst The destination to which encoded digits are written.
802 * @param val The value to encode.
803 * @param ord The {@link Order} to respect while encoding {@code val}.
804 * @return the number of bytes written.
805 */
806 public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) {
807 final int len, offset = dst.getOffset(), start = dst.getPosition();
808 if (null == val) {
809 return encodeNull(dst, ord);
810 } else if (BigDecimal.ZERO.compareTo(val) == 0) {
811 dst.put(ord.apply(ZERO));
812 return 1;
813 }
814 BigDecimal abs = val.abs();
815 if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0
816 len = encodeNumericLarge(dst, normalize(val));
817 } else { // 1.0 > abs(v) >= 0.0
818 len = encodeNumericSmall(dst, normalize(val));
819 }
820 ord.apply(dst.getBytes(), offset + start, len);
821 return len;
822 }
823
824 /**
825 * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes
826 * a value in Numeric encoding and is within the valid range of
827 * {@link BigDecimal} values. {@link BigDecimal} does not support {@code NaN}
828 * or {@code Infinte} values.
829 * @see #decodeNumericAsDouble(PositionedByteRange)
830 */
831 private static BigDecimal decodeNumericValue(PositionedByteRange src) {
832 final int e;
833 byte header = src.get();
834 boolean dsc = -1 == Integer.signum(header);
835 header = dsc ? DESCENDING.apply(header) : header;
836
837 if (header == NULL) return null;
838 if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */
839 e = (int) getVaruint64(src, !dsc);
840 return decodeSignificand(src, e, !dsc).negate();
841 }
842 if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) {
843 /* Medium negative number: 0x13-E, ~M */
844 e = NEG_MED_MAX - header;
845 return decodeSignificand(src, e, !dsc).negate();
846 }
847 if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */
848 e = (int) -getVaruint64(src, dsc);
849 return decodeSignificand(src, e, !dsc).negate();
850 }
851 if (header == ZERO) {
852 return BigDecimal.ZERO;
853 }
854 if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */
855 e = (int) -getVaruint64(src, !dsc);
856 return decodeSignificand(src, e, dsc);
857 }
858 if (header >= POS_MED_MIN && header <= POS_MED_MAX) {
859 /* Medium positive number: 0x17+E, M */
860 e = header - POS_MED_MIN;
861 return decodeSignificand(src, e, dsc);
862 }
863 if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */
864 e = (int) getVaruint64(src, dsc);
865 return decodeSignificand(src, e, dsc);
866 }
867 throw unexpectedHeader(header);
868 }
869
870 /**
871 * Decode a primitive {@code double} value from the Numeric encoding. Numeric
872 * encoding is based on {@link BigDecimal}; in the event the encoded value is
873 * larger than can be represented in a {@code double}, this method performs
874 * an implicit narrowing conversion as described in
875 * {@link BigDecimal#doubleValue()}.
876 * @throws NullPointerException when the encoded value is {@code NULL}.
877 * @throws IllegalArgumentException when the encoded value is not a Numeric.
878 * @see #encodeNumeric(PositionedByteRange, double, Order)
879 * @see BigDecimal#doubleValue()
880 */
881 public static double decodeNumericAsDouble(PositionedByteRange src) {
882 // TODO: should an encoded NULL value throw unexpectedHeader() instead?
883 if (isNull(src)) {
884 throw new NullPointerException("A null value cannot be decoded to a double.");
885 }
886 if (isNumericNaN(src)) {
887 src.get();
888 return Double.NaN;
889 }
890 if (isNumericZero(src)) {
891 src.get();
892 return Double.valueOf(0.0);
893 }
894
895 byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek();
896
897 if (header == NEG_INF) {
898 src.get();
899 return Double.NEGATIVE_INFINITY;
900 } else if (header == POS_INF) {
901 src.get();
902 return Double.POSITIVE_INFINITY;
903 } else {
904 return decodeNumericValue(src).doubleValue();
905 }
906 }
907
908 /**
909 * Decode a primitive {@code long} value from the Numeric encoding. Numeric
910 * encoding is based on {@link BigDecimal}; in the event the encoded value is
911 * larger than can be represented in a {@code long}, this method performs an
912 * implicit narrowing conversion as described in
913 * {@link BigDecimal#doubleValue()}.
914 * @throws NullPointerException when the encoded value is {@code NULL}.
915 * @throws IllegalArgumentException when the encoded value is not a Numeric.
916 * @see #encodeNumeric(PositionedByteRange, long, Order)
917 * @see BigDecimal#longValue()
918 */
919 public static long decodeNumericAsLong(PositionedByteRange src) {
920 // TODO: should an encoded NULL value throw unexpectedHeader() instead?
921 if (isNull(src)) throw new NullPointerException();
922 if (!isNumeric(src)) throw unexpectedHeader(src.peek());
923 if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
924 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
925
926 if (isNumericZero(src)) {
927 src.get();
928 return Long.valueOf(0);
929 }
930 return decodeNumericValue(src).longValue();
931 }
932
933 /**
934 * Decode a {@link BigDecimal} value from the variable-length encoding.
935 * @throws IllegalArgumentException when the encoded value is not a Numeric.
936 * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order)
937 */
938 public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) {
939 if (isNull(src)) {
940 src.get();
941 return null;
942 }
943 if (!isNumeric(src)) throw unexpectedHeader(src.peek());
944 if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
945 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
946 return decodeNumericValue(src);
947 }
948
949 /**
950 * Encode a String value. String encoding is 0x00-terminated and so it does
951 * not support {@code \u0000} codepoints in the value.
952 * @param dst The destination to which the encoded value is written.
953 * @param val The value to encode.
954 * @param ord The {@link Order} to respect while encoding {@code val}.
955 * @return the number of bytes written.
956 * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}.
957 */
958 public static int encodeString(PositionedByteRange dst, String val, Order ord) {
959 if (null == val) {
960 return encodeNull(dst, ord);
961 }
962 if (val.contains("\u0000"))
963 throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
964 final int offset = dst.getOffset(), start = dst.getPosition();
965 dst.put(TEXT);
966 // TODO: is there no way to decode into dst directly?
967 dst.put(val.getBytes(UTF8));
968 dst.put(TERM);
969 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
970 return dst.getPosition() - start;
971 }
972
973 /**
974 * Decode a String value.
975 */
976 public static String decodeString(PositionedByteRange src) {
977 final byte header = src.get();
978 if (header == NULL || header == DESCENDING.apply(NULL))
979 return null;
980 assert header == TEXT || header == DESCENDING.apply(TEXT);
981 Order ord = header == TEXT ? ASCENDING : DESCENDING;
982 byte[] a = src.getBytes();
983 final int offset = src.getOffset(), start = src.getPosition();
984 final byte terminator = ord.apply(TERM);
985 int rawStartPos = offset + start, rawTermPos = rawStartPos;
986 for (; a[rawTermPos] != terminator; rawTermPos++)
987 ;
988 src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1
989 if (DESCENDING == ord) {
990 // make a copy so that we don't disturb encoded value with ord.
991 byte[] copy = new byte[rawTermPos - rawStartPos];
992 System.arraycopy(a, rawStartPos, copy, 0, copy.length);
993 ord.apply(copy);
994 return new String(copy, UTF8);
995 } else {
996 return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8);
997 }
998 }
999
1000 /**
1001 * Calculate the expected BlobVar encoded length based on unencoded length.
1002 */
1003 public static int blobVarEncodedLength(int len) {
1004 if (0 == len)
1005 return 2; // 1-byte header + 1-byte terminator
1006 else
1007 return (int)
1008 Math.ceil(
1009 (len * 8) // 8-bits per input byte
1010 / 7.0) // 7-bits of input data per encoded byte, rounded up
1011 + 1; // + 1-byte header
1012 }
1013
1014 /**
1015 * Calculate the expected BlobVar decoded length based on encoded length.
1016 */
1017 @VisibleForTesting
1018 static int blobVarDecodedLength(int len) {
1019 return
1020 ((len
1021 - 1) // 1-byte header
1022 * 7) // 7-bits of payload per encoded byte
1023 / 8; // 8-bits per byte
1024 }
1025
1026 /**
1027 * Encode a Blob value using a modified varint encoding scheme.
1028 * <p>
1029 * This format encodes a byte[] value such that no limitations on the input
1030 * value are imposed. The first byte encodes the encoding scheme that
1031 * follows, {@link #BLOB_VAR}. Each encoded byte thereafter consists of a
1032 * header bit followed by 7 bits of payload. A header bit of '1' indicates
1033 * continuation of the encoding. A header bit of '0' indicates this byte
1034 * contains the last of the payload. An empty input value is encoded as the
1035 * header byte immediately followed by a termination byte {@code 0x00}. This
1036 * is not ambiguous with the encoded value of {@code []}, which results in
1037 * {@code [0x80, 0x00]}.
1038 * </p>
1039 * @return the number of bytes written.
1040 */
1041 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen,
1042 Order ord) {
1043 if (null == val) {
1044 return encodeNull(dst, ord);
1045 }
1046 // Empty value is null-terminated. All other values are encoded as 7-bits per byte.
1047 assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected.";
1048 final int offset = dst.getOffset(), start = dst.getPosition();
1049 dst.put(BLOB_VAR);
1050 if (0 == vlen) {
1051 dst.put(TERM);
1052 } else {
1053 byte s = 1, t = 0;
1054 for (int i = voff; i < vlen; i++) {
1055 dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s)));
1056 if (s < 7) {
1057 t = (byte) (val[i] << (7 - s));
1058 s++;
1059 } else {
1060 dst.put((byte) (0x80 | val[i]));
1061 s = 1;
1062 t = 0;
1063 }
1064 }
1065 if (s > 1) {
1066 dst.put((byte) (0x7f & t));
1067 } else {
1068 dst.getBytes()[offset + dst.getPosition() - 1] &= 0x7f;
1069 }
1070 }
1071 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1072 return dst.getPosition() - start;
1073 }
1074
1075 /**
1076 * Encode a blob value using a modified varint encoding scheme.
1077 * @return the number of bytes written.
1078 * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order)
1079 */
1080 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) {
1081 return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord);
1082 }
1083
1084 /**
1085 * Decode a blob value that was encoded using BlobVar encoding.
1086 */
1087 public static byte[] decodeBlobVar(PositionedByteRange src) {
1088 final byte header = src.get();
1089 if (header == NULL || header == DESCENDING.apply(NULL)) {
1090 return null;
1091 }
1092 assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR);
1093 Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING;
1094 if (src.peek() == ord.apply(TERM)) {
1095 // skip empty input buffer.
1096 src.get();
1097 return new byte[0];
1098 }
1099 final int offset = src.getOffset(), start = src.getPosition();
1100 int end;
1101 byte[] a = src.getBytes();
1102 for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++)
1103 ;
1104 end++; // increment end to 1-past last byte
1105 // create ret buffer using length of encoded data + 1 (header byte)
1106 PositionedByteRange ret = new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start
1107 + 1));
1108 int s = 6;
1109 byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff);
1110 for (int i = start + 1; i < end; i++) {
1111 if (s == 7) {
1112 ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f)));
1113 i++;
1114 // explicitly reset t -- clean up overflow buffer after decoding
1115 // a full cycle and retain assertion condition below. This happens
1116 t = 0; // when the LSB in the last encoded byte is 1. (HBASE-9893)
1117 } else {
1118 ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s)));
1119 }
1120 if (i == end) break;
1121 t = (byte) ((ord.apply(a[offset + i]) << 8 - s) & 0xff);
1122 s = s == 1 ? 7 : s - 1;
1123 }
1124 src.setPosition(end);
1125 assert t == 0 : "Unexpected bits remaining after decoding blob.";
1126 assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer.";
1127 return ret.getBytes();
1128 }
1129
1130 /**
1131 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in
1132 * DESCENDING order is NULL terminated so as to preserve proper sorting of
1133 * {@code []} and so it does not support {@code 0x00} in the value.
1134 * @return the number of bytes written.
1135 * @throws IllegalArgumentException when {@code ord} is DESCENDING and
1136 * {@code val} contains a {@code 0x00} byte.
1137 */
1138 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen,
1139 Order ord) {
1140 if (null == val) {
1141 encodeNull(dst, ord);
1142 if (ASCENDING == ord) return 1;
1143 else {
1144 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1145 // sort-order semantics of null values.
1146 dst.put(ord.apply(TERM));
1147 return 2;
1148 }
1149 }
1150 // Blobs as final entry in a compound key are written unencoded.
1151 assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2);
1152 if (DESCENDING == ord) {
1153 for (int i = 0; i < vlen; i++) {
1154 if (TERM == val[voff + i]) {
1155 throw new IllegalArgumentException("0x00 bytes not permitted in value.");
1156 }
1157 }
1158 }
1159 final int offset = dst.getOffset(), start = dst.getPosition();
1160 dst.put(BLOB_COPY);
1161 dst.put(val, voff, vlen);
1162 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1163 // sort-order semantics of null values.
1164 if (DESCENDING == ord) dst.put(TERM);
1165 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1166 return dst.getPosition() - start;
1167 }
1168
1169 /**
1170 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in
1171 * DESCENDING order is NULL terminated so as to preserve proper sorting of
1172 * {@code []} and so it does not support {@code 0x00} in the value.
1173 * @return the number of bytes written.
1174 * @throws IllegalArgumentException when {@code ord} is DESCENDING and
1175 * {@code val} contains a {@code 0x00} byte.
1176 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1177 */
1178 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) {
1179 return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord);
1180 }
1181
1182 /**
1183 * Decode a Blob value, byte-for-byte copy.
1184 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1185 */
1186 public static byte[] decodeBlobCopy(PositionedByteRange src) {
1187 byte header = src.get();
1188 if (header == NULL || header == DESCENDING.apply(NULL)) {
1189 return null;
1190 }
1191 assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY);
1192 Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING;
1193 final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1);
1194 byte[] ret = new byte[length];
1195 src.get(ret);
1196 ord.apply(ret, 0, ret.length);
1197 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1198 // sort-order semantics of null values.
1199 if (DESCENDING == ord) src.get();
1200 return ret;
1201 }
1202
1203 /**
1204 * Encode a null value.
1205 * @param dst The destination to which encoded digits are written.
1206 * @param ord The {@link Order} to respect while encoding {@code val}.
1207 * @return the number of bytes written.
1208 */
1209 public static int encodeNull(PositionedByteRange dst, Order ord) {
1210 dst.put(ord.apply(NULL));
1211 return 1;
1212 }
1213
1214 /**
1215 * Encode an {@code int8} value using the fixed-length encoding.
1216 * @return the number of bytes written.
1217 * @see #encodeInt64(PositionedByteRange, long, Order)
1218 * @see #decodeInt8(PositionedByteRange)
1219 */
1220 public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) {
1221 final int offset = dst.getOffset(), start = dst.getPosition();
1222 dst.put(FIXED_INT8)
1223 .put((byte) (val ^ 0x80));
1224 ord.apply(dst.getBytes(), offset + start, 2);
1225 return 2;
1226 }
1227
1228 /**
1229 * Decode an {@code int8} value.
1230 * @see #encodeInt8(PositionedByteRange, byte, Order)
1231 */
1232 public static byte decodeInt8(PositionedByteRange src) {
1233 final byte header = src.get();
1234 assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8);
1235 Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING;
1236 return (byte)((ord.apply(src.get()) ^ 0x80) & 0xff);
1237 }
1238
1239 /**
1240 * Encode an {@code int16} value using the fixed-length encoding.
1241 * @return the number of bytes written.
1242 * @see #encodeInt64(PositionedByteRange, long, Order)
1243 * @see #decodeInt16(PositionedByteRange)
1244 */
1245 public static int encodeInt16(PositionedByteRange dst, short val, Order ord) {
1246 final int offset = dst.getOffset(), start = dst.getPosition();
1247 dst.put(FIXED_INT16)
1248 .put((byte) ((val >> 8) ^ 0x80))
1249 .put((byte) val);
1250 ord.apply(dst.getBytes(), offset + start, 3);
1251 return 3;
1252 }
1253
1254 /**
1255 * Decode an {@code int16} value.
1256 * @see #encodeInt16(PositionedByteRange, short, Order)
1257 */
1258 public static short decodeInt16(PositionedByteRange src) {
1259 final byte header = src.get();
1260 assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16);
1261 Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING;
1262 short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff);
1263 val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff));
1264 return val;
1265 }
1266
1267 /**
1268 * Encode an {@code int32} value using the fixed-length encoding.
1269 * @return the number of bytes written.
1270 * @see #encodeInt64(PositionedByteRange, long, Order)
1271 * @see #decodeInt32(PositionedByteRange)
1272 */
1273 public static int encodeInt32(PositionedByteRange dst, int val, Order ord) {
1274 final int offset = dst.getOffset(), start = dst.getPosition();
1275 dst.put(FIXED_INT32)
1276 .put((byte) ((val >> 24) ^ 0x80))
1277 .put((byte) (val >> 16))
1278 .put((byte) (val >> 8))
1279 .put((byte) val);
1280 ord.apply(dst.getBytes(), offset + start, 5);
1281 return 5;
1282 }
1283
1284 /**
1285 * Decode an {@code int32} value.
1286 * @see #encodeInt32(PositionedByteRange, int, Order)
1287 */
1288 public static int decodeInt32(PositionedByteRange src) {
1289 final byte header = src.get();
1290 assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32);
1291 Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING;
1292 int val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1293 for (int i = 1; i < 4; i++) {
1294 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1295 }
1296 return val;
1297 }
1298
1299 /**
1300 * Encode an {@code int64} value using the fixed-length encoding.
1301 * <p>
1302 * This format ensures that all longs sort in their natural order, as they
1303 * would sort when using signed long comparison.
1304 * </p>
1305 * <p>
1306 * All Longs are serialized to an 8-byte, fixed-width sortable byte format.
1307 * Serialization is performed by inverting the integer sign bit and writing
1308 * the resulting bytes to the byte array in big endian order. The encoded
1309 * value is prefixed by the {@link #FIXED_INT64} header byte. This encoding
1310 * is designed to handle java language primitives and so Null values are NOT
1311 * supported by this implementation.
1312 * </p>
1313 * <p>
1314 * For example:
1315 * </p>
1316 * <pre>
1317 * Input: 0x0000000000000005 (5)
1318 * Result: 0x288000000000000005
1319 *
1320 * Input: 0xfffffffffffffffb (-4)
1321 * Result: 0x280000000000000004
1322 *
1323 * Input: 0x7fffffffffffffff (Long.MAX_VALUE)
1324 * Result: 0x28ffffffffffffffff
1325 *
1326 * Input: 0x8000000000000000 (Long.MIN_VALUE)
1327 * Result: 0x287fffffffffffffff
1328 * </pre>
1329 * <p>
1330 * This encoding format, and much of this documentation string, is based on
1331 * Orderly's {@code FixedIntWritableRowKey}.
1332 * </p>
1333 * @return the number of bytes written.
1334 * @see #decodeInt64(PositionedByteRange)
1335 */
1336 public static int encodeInt64(PositionedByteRange dst, long val, Order ord) {
1337 final int offset = dst.getOffset(), start = dst.getPosition();
1338 dst.put(FIXED_INT64)
1339 .put((byte) ((val >> 56) ^ 0x80))
1340 .put((byte) (val >> 48))
1341 .put((byte) (val >> 40))
1342 .put((byte) (val >> 32))
1343 .put((byte) (val >> 24))
1344 .put((byte) (val >> 16))
1345 .put((byte) (val >> 8))
1346 .put((byte) val);
1347 ord.apply(dst.getBytes(), offset + start, 9);
1348 return 9;
1349 }
1350
1351 /**
1352 * Decode an {@code int64} value.
1353 * @see #encodeInt64(PositionedByteRange, long, Order)
1354 */
1355 public static long decodeInt64(PositionedByteRange src) {
1356 final byte header = src.get();
1357 assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64);
1358 Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING;
1359 long val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1360 for (int i = 1; i < 8; i++) {
1361 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1362 }
1363 return val;
1364 }
1365
1366 /**
1367 * Encode a 32-bit floating point value using the fixed-length encoding.
1368 * Encoding format is described at length in
1369 * {@link #encodeFloat64(PositionedByteRange, double, Order)}.
1370 * @return the number of bytes written.
1371 * @see #decodeFloat32(PositionedByteRange)
1372 * @see #encodeFloat64(PositionedByteRange, double, Order)
1373 */
1374 public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) {
1375 final int offset = dst.getOffset(), start = dst.getPosition();
1376 int i = Float.floatToIntBits(val);
1377 i ^= ((i >> Integer.SIZE - 1) | Integer.MIN_VALUE);
1378 dst.put(FIXED_FLOAT32)
1379 .put((byte) (i >> 24))
1380 .put((byte) (i >> 16))
1381 .put((byte) (i >> 8))
1382 .put((byte) i);
1383 ord.apply(dst.getBytes(), offset + start, 5);
1384 return 5;
1385 }
1386
1387 /**
1388 * Decode a 32-bit floating point value using the fixed-length encoding.
1389 * @see #encodeFloat32(PositionedByteRange, float, Order)
1390 */
1391 public static float decodeFloat32(PositionedByteRange src) {
1392 final byte header = src.get();
1393 assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32);
1394 Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING;
1395 int val = ord.apply(src.get()) & 0xff;
1396 for (int i = 1; i < 4; i++) {
1397 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1398 }
1399 val ^= (~val >> Integer.SIZE - 1) | Integer.MIN_VALUE;
1400 return Float.intBitsToFloat(val);
1401 }
1402
1403 /**
1404 * Encode a 64-bit floating point value using the fixed-length encoding.
1405 * <p>
1406 * This format ensures the following total ordering of floating point
1407 * values: Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... <
1408 * -Double.MIN_VALUE < -0.0 < +0.0; < Double.MIN_VALUE < ...
1409 * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN
1410 * </p>
1411 * Floating point numbers are encoded as specified in IEEE 754. A 64-bit
1412 * double precision float consists of a sign bit, 11-bit unsigned exponent
1413 * encoded in offset-1023 notation, and a 52-bit significand. The format is
1414 * described further in the <a
1415 * href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision
1416 * Floating Point Wikipedia page</a> </p>
1417 * <p>
1418 * The value of a normal float is -1 <sup>sign bit</sup> ×
1419 * 2<sup>exponent - 1023</sup> × 1.significand
1420 * </p>
1421 * <p>
1422 * The IEE754 floating point format already preserves sort ordering for
1423 * positive floating point numbers when the raw bytes are compared in most
1424 * significant byte order. This is discussed further at <a href=
1425 * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm"
1426 * > http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.
1427 * htm</a>
1428 * </p>
1429 * <p>
1430 * Thus, we need only ensure that negative numbers sort in the the exact
1431 * opposite order as positive numbers (so that say, negative infinity is
1432 * less than negative 1), and that all negative numbers compare less than
1433 * any positive number. To accomplish this, we invert the sign bit of all
1434 * floating point numbers, and we also invert the exponent and significand
1435 * bits if the floating point number was negative.
1436 * </p>
1437 * <p>
1438 * More specifically, we first store the floating point bits into a 64-bit
1439 * long {@code l} using {@link Double#doubleToLongBits}. This method
1440 * collapses all NaNs into a single, canonical NaN value but otherwise
1441 * leaves the bits unchanged. We then compute
1442 * </p>
1443 * <pre>
1444 * l ˆ= (l >> (Long.SIZE - 1)) | Long.MIN_SIZE
1445 * </pre>
1446 * <p>
1447 * which inverts the sign bit and XOR's all other bits with the sign bit
1448 * itself. Comparing the raw bytes of {@code l} in most significant
1449 * byte order is equivalent to performing a double precision floating point
1450 * comparison on the underlying bits (ignoring NaN comparisons, as NaNs
1451 * don't compare equal to anything when performing floating point
1452 * comparisons).
1453 * </p>
1454 * <p>
1455 * The resulting long integer is then converted into a byte array by
1456 * serializing the long one byte at a time in most significant byte order.
1457 * The serialized integer is prefixed by a single header byte. All
1458 * serialized values are 9 bytes in length.
1459 * </p>
1460 * <p>
1461 * This encoding format, and much of this highly detailed documentation
1462 * string, is based on Orderly's {@code DoubleWritableRowKey}.
1463 * </p>
1464 * @return the number of bytes written.
1465 * @see #decodeFloat64(PositionedByteRange)
1466 */
1467 public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) {
1468 final int offset = dst.getOffset(), start = dst.getPosition();
1469 long lng = Double.doubleToLongBits(val);
1470 lng ^= ((lng >> Long.SIZE - 1) | Long.MIN_VALUE);
1471 dst.put(FIXED_FLOAT64)
1472 .put((byte) (lng >> 56))
1473 .put((byte) (lng >> 48))
1474 .put((byte) (lng >> 40))
1475 .put((byte) (lng >> 32))
1476 .put((byte) (lng >> 24))
1477 .put((byte) (lng >> 16))
1478 .put((byte) (lng >> 8))
1479 .put((byte) lng);
1480 ord.apply(dst.getBytes(), offset + start, 9);
1481 return 9;
1482 }
1483
1484 /**
1485 * Decode a 64-bit floating point value using the fixed-length encoding.
1486 * @see #encodeFloat64(PositionedByteRange, double, Order)
1487 */
1488 public static double decodeFloat64(PositionedByteRange src) {
1489 final byte header = src.get();
1490 assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64);
1491 Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING;
1492 long val = ord.apply(src.get()) & 0xff;
1493 for (int i = 1; i < 8; i++) {
1494 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1495 }
1496 val ^= (~val >> Long.SIZE - 1) | Long.MIN_VALUE;
1497 return Double.longBitsToDouble(val);
1498 }
1499
1500 /**
1501 * Returns true when {@code src} appears to be positioned an encoded value,
1502 * false otherwise.
1503 */
1504 public static boolean isEncodedValue(PositionedByteRange src) {
1505 return isNull(src) || isNumeric(src) || isFixedInt32(src) || isFixedInt64(src)
1506 || isFixedFloat32(src) || isFixedFloat64(src) || isText(src) || isBlobCopy(src)
1507 || isBlobVar(src);
1508 }
1509
1510 /**
1511 * Return true when the next encoded value in {@code src} is null, false
1512 * otherwise.
1513 */
1514 public static boolean isNull(PositionedByteRange src) {
1515 return NULL ==
1516 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1517 }
1518
1519 /**
1520 * Return true when the next encoded value in {@code src} uses Numeric
1521 * encoding, false otherwise. {@code NaN}, {@code +/-Inf} are valid Numeric
1522 * values.
1523 */
1524 public static boolean isNumeric(PositionedByteRange src) {
1525 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1526 return x >= NEG_INF && x <= NAN;
1527 }
1528
1529 /**
1530 * Return true when the next encoded value in {@code src} uses Numeric
1531 * encoding and is {@code Infinite}, false otherwise.
1532 */
1533 public static boolean isNumericInfinite(PositionedByteRange src) {
1534 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1535 return NEG_INF == x || POS_INF == x;
1536 }
1537
1538 /**
1539 * Return true when the next encoded value in {@code src} uses Numeric
1540 * encoding and is {@code NaN}, false otherwise.
1541 */
1542 public static boolean isNumericNaN(PositionedByteRange src) {
1543 return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1544 }
1545
1546 /**
1547 * Return true when the next encoded value in {@code src} uses Numeric
1548 * encoding and is {@code 0}, false otherwise.
1549 */
1550 public static boolean isNumericZero(PositionedByteRange src) {
1551 return ZERO ==
1552 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1553 }
1554
1555 /**
1556 * Return true when the next encoded value in {@code src} uses fixed-width
1557 * Int32 encoding, false otherwise.
1558 */
1559 public static boolean isFixedInt32(PositionedByteRange src) {
1560 return FIXED_INT32 ==
1561 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1562 }
1563
1564 /**
1565 * Return true when the next encoded value in {@code src} uses fixed-width
1566 * Int64 encoding, false otherwise.
1567 */
1568 public static boolean isFixedInt64(PositionedByteRange src) {
1569 return FIXED_INT64 ==
1570 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1571 }
1572
1573 /**
1574 * Return true when the next encoded value in {@code src} uses fixed-width
1575 * Float32 encoding, false otherwise.
1576 */
1577 public static boolean isFixedFloat32(PositionedByteRange src) {
1578 return FIXED_FLOAT32 ==
1579 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1580 }
1581
1582 /**
1583 * Return true when the next encoded value in {@code src} uses fixed-width
1584 * Float64 encoding, false otherwise.
1585 */
1586 public static boolean isFixedFloat64(PositionedByteRange src) {
1587 return FIXED_FLOAT64 ==
1588 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1589 }
1590
1591 /**
1592 * Return true when the next encoded value in {@code src} uses Text encoding,
1593 * false otherwise.
1594 */
1595 public static boolean isText(PositionedByteRange src) {
1596 return TEXT ==
1597 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1598 }
1599
1600 /**
1601 * Return true when the next encoded value in {@code src} uses BlobVar
1602 * encoding, false otherwise.
1603 */
1604 public static boolean isBlobVar(PositionedByteRange src) {
1605 return BLOB_VAR ==
1606 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1607 }
1608
1609 /**
1610 * Return true when the next encoded value in {@code src} uses BlobCopy
1611 * encoding, false otherwise.
1612 */
1613 public static boolean isBlobCopy(PositionedByteRange src) {
1614 return BLOB_COPY ==
1615 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1616 }
1617
1618 /**
1619 * Skip {@code buff}'s position forward over one encoded value.
1620 * @return number of bytes skipped.
1621 */
1622 public static int skip(PositionedByteRange src) {
1623 final int start = src.getPosition();
1624 byte header = src.get();
1625 Order ord = (-1 == Integer.signum(header)) ? DESCENDING : ASCENDING;
1626 header = ord.apply(header);
1627
1628 switch (header) {
1629 case NULL:
1630 case NEG_INF:
1631 return 1;
1632 case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */
1633 skipVaruint64(src, DESCENDING != ord);
1634 skipSignificand(src, DESCENDING != ord);
1635 return src.getPosition() - start;
1636 case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */
1637 case NEG_MED_MIN + 0x01:
1638 case NEG_MED_MIN + 0x02:
1639 case NEG_MED_MIN + 0x03:
1640 case NEG_MED_MIN + 0x04:
1641 case NEG_MED_MIN + 0x05:
1642 case NEG_MED_MIN + 0x06:
1643 case NEG_MED_MIN + 0x07:
1644 case NEG_MED_MIN + 0x08:
1645 case NEG_MED_MIN + 0x09:
1646 case NEG_MED_MAX:
1647 skipSignificand(src, DESCENDING != ord);
1648 return src.getPosition() - start;
1649 case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */
1650 skipVaruint64(src, DESCENDING == ord);
1651 skipSignificand(src, DESCENDING != ord);
1652 return src.getPosition() - start;
1653 case ZERO:
1654 return 1;
1655 case POS_SMALL: /* Small positive number: 0x16, ~-E, M */
1656 skipVaruint64(src, DESCENDING != ord);
1657 skipSignificand(src, DESCENDING == ord);
1658 return src.getPosition() - start;
1659 case POS_MED_MIN: /* Medium positive number: 0x17+E, M */
1660 case POS_MED_MIN + 0x01:
1661 case POS_MED_MIN + 0x02:
1662 case POS_MED_MIN + 0x03:
1663 case POS_MED_MIN + 0x04:
1664 case POS_MED_MIN + 0x05:
1665 case POS_MED_MIN + 0x06:
1666 case POS_MED_MIN + 0x07:
1667 case POS_MED_MIN + 0x08:
1668 case POS_MED_MIN + 0x09:
1669 case POS_MED_MAX:
1670 skipSignificand(src, DESCENDING == ord);
1671 return src.getPosition() - start;
1672 case POS_LARGE: /* Large positive number: 0x22, E, M */
1673 skipVaruint64(src, DESCENDING == ord);
1674 skipSignificand(src, DESCENDING == ord);
1675 return src.getPosition() - start;
1676 case POS_INF:
1677 return 1;
1678 case NAN:
1679 return 1;
1680 case FIXED_INT8:
1681 src.setPosition(src.getPosition() + 1);
1682 return src.getPosition() - start;
1683 case FIXED_INT16:
1684 src.setPosition(src.getPosition() + 2);
1685 return src.getPosition() - start;
1686 case FIXED_INT32:
1687 src.setPosition(src.getPosition() + 4);
1688 return src.getPosition() - start;
1689 case FIXED_INT64:
1690 src.setPosition(src.getPosition() + 8);
1691 return src.getPosition() - start;
1692 case FIXED_FLOAT32:
1693 src.setPosition(src.getPosition() + 4);
1694 return src.getPosition() - start;
1695 case FIXED_FLOAT64:
1696 src.setPosition(src.getPosition() + 8);
1697 return src.getPosition() - start;
1698 case TEXT:
1699 // for null-terminated values, skip to the end.
1700 do {
1701 header = ord.apply(src.get());
1702 } while (header != TERM);
1703 return src.getPosition() - start;
1704 case BLOB_VAR:
1705 // read until we find a 0 in the MSB
1706 do {
1707 header = ord.apply(src.get());
1708 } while ((byte) (header & 0x80) != TERM);
1709 return src.getPosition() - start;
1710 case BLOB_COPY:
1711 if (Order.DESCENDING == ord) {
1712 // if descending, read to termination byte.
1713 do {
1714 header = ord.apply(src.get());
1715 } while (header != TERM);
1716 return src.getPosition() - start;
1717 } else {
1718 // otherwise, just skip to the end.
1719 src.setPosition(src.getLength());
1720 return src.getPosition() - start;
1721 }
1722 default:
1723 throw unexpectedHeader(header);
1724 }
1725 }
1726
1727 /**
1728 * Return the number of encoded entries remaining in {@code buff}. The
1729 * state of {@code buff} is not modified through use of this method.
1730 */
1731 public static int length(PositionedByteRange buff) {
1732 PositionedByteRange b =
1733 new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength());
1734 b.setPosition(buff.getPosition());
1735 int cnt = 0;
1736 for (; isEncodedValue(b); skip(buff), cnt++)
1737 ;
1738 return cnt;
1739 }
1740 }