1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  package org.apache.hadoop.hbase.filter;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Comparator;
23  import java.util.List;
24  import java.util.PriorityQueue;
25  
26  import org.apache.hadoop.hbase.Cell;
27  import org.apache.hadoop.hbase.KeyValueUtil;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.exceptions.DeserializationException;
31  import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
32  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
33  import org.apache.hadoop.hbase.util.ByteStringer;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.util.Pair;
36  import org.apache.hadoop.hbase.util.UnsafeAccess;
37  
38  import com.google.common.annotations.VisibleForTesting;
39  import com.google.protobuf.InvalidProtocolBufferException;
40  
41  
42  
43  
44  
45  
46  
47  
48  
49  
50  
51  
52  
53  
54  
55  
56  
57  
58  
59  @InterfaceAudience.Public
60  @InterfaceStability.Evolving
61  public class FuzzyRowFilter extends FilterBase {
62    private List<Pair<byte[], byte[]>> fuzzyKeysData;
63    private boolean done = false;
64  
65    
66  
67  
68  
69  
70    private int lastFoundIndex = -1;
71  
72    
73  
74  
75    private RowTracker tracker;
76  
77    public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
78      Pair<byte[], byte[]> p;
79      for (int i = 0; i < fuzzyKeysData.size(); i++) {
80        p = fuzzyKeysData.get(i);
81        if (p.getFirst().length != p.getSecond().length) {
82          Pair<String, String> readable =
83              new Pair<String, String>(Bytes.toStringBinary(p.getFirst()), Bytes.toStringBinary(p
84                  .getSecond()));
85          throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
86        }
87        
88        p.setSecond(preprocessMask(p.getSecond()));
89        preprocessSearchKey(p);
90      }
91      this.fuzzyKeysData = fuzzyKeysData;
92      this.tracker = new RowTracker();
93    }
94  
95    private void preprocessSearchKey(Pair<byte[], byte[]> p) {
96      if (UnsafeAccess.isAvailable() == false) {
97        return;
98      }
99      byte[] key = p.getFirst();
100     byte[] mask = p.getSecond();
101     for (int i = 0; i < mask.length; i++) {
102       
103       if (mask[i] == 2) {
104         key[i] = 0;
105       }
106     }
107   }
108 
109   
110 
111 
112 
113 
114 
115   private byte[] preprocessMask(byte[] mask) {
116     if (UnsafeAccess.isAvailable() == false) {
117       return mask;
118     }
119     if (isPreprocessedMask(mask)) return mask;
120     for (int i = 0; i < mask.length; i++) {
121       if (mask[i] == 0) {
122         mask[i] = -1; 
123       } else if (mask[i] == 1) {
124         mask[i] = 2;
125       }
126     }
127     return mask;
128   }
129 
130   private boolean isPreprocessedMask(byte[] mask) {
131     for (int i = 0; i < mask.length; i++) {
132       if (mask[i] != -1 && mask[i] != 2) {
133         return false;
134       }
135     }
136     return true;
137   }
138 
139   @Override
140   public ReturnCode filterKeyValue(Cell c) {
141     final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
142     final int size = fuzzyKeysData.size();
143     for (int i = startIndex; i < size + startIndex; i++) {
144       final int index = i % size;
145       Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
146       
147       for (int j = 0; j < fuzzyData.getSecond().length; j++) {
148         fuzzyData.getSecond()[j] >>= 2;
149       }
150       SatisfiesCode satisfiesCode =
151           satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
152             fuzzyData.getFirst(), fuzzyData.getSecond());
153       if (satisfiesCode == SatisfiesCode.YES) {
154         lastFoundIndex = index;
155         return ReturnCode.INCLUDE;
156       }
157     }
158     
159     lastFoundIndex = -1;
160 
161     return ReturnCode.SEEK_NEXT_USING_HINT;
162 
163   }
164 
165   @Override
166   public Cell getNextCellHint(Cell currentCell) {
167     boolean result = tracker.updateTracker(currentCell);
168     if (result == false) {
169       done = true;
170       return null;
171     }
172     byte[] nextRowKey = tracker.nextRow();
173     return KeyValueUtil.createFirstOnRow(nextRowKey);
174   }
175 
176   
177 
178 
179 
180 
181 
182 
183 
184   private class RowTracker {
185     private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
186     private boolean initialized = false;
187 
188     RowTracker() {
189       nextRows =
190           new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
191               new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
192                 @Override
193                 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
194                     Pair<byte[], Pair<byte[], byte[]>> o2) {
195                   int compare = Bytes.compareTo(o1.getFirst(), o2.getFirst());
196                   if (!isReversed()) {
197                     return compare;
198                   } else {
199                     return -compare;
200                   }
201                 }
202               });
203     }
204 
205     byte[] nextRow() {
206       if (nextRows.isEmpty()) {
207         throw new IllegalStateException(
208             "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
209       } else {
210         return nextRows.peek().getFirst();
211       }
212     }
213 
214     boolean updateTracker(Cell currentCell) {
215       if (!initialized) {
216         for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
217           updateWith(currentCell, fuzzyData);
218         }
219         initialized = true;
220       } else {
221         while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
222           Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
223           Pair<byte[], byte[]> fuzzyData = head.getSecond();
224           updateWith(currentCell, fuzzyData);
225         }
226       }
227       return !nextRows.isEmpty();
228     }
229 
230     boolean lessThan(Cell currentCell, byte[] nextRowKey) {
231       int compareResult =
232           Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
233             currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
234       return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
235     }
236 
237     void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
238       byte[] nextRowKeyCandidate =
239           getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
240             currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
241       if (nextRowKeyCandidate != null) {
242         nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
243       }
244     }
245 
246   }
247 
248   @Override
249   public boolean filterAllRemaining() {
250     return done;
251   }
252 
253   
254 
255 
256   public byte[] toByteArray() {
257     FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
258     for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
259       BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
260       bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
261       bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
262       builder.addFuzzyKeysData(bbpBuilder);
263     }
264     return builder.build().toByteArray();
265   }
266 
267   
268 
269 
270 
271 
272 
273   public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
274     FilterProtos.FuzzyRowFilter proto;
275     try {
276       proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
277     } catch (InvalidProtocolBufferException e) {
278       throw new DeserializationException(e);
279     }
280     int count = proto.getFuzzyKeysDataCount();
281     ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
282     for (int i = 0; i < count; ++i) {
283       BytesBytesPair current = proto.getFuzzyKeysData(i);
284       byte[] keyBytes = current.getFirst().toByteArray();
285       byte[] keyMeta = current.getSecond().toByteArray();
286       fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
287     }
288     return new FuzzyRowFilter(fuzzyKeysData);
289   }
290 
291   @Override
292   public String toString() {
293     final StringBuilder sb = new StringBuilder();
294     sb.append("FuzzyRowFilter");
295     sb.append("{fuzzyKeysData=");
296     for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
297       sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
298       sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
299     }
300     sb.append("}, ");
301     return sb.toString();
302   }
303 
304   
305 
306   static enum SatisfiesCode {
307     
308     YES,
309     
310     NEXT_EXISTS,
311     
312     NO_NEXT
313   }
314 
315   @VisibleForTesting
316   static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
317     return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
318   }
319 
320   @VisibleForTesting
321   static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
322       byte[] fuzzyKeyMeta) {
323     return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
324   }
325 
326   static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
327       byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
328 
329     if (UnsafeAccess.isAvailable() == false) {
330       return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
331     }
332 
333     if (row == null) {
334       
335       return SatisfiesCode.YES;
336     }
337     length = Math.min(length, fuzzyKeyBytes.length);
338     int numWords = length / Bytes.SIZEOF_LONG;
339     int offsetAdj = offset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
340 
341     int j = numWords << 3; 
342 
343     for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
344 
345       long fuzzyBytes =
346           UnsafeAccess.theUnsafe.getLong(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
347               + (long) i);
348       long fuzzyMeta =
349           UnsafeAccess.theUnsafe.getLong(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
350               + (long) i);
351       long rowValue = UnsafeAccess.theUnsafe.getLong(row, offsetAdj + (long) i);
352       if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
353         
354         return SatisfiesCode.NEXT_EXISTS;
355       }
356     }
357 
358     int off = j;
359 
360     if (length - off >= Bytes.SIZEOF_INT) {
361       int fuzzyBytes =
362           UnsafeAccess.theUnsafe.getInt(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
363               + (long) off);
364       int fuzzyMeta =
365           UnsafeAccess.theUnsafe.getInt(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
366               + (long) off);
367       int rowValue = UnsafeAccess.theUnsafe.getInt(row, offsetAdj + (long) off);
368       if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
369         
370         return SatisfiesCode.NEXT_EXISTS;
371       }
372       off += Bytes.SIZEOF_INT;
373     }
374 
375     if (length - off >= Bytes.SIZEOF_SHORT) {
376       short fuzzyBytes =
377           UnsafeAccess.theUnsafe.getShort(fuzzyKeyBytes, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
378               + (long) off);
379       short fuzzyMeta =
380           UnsafeAccess.theUnsafe.getShort(fuzzyKeyMeta, UnsafeAccess.BYTE_ARRAY_BASE_OFFSET
381               + (long) off);
382       short rowValue = UnsafeAccess.theUnsafe.getShort(row, offsetAdj + (long) off);
383       if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
384         
385         
386         
387         return SatisfiesCode.NEXT_EXISTS;
388       }
389       off += Bytes.SIZEOF_SHORT;
390     }
391 
392     if (length - off >= Bytes.SIZEOF_BYTE) {
393       int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
394       int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
395       int rowValue = row[offset + off] & 0xff;
396       if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
397         
398         return SatisfiesCode.NEXT_EXISTS;
399       }
400     }
401     return SatisfiesCode.YES;
402   }
403 
404   static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
405       byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
406     if (row == null) {
407       
408       return SatisfiesCode.YES;
409     }
410 
411     Order order = Order.orderFor(reverse);
412     boolean nextRowKeyCandidateExists = false;
413 
414     for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
415       
416       boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
417       boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
418       if (fixedByteIncorrect) {
419         
420         if (nextRowKeyCandidateExists) {
421           return SatisfiesCode.NEXT_EXISTS;
422         }
423 
424         
425         
426         
427         boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
428         if (rowByteLessThanFixed && !reverse) {
429           return SatisfiesCode.NEXT_EXISTS;
430         } else if (!rowByteLessThanFixed && reverse) {
431           return SatisfiesCode.NEXT_EXISTS;
432         } else {
433           return SatisfiesCode.NO_NEXT;
434         }
435       }
436 
437       
438       
439       
440       
441       
442       
443       if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
444         nextRowKeyCandidateExists = true;
445       }
446     }
447     return SatisfiesCode.YES;
448   }
449 
450   @VisibleForTesting
451   static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
452     return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
453   }
454 
455   @VisibleForTesting
456   static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
457       byte[] fuzzyKeyMeta) {
458     return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
459   }
460 
461   
462   private enum Order {
463     ASC {
464       public boolean lt(int lhs, int rhs) {
465         return lhs < rhs;
466       }
467 
468       public boolean gt(int lhs, int rhs) {
469         return lhs > rhs;
470       }
471 
472       public byte inc(byte val) {
473         
474         return (byte) (val + 1);
475       }
476 
477       public boolean isMax(byte val) {
478         return val == (byte) 0xff;
479       }
480 
481       public byte min() {
482         return 0;
483       }
484     },
485     DESC {
486       public boolean lt(int lhs, int rhs) {
487         return lhs > rhs;
488       }
489 
490       public boolean gt(int lhs, int rhs) {
491         return lhs < rhs;
492       }
493 
494       public byte inc(byte val) {
495         
496         return (byte) (val - 1);
497       }
498 
499       public boolean isMax(byte val) {
500         return val == 0;
501       }
502 
503       public byte min() {
504         return (byte) 0xFF;
505       }
506     };
507 
508     public static Order orderFor(boolean reverse) {
509       return reverse ? DESC : ASC;
510     }
511 
512     
513     public abstract boolean lt(int lhs, int rhs);
514 
515     
516     public abstract boolean gt(int lhs, int rhs);
517 
518     
519     public abstract byte inc(byte val);
520 
521     
522     public abstract boolean isMax(byte val);
523 
524     
525     public abstract byte min();
526   }
527 
528   
529 
530 
531 
532   @VisibleForTesting
533   static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
534       byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
535     
536     
537     
538     
539     
540 
541     
542     
543     byte[] result =
544         Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
545     if (reverse && length > fuzzyKeyBytes.length) {
546       
547       for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
548         result[i] = (byte) 0xFF;
549       }
550     }
551     int toInc = -1;
552     final Order order = Order.orderFor(reverse);
553 
554     boolean increased = false;
555     for (int i = 0; i < result.length; i++) {
556       if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 
557         result[i] = row[offset + i];
558         if (!order.isMax(row[offset + i])) {
559           
560           toInc = i;
561         }
562       } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1 
563         if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
564           
565           
566           increased = true;
567           break;
568         }
569 
570         if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
571           
572           
573           
574           break;
575         }
576       }
577     }
578 
579     if (!increased) {
580       if (toInc < 0) {
581         return null;
582       }
583       result[toInc] = order.inc(result[toInc]);
584 
585       
586       
587       for (int i = toInc + 1; i < result.length; i++) {
588         if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0 
589           result[i] = order.min();
590         }
591       }
592     }
593 
594     return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
595   }
596 
597   
598 
599 
600 
601 
602 
603 
604 
605 
606 
607 
608   
609   private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
610     int off = fuzzyKeyMeta.length >= result.length? result.length -1:
611            fuzzyKeyMeta.length -1;  
612     for( ; off >= 0; off--){
613       if(fuzzyKeyMeta[off] != 0) break;
614     }
615     if (off < toInc)  off = toInc;
616     byte[] retValue = new byte[off+1];
617     System.arraycopy(result, 0, retValue, 0, retValue.length);
618     return retValue;
619   }
620 
621   
622 
623 
624 
625   boolean areSerializedFieldsEqual(Filter o) {
626     if (o == this) return true;
627     if (!(o instanceof FuzzyRowFilter)) return false;
628 
629     FuzzyRowFilter other = (FuzzyRowFilter) o;
630     if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
631     for (int i = 0; i < fuzzyKeysData.size(); ++i) {
632       Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
633       Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
634       if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
635         thisData.getSecond(), otherData.getSecond()))) {
636         return false;
637       }
638     }
639     return true;
640   }
641 }