美文网首页
HBase中的Comparator

HBase中的Comparator

作者: 天之見證 | 来源:发表于2020-06-23 09:31 被阅读0次

    HBase在读取数据的时候, 会用到以下comparator:

    1. KVScannerComparator
    2. CellComparator
    3. Bytes Comparator

    1. KVScannerComparator

    protected static class KVScannerComparator implements Comparator<KeyValueScanner> {
        protected KVComparator kvComparator;
        
         * Constructor
         * @param kvComparator
         */
        public KVScannerComparator(KVComparator kvComparator) {
            this.kvComparator = kvComparator;
        }
        // 代理给KVComparator, 如果cell相同则比较sequenceId
        // sequenceId小的反而大
        public int compare(KeyValueScanner left, KeyValueScanner right) {
          int comparison = compare(left.peek(), right.peek());
          if (comparison != 0) {
                return comparison;
          } else {
                // Since both the keys are exactly the same, we break the tie in favor
                // of the key which came latest.
                long leftSequenceID = left.getSequenceID();
                long rightSequenceID = right.getSequenceID();
                if (leftSequenceID > rightSequenceID) {
                    return -1;
                } else if (leftSequenceID < rightSequenceID) {
                    return 1;
                } else {
                    return 0;
                }
            }
        }
        /**
         * Compares two KeyValue
         * @param left
         * @param right
         * @return less than 0 if left is smaller, 0 if equal etc..
         */
        public int compare(Cell left, Cell right) {
            return this.kvComparator.compare(left, right);
        }
        /**
         * @return KVComparator
         */
        public KVComparator getComparator() {
            return this.kvComparator;
        }
    }
    

    2. CellComparator

    /**
     * Compares the row portions of two cells by handing each cell's row array,
     * row offset and row length to {@code Bytes.compareTo}.
     * @param left first cell
     * @param right second cell
     * @return the result of {@code Bytes.compareTo} on the two row ranges
     */
    public static int compareRows(final Cell left, final Cell right) {
        final byte[] leftRow = left.getRowArray();
        final int leftRowOffset = left.getRowOffset();
        final int leftRowLength = left.getRowLength();

        final byte[] rightRow = right.getRowArray();
        final int rightRowOffset = right.getRowOffset();
        final int rightRowLength = right.getRowLength();

        return Bytes.compareTo(
            leftRow, leftRowOffset, leftRowLength,
            rightRow, rightRowOffset, rightRowLength);
    }
    

    3. Bytes Comparator

    //  Bytes.java
    /**
     * Compares two byte ranges by delegating to
     * {@code LexicographicalComparerHolder.BEST_COMPARER}.
     * NOTE(review): how BEST_COMPARER is chosen is not visible in this excerpt;
     * presumably it is one of the comparer implementations shown further down
     * (pure Java or Unsafe-based) - confirm against the holder class.
     * @param buffer1 first array
     * @param offset1 start of the range in buffer1
     * @param length1 number of bytes in the first range
     * @param buffer2 second array
     * @param offset2 start of the range in buffer2
     * @param length2 number of bytes in the second range
     * @return whatever the selected comparer returns for the two ranges
     */
    public static int compareTo(byte[] buffer1, int offset1, int length1,
          byte[] buffer2, int offset2, int length2) {
        return LexicographicalComparerHolder.BEST_COMPARER.
          compareTo(buffer1, offset1, length1, buffer2, offset2, length2);
    }
    

    3.1 Java版本的实现

    这里也体现了用枚举(enum)实现单例的写法: 枚举只声明 INSTANCE 一个常量

    /**
     * Pure-Java comparer for byte ranges. Bytes are compared one at a time as
     * unsigned values; if one range is a prefix of the other, the result is the
     * difference of the two lengths. The enum declares a single constant,
     * {@code INSTANCE}.
     */
    enum PureJavaComparer implements Comparer<byte[]> {
        INSTANCE;

        /**
         * @return 0 when both ranges are the same array/offset/length or hold the
         *         same bytes with equal lengths; otherwise the difference of the
         *         first mismatching unsigned bytes, or {@code length1 - length2}
         *         when no mismatch is found within the shared span
         */
        @Override
        public int compareTo(byte[] buffer1, int offset1, int length1,
              byte[] buffer2, int offset2, int length2) {
            // Same array, same window: nothing to scan.
            final boolean sameRange =
                buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
            if (sameRange) {
                return 0;
            }

            final int limit1 = offset1 + length1;
            final int limit2 = offset2 + length2;
            int p = offset1;
            int q = offset2;
            // Walk both ranges in lock-step until either one is exhausted.
            while (p < limit1 && q < limit2) {
                // Mask to 0..255 so bytes are ordered as unsigned values.
                final int delta = (buffer1[p] & 0xff) - (buffer2[q] & 0xff);
                if (delta != 0) {
                    return delta;
                }
                p++;
                q++;
            }
            // No mismatch in the shared span: the shorter range sorts first.
            return length1 - length2;
        }
    }
    

    3.2 Unsafe的实现

    Unsafe版本采用逐级比较: 先每次比较8字节(long), 剩余部分再依次按4字节(int)、2字节(short)、1字节(byte)比较

    /**
     * Compares two byte ranges using wide reads through {@code theUnsafe}:
     * first in SIZEOF_LONG-sized words over the shared prefix, then at most one
     * int, one short and one single byte for the remaining tail. If the shared
     * prefix is identical, the result is {@code length1 - length2}.
     * NOTE(review): the word/int/short ordering is decided by the
     * lessThanUnsigned* helpers, which are not visible in this excerpt;
     * presumably they account for platform byte order - confirm.
     * @param buffer1 first array
     * @param offset1 start of the range in buffer1
     * @param length1 number of bytes in the first range
     * @param buffer2 second array
     * @param offset2 start of the range in buffer2
     * @param length2 number of bytes in the second range
     * @return 0 for the identical array/offset/length; -1 or 1 when a word, int
     *         or short differs; the unsigned byte difference when only the last
     *         byte differs; otherwise {@code length1 - length2}
     */
    @Override
    public int compareTo(byte[] buffer1, int offset1, int length1,
        byte[] buffer2, int offset2, int length2) {
    
        // Short circuit equal case
        if (buffer1 == buffer2 &&
            offset1 == offset2 &&
            length1 == length2) {
            return 0;
        }
        // Only the shared prefix (the shorter of the two lengths) is compared.
        final int minLength = Math.min(length1, length2);
        // Number of whole long-sized words in the shared prefix.
        final int minWords = minLength / SIZEOF_LONG;
        // Range offsets shifted by BYTE_ARRAY_BASE_OFFSET, used as the address
        // argument of the theUnsafe reads below.
        final long offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
        final long offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
    
        /*
         * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
         * time is no slower than comparing 4 bytes at a time even on 32-bit.
         * On the other hand, it is substantially faster on 64-bit.
         */
        // This is the end offset of long parts.
        int j = minWords << 3; // Same as minWords * SIZEOF_LONG
        for (int i = 0; i < j; i += SIZEOF_LONG) {
            long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
            long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
            // A non-zero XOR means the two words differ in at least one bit.
            long diff = lw ^ rw;
            if (diff != 0) {
                return lessThanUnsignedLong(lw, rw) ? -1 : 1;
            }
        }
        // Bytes already covered by the word loop; the tail starts here.
        int offset = j;
    
        // Tail step 1: one int-sized read if enough bytes remain.
        if (minLength - offset >= SIZEOF_INT) {
            int il = theUnsafe.getInt(buffer1, offset1Adj + offset);
            int ir = theUnsafe.getInt(buffer2, offset2Adj + offset);
            if (il != ir) {
                return lessThanUnsignedInt(il, ir) ? -1: 1;
            }
            offset += SIZEOF_INT;
        }
        // Tail step 2: one short-sized read if enough bytes remain.
        if (minLength - offset >= SIZEOF_SHORT) {
            short sl = theUnsafe.getShort(buffer1, offset1Adj + offset);
            short sr = theUnsafe.getShort(buffer2, offset2Adj + offset);
            if (sl != sr) {
                return lessThanUnsignedShort(sl, sr) ? -1: 1;
            }
            offset += SIZEOF_SHORT;
        }
        // Tail step 3: a single leftover byte, read directly from the arrays
        // (unadjusted offsets) and compared as unsigned values.
        if (minLength - offset == 1) {
            int a = (buffer1[(int)(offset1 + offset)] & 0xff);
            int b = (buffer2[(int)(offset2 + offset)] & 0xff);
            if (a != b) {
                return a - b;
            }
        }
        // Shared prefix is identical: fall back to the length difference.
        return length1 - length2;
    }
    

    ps: 看HBase1.2的源码的时候摘录的一些代码片段

    相关文章

      网友评论

          本文标题:HBase中的Comparator

          本文链接:https://www.haomeiwen.com/subject/zbscfktx.html