美文网首页
HBase中的Comparator

HBase中的Comparator

作者: 天之見證 | 来源:发表于2020-06-23 09:31 被阅读0次

HBase在读取数据的时候, 会用到以下comparator:

  1. KVScannerComparator
  2. CellComparator
  3. Bytes Comparator

1. KVScannerComparator

protected static class KVScannerComparator implements Comparator<KeyValueScanner> {
    protected KVComparator kvComparator;
    
     * Constructor
     * @param kvComparator
     */
    public KVScannerComparator(KVComparator kvComparator) {
        this.kvComparator = kvComparator;
    }
    // 代理给KVComparator, 如果cell相同则比较sequenceId
    // sequenceId小的反而大
    public int compare(KeyValueScanner left, KeyValueScanner right) {
      int comparison = compare(left.peek(), right.peek());
      if (comparison != 0) {
            return comparison;
      } else {
            // Since both the keys are exactly the same, we break the tie in favor
            // of the key which came latest.
            long leftSequenceID = left.getSequenceID();
            long rightSequenceID = right.getSequenceID();
            if (leftSequenceID > rightSequenceID) {
                return -1;
            } else if (leftSequenceID < rightSequenceID) {
                return 1;
            } else {
                return 0;
            }
        }
    }
    /**
     * Compares two KeyValue
     * @param left
     * @param right
     * @return less than 0 if left is smaller, 0 if equal etc..
     */
    public int compare(Cell left, Cell right) {
        return this.kvComparator.compare(left, right);
    }
    /**
     * @return KVComparator
     */
    public KVComparator getComparator() {
        return this.kvComparator;
    }
}

2. CellComparator

/**
 * Compares the row portions of two cells by handing each cell's row array, row offset
 * and row length to {@code Bytes.compareTo}.
 * @param left first cell
 * @param right second cell
 * @return the value returned by {@code Bytes.compareTo} for the two row ranges
 */
public static int compareRows(final Cell left, final Cell right) {
    final byte[] leftRow = left.getRowArray();
    final int leftOffset = left.getRowOffset();
    final int leftLength = left.getRowLength();

    final byte[] rightRow = right.getRowArray();
    final int rightOffset = right.getRowOffset();
    final int rightLength = right.getRowLength();

    return Bytes.compareTo(leftRow, leftOffset, leftLength, rightRow, rightOffset, rightLength);
}

3. Bytes Comparator

//  Bytes.java
/**
 * Compares two byte ranges by forwarding all six arguments, unchanged, to
 * {@code LexicographicalComparerHolder.BEST_COMPARER}.
 * @param buffer1 array holding the first range
 * @param offset1 start of the first range within {@code buffer1}
 * @param length1 number of bytes in the first range
 * @param buffer2 array holding the second range
 * @param offset2 start of the second range within {@code buffer2}
 * @param length2 number of bytes in the second range
 * @return whatever the selected comparer returns for the two ranges
 */
public static int compareTo(byte[] buffer1, int offset1, int length1,
      byte[] buffer2, int offset2, int length2) {
    return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
        buffer1, offset1, length1,
        buffer2, offset2, length2);
}

3.1 Java版本的实现

这里也体现了一种用枚举(enum)实现单例的写法: 枚举中只声明一个 INSTANCE 常量

/**
 * Comparer for byte ranges written in plain Java. It is declared as an enum with the
 * single constant {@link #INSTANCE}, which is the only instance of this type.
 */
enum PureJavaComparer implements Comparer<byte[]> {
    INSTANCE;

    /**
     * Compares two byte ranges position by position, treating every byte as an unsigned
     * value (0..255).
     * @param buffer1 array holding the first range
     * @param offset1 start of the first range within {@code buffer1}
     * @param length1 number of bytes in the first range
     * @param buffer2 array holding the second range
     * @param offset2 start of the second range within {@code buffer2}
     * @param length2 number of bytes in the second range
     * @return 0 if both ranges are the very same array region; otherwise the difference of
     *         the first pair of unequal unsigned bytes; otherwise {@code length1 - length2}
     */
    @Override
    public int compareTo(byte[] buffer1, int offset1, int length1,
          byte[] buffer2, int offset2, int length2) {
        // Same array, same offset, same length: nothing to scan.
        final boolean sameRegion =
            buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
        if (sameRegion) {
            return 0;
        }

        final int limit1 = offset1 + length1;
        final int limit2 = offset2 + length2;
        int p = offset1;
        int q = offset2;
        // Walk both ranges in lockstep until either one is exhausted.
        while (p < limit1 && q < limit2) {
            // Masking with 0xff turns the signed byte into its unsigned value.
            final int unsigned1 = buffer1[p] & 0xff;
            final int unsigned2 = buffer2[q] & 0xff;
            if (unsigned1 != unsigned2) {
                return unsigned1 - unsigned2;
            }
            p++;
            q++;
        }
        // The shared prefix is identical, so the lengths decide the result.
        return length1 - length2;
    }
}

3.2 Unsafe的实现

Unsafe版本采用逐级比较: 先每次按8字节(long)比较, 剩余不足8字节的部分再依次按4字节(int), 2字节(short), 1字节(byte)比较

/**
 * Compares two byte ranges by reading progressively smaller units through
 * {@code theUnsafe}: first {@code long}s, then at most one {@code int}, at most one
 * {@code short}, and finally at most one single byte.
 * @param buffer1 array holding the first range
 * @param offset1 start of the first range within {@code buffer1}
 * @param length1 number of bytes in the first range
 * @param buffer2 array holding the second range
 * @param offset2 start of the second range within {@code buffer2}
 * @param length2 number of bytes in the second range
 * @return 0 if both ranges are the very same array region; -1 or 1 when a long, int or
 *         short unit differs; the difference of the unsigned bytes when only the last
 *         single byte differs; otherwise {@code length1 - length2}
 */
@Override
public int compareTo(byte[] buffer1, int offset1, int length1,
    byte[] buffer2, int offset2, int length2) {

    // Same array, same offset, same length: nothing to compare.
    final boolean sameRegion =
        buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
    if (sameRegion) {
        return 0;
    }

    final int commonLength = Math.min(length1, length2);
    final int wordCount = commonLength / SIZEOF_LONG;
    final long base1 = offset1 + BYTE_ARRAY_BASE_OFFSET;
    final long base2 = offset2 + BYTE_ARRAY_BASE_OFFSET;

    /*
     * Long-sized pass. Per the original note, benchmarking showed that comparing
     * 8 bytes at a time is no slower than 4 bytes at a time even on 32-bit, and
     * substantially faster on 64-bit.
     */
    // End offset of the part covered by whole longs (same as wordCount * SIZEOF_LONG).
    final int longPartEnd = wordCount << 3;
    for (int k = 0; k < longPartEnd; k += SIZEOF_LONG) {
        final long leftWord = theUnsafe.getLong(buffer1, base1 + (long) k);
        final long rightWord = theUnsafe.getLong(buffer2, base2 + (long) k);
        if (leftWord != rightWord) {
            return lessThanUnsignedLong(leftWord, rightWord) ? -1 : 1;
        }
    }

    // Position, relative to the range starts, of the first byte not yet compared.
    int pos = longPartEnd;

    // At least an int's worth of bytes left in the common prefix.
    if (commonLength - pos >= SIZEOF_INT) {
        final int leftInt = theUnsafe.getInt(buffer1, base1 + pos);
        final int rightInt = theUnsafe.getInt(buffer2, base2 + pos);
        if (leftInt != rightInt) {
            return lessThanUnsignedInt(leftInt, rightInt) ? -1 : 1;
        }
        pos += SIZEOF_INT;
    }

    // At least a short's worth of bytes left in the common prefix.
    if (commonLength - pos >= SIZEOF_SHORT) {
        final short leftShort = theUnsafe.getShort(buffer1, base1 + pos);
        final short rightShort = theUnsafe.getShort(buffer2, base2 + pos);
        if (leftShort != rightShort) {
            return lessThanUnsignedShort(leftShort, rightShort) ? -1 : 1;
        }
        pos += SIZEOF_SHORT;
    }

    // Exactly one trailing byte: read it straight from the arrays as an unsigned value.
    if (commonLength - pos == 1) {
        final int leftByte = buffer1[offset1 + pos] & 0xff;
        final int rightByte = buffer2[offset2 + pos] & 0xff;
        if (leftByte != rightByte) {
            return leftByte - rightByte;
        }
    }

    // The common prefix is identical, so the lengths decide the result.
    return length1 - length2;
}

ps: 看HBase1.2的源码的时候摘录的一些代码片段

相关文章

网友评论

      本文标题:HBase中的Comparator

      本文链接:https://www.haomeiwen.com/subject/zbscfktx.html