HBase在读取数据的时候, 会用到以下comparator:
KVScannerComparator
CellComparator
Bytes Comparator
1. KVScannerComparator
protected static class KVScannerComparator implements Comparator<KeyValueScanner> {
protected KVComparator kvComparator;
* Constructor
* @param kvComparator
*/
public KVScannerComparator(KVComparator kvComparator) {
this.kvComparator = kvComparator;
}
// 代理给KVComparator, 如果cell相同则比较sequenceId
// sequenceId小的反而大
public int compare(KeyValueScanner left, KeyValueScanner right) {
int comparison = compare(left.peek(), right.peek());
if (comparison != 0) {
return comparison;
} else {
// Since both the keys are exactly the same, we break the tie in favor
// of the key which came latest.
long leftSequenceID = left.getSequenceID();
long rightSequenceID = right.getSequenceID();
if (leftSequenceID > rightSequenceID) {
return -1;
} else if (leftSequenceID < rightSequenceID) {
return 1;
} else {
return 0;
}
}
}
/**
* Compares two KeyValue
* @param left
* @param right
* @return less than 0 if left is smaller, 0 if equal etc..
*/
public int compare(Cell left, Cell right) {
return this.kvComparator.compare(left, right);
}
/**
* @return KVComparator
*/
public KVComparator getComparator() {
return this.kvComparator;
}
}
2. CellComparator
/**
 * Compares the row portions of two cells by handing each cell's row
 * array, offset and length to {@code Bytes.compareTo}.
 * @param left first cell
 * @param right second cell
 * @return whatever {@code Bytes.compareTo} returns for the two row ranges
 */
public static int compareRows(final Cell left, final Cell right) {
  final byte[] leftRow = left.getRowArray();
  final int leftStart = left.getRowOffset();
  final int leftSize = left.getRowLength();

  final byte[] rightRow = right.getRowArray();
  final int rightStart = right.getRowOffset();
  final int rightSize = right.getRowLength();

  return Bytes.compareTo(leftRow, leftStart, leftSize, rightRow, rightStart, rightSize);
}
3. Bytes Comparator
// Bytes.java
/**
 * Compares two byte ranges by delegating to the comparer selected in
 * {@code LexicographicalComparerHolder.BEST_COMPARER}.
 * @param buffer1 array holding the left range
 * @param offset1 start of the left range within {@code buffer1}
 * @param length1 number of bytes in the left range
 * @param buffer2 array holding the right range
 * @param offset2 start of the right range within {@code buffer2}
 * @param length2 number of bytes in the right range
 * @return the result reported by the selected comparer
 */
public static int compareTo(
    byte[] buffer1, int offset1, int length1,
    byte[] buffer2, int offset2, int length2) {
  return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
      buffer1, offset1, length1,
      buffer2, offset2, length2);
}
3.1 Java版本的实现
这里也体现了一种单例的写法: 用只包含一个INSTANCE元素的枚举来实现单例
/**
 * Byte-by-byte comparer written in plain Java. Declared as an enum with a
 * single {@code INSTANCE} element.
 */
enum PureJavaComparer implements Comparer<byte[]> {
  INSTANCE;

  /**
   * Compares two byte ranges, treating every byte as an unsigned value.
   * @return 0 when both ranges are the very same array, offset and length;
   *         otherwise the difference of the first pair of bytes that differ,
   *         or {@code length1 - length2} when no differing pair is found
   */
  @Override
  public int compareTo(byte[] buffer1, int offset1, int length1,
      byte[] buffer2, int offset2, int length2) {
    // Same array, same offset, same length: equal without looking at the bytes.
    final boolean sameRegion =
        buffer1 == buffer2 && offset1 == offset2 && length1 == length2;
    if (sameRegion) {
      return 0;
    }

    final int limit1 = offset1 + length1;
    final int limit2 = offset2 + length2;
    int p = offset1;
    int q = offset2;
    while (p < limit1 && q < limit2) {
      // Mask to 0..255 so bytes compare as unsigned values.
      final int lhs = buffer1[p] & 0xff;
      final int rhs = buffer2[q] & 0xff;
      if (lhs != rhs) {
        return lhs - rhs;
      }
      p++;
      q++;
    }
    // One range ran out without a differing byte: the length difference decides.
    return length1 - length2;
  }
}
3.2 Unsafe的实现
Unsafe版本采用逐级比较: 先每次比较8个字节(long), 剩余不足8字节的部分再依次按4字节(int)、2字节(short)、1字节(byte)比较
/**
 * Compares two byte ranges in progressively smaller chunks: 8-byte words
 * first, then at most one 4-byte read, one 2-byte read and one single byte
 * for whatever remains of the common prefix.
 * @return 0 when both ranges are the very same array, offset and length;
 *         -1 or 1 when a multi-byte chunk differs (direction decided by the
 *         {@code lessThanUnsigned*} helpers); the unsigned byte difference
 *         when only the last single byte differs; otherwise
 *         {@code length1 - length2}
 */
@Override
public int compareTo(byte[] buffer1, int offset1, int length1,
    byte[] buffer2, int offset2, int length2) {
  // Same array, same offset, same length: equal without reading any bytes.
  if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) {
    return 0;
  }

  final int common = Math.min(length1, length2);
  final long base1 = offset1 + BYTE_ARRAY_BASE_OFFSET;
  final long base2 = offset2 + BYTE_ARRAY_BASE_OFFSET;

  /*
   * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a
   * time is no slower than comparing 4 bytes at a time even on 32-bit.
   * On the other hand, it is substantially faster on 64-bit.
   */
  // Number of leading bytes covered by whole long words (word count shifted by 3).
  final int wordBytes = (common / SIZEOF_LONG) << 3;
  for (int pos = 0; pos < wordBytes; pos += SIZEOF_LONG) {
    final long leftWord = theUnsafe.getLong(buffer1, base1 + pos);
    final long rightWord = theUnsafe.getLong(buffer2, base2 + pos);
    if (leftWord != rightWord) {
      // NOTE(review): lessThanUnsignedLong is defined elsewhere; presumably it
      // orders the words as unsigned values in byte order - confirm.
      if (lessThanUnsignedLong(leftWord, rightWord)) {
        return -1;
      }
      return 1;
    }
  }

  // Bytes of the common prefix already compared.
  int done = wordBytes;

  if (common - done >= SIZEOF_INT) {
    final int leftInt = theUnsafe.getInt(buffer1, base1 + done);
    final int rightInt = theUnsafe.getInt(buffer2, base2 + done);
    if (leftInt != rightInt) {
      if (lessThanUnsignedInt(leftInt, rightInt)) {
        return -1;
      }
      return 1;
    }
    done += SIZEOF_INT;
  }

  if (common - done >= SIZEOF_SHORT) {
    final short leftShort = theUnsafe.getShort(buffer1, base1 + done);
    final short rightShort = theUnsafe.getShort(buffer2, base2 + done);
    if (leftShort != rightShort) {
      if (lessThanUnsignedShort(leftShort, rightShort)) {
        return -1;
      }
      return 1;
    }
    done += SIZEOF_SHORT;
  }

  if (common - done == 1) {
    // Last odd byte: read through the array and mask to 0..255.
    final int lhs = buffer1[offset1 + done] & 0xff;
    final int rhs = buffer2[offset2 + done] & 0xff;
    if (lhs != rhs) {
      return lhs - rhs;
    }
  }

  // Common prefix is identical: the length difference decides.
  return length1 - length2;
}
ps: 看HBase1.2的源码的时候摘录的一些代码片段
网友评论