redis实现BloomFilter
bloomFilter 原理介绍
bloomFilter 计算器
BloomFilter
RedisBloomFilter
的java实现,参考service-common
public abstract class BloomFilter<T> implements Serializable {
private int expectedElements;
private double falsePositiveProbablity;
private int numOfBits;
private int numHashFunctions;
public BloomFilter(int expectedElements) {
this(expectedElements, 0.03);
}
public BloomFilter(int expectedElements, double falsePositiveProbability) {
checkArgument(expectedElements >= 0, "Expected insertions (%s) must be >= 0", expectedElements);
checkArgument(falsePositiveProbability > 0.0, "False positive probability (%s) must be > 0.0", falsePositiveProbability);
checkArgument(falsePositiveProbability < 1.0, "False positive probability (%s) must be < 1.0", falsePositiveProbability);
this.expectedElements = expectedElements;
this.falsePositiveProbablity = falsePositiveProbability;
this.numOfBits = optimalNumOfBits(expectedElements, falsePositiveProbability);
this.numHashFunctions = optimalNumOfHashFunctions(expectedElements, numOfBits);
}
public abstract boolean add(T element);
public abstract List<Boolean> addAll(List<T> elements);
public abstract boolean mightContains(T element);
protected int[] hash(byte[] value) {
return hashMurmur3(value, numOfBits, numHashFunctions);
}
private int optimalNumOfBits(long expectedElements, double falsePositiveProbability) {
if (falsePositiveProbability == 0) {
falsePositiveProbability = Double.MIN_VALUE;
}
return (int) (-expectedElements * Math.log(falsePositiveProbability) / (Math.log(2) * Math.log(2)));
}
private static int optimalNumOfHashFunctions(long expectedElements, long numOfBits) {
return Math.max(1, (int) Math.round((double) numOfBits / expectedElements * Math.log(2)));
}
private static int[] hashMurmur3(byte[] value, int numOfBits, int numHashFunctions) {
return rejectionSample(value, numOfBits, numHashFunctions);
}
private static int rejectionSample(int random, int numOfBits) {
random = Math.abs(random);
if (random > (2147483647 - 2147483647 % numOfBits) || random == Integer.MIN_VALUE) {
return -1;
} else {
return random % numOfBits;
}
}
private static int[] rejectionSample(byte[] value, int numOfBits, int numHashFunctions) {
int[] hashes = new int[numHashFunctions];
int seed = 0;
int pos = 0;
while (pos < numHashFunctions) {
seed = murmur3(seed, value);
int hash = rejectionSample(seed, numOfBits);
if (hash != -1) {
hashes[pos++] = hash;
}
}
return hashes;
}
private static int murmur3(int seed, byte[] bytes) {
int h1 = seed; // Standard in Guava
int c1 = 0xcc9e2d51;
int c2 = 0x1b873593;
int len = bytes.length;
int i = 0;
while (len >= 4) {
// process()
int k1 = bytes[i + 0] & 0xFF;
k1 |= (bytes[i + 1] & 0xFF) << 8;
k1 |= (bytes[i + 2] & 0xFF) << 16;
k1 |= (bytes[i + 3] & 0xFF) << 24;
k1 *= c1;
k1 = Integer.rotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = Integer.rotateLeft(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
len -= 4;
i += 4;
}
if (len > 0) {
// processingRemaining()
int k1 = 0;
switch (len) {
case 3 :
k1 ^= (bytes[i + 2] & 0xFF) << 16;
// fall through
case 2 :
k1 ^= (bytes[i + 1] & 0xFF) << 8;
// fall through
case 1 :
k1 ^= (bytes[i] & 0xFF);
// fall through
default :
k1 *= c1;
k1 = Integer.rotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
}
i += len;
}
// makeHash()
h1 ^= i;
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
}
网友评论