基于Redis BloomFilter算法,redisbloomfilter
public class BloomFilter implements Serializable {
private static final long serialVersionUID = 5829598197124113258L;
private static final int DEFAULT_SIZE =2 << 25 ; //2的24次方
private static final int [] seeds =new int []{5,7,11,13,19,31,37,61,73,89};//构造不同因子,用于不同散列函数
private BitSet bits= new BitSet(DEFAULT_SIZE);
private SimpleHash[] func=new SimpleHash[seeds.length];//对不同的数字构造不同Hash函数避免冲突
private int size = 0;
public BloomFilter() {
for (int i = 0; i < seeds.length; i++) {//初始化构造函数
func[i] = new SimpleHash(DEFAULT_SIZE, seeds[i]);
}
}
public void add(String value) {
if(!contains(value)){
size++;
}
for (SimpleHash f : func) { //添加hash值映射到相应位
bits.set(f.hash(value), true);
}
}
public boolean contains(String value) { //判断存在有一定误判率,所有位置为1不一定存在
if (value == null) {
return false;
}
boolean ret = true;
for (SimpleHash f : func) {
if(!bits.get(f.hash(value)){return false};//按照添加时候hash函数计算是否true,判断存在有一定误判率,所有位置为1不一定存在
}
return true;
}
public int getSize(){
return size;
}
public static class SimpleHash implements Serializable {
private static final long serialVersionUID = 1286028965900771610L;
private int cap;
private int seed;
public SimpleHash(int cap, int seed) {
this.cap = cap;
this.seed = seed;
}
public int hash(String value) {//hash值通过数字个数和数字散列后得到值
int result = 0;
int len = value.length();
for (int i = 0; i < len; i++) {
result = seed * result + value.charAt(i);
}
return (cap - 1) & result;
}
}
public static void main(String[] args) {
String value = "aaa@yyy.com";
String value2 = "aaa@yyy.com";
String value3 = "bbb@yyy.com";
BloomFilter filter = new BloomFilter();
System.out.println(filter.contains(value));
filter.add(value);
filter.add(value2);
if(filter.noContains(value3)){
System.out.println(value3+" 一定不存在 ");
}
}
/*获取100内质数*/
public static int[] getAllSeeds(){
return new int[]{2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97};
}
}//基于Redis BitSet,由单机扩展到分布式
public void add(byte[] bytes) { int[] hashes = createHashes(bytes, k); for (int hash : hashes) bitset.set(Math.abs(hash % bitSetSize), true); }
public boolean contains(byte[] bytes) { int[] hashes = createHashes(bytes, k); for (int hash : hashes) { if (!bitset.get(Math.abs(hash % bitSetSize))) { return false; } } return true; }
本站文章为和通数据库网友分享或者投稿,欢迎任何形式的转载,但请务必注明出处.
同时文章内容如有侵犯了您的权益,请联系QQ:970679559,我们会在尽快处理。