137 lines
3.4 KiB
C++
137 lines
3.4 KiB
C++
#pragma once
|
|
|
|
#include <iostream>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <cmath>
|
|
#include "BitSet.hpp"
|
|
|
|
namespace Lenyiin
|
|
{
|
|
struct HashStr1
|
|
{
|
|
// BKDR
|
|
size_t operator()(const std::string &str)
|
|
{
|
|
size_t hash = 0;
|
|
for (size_t i = 0; i < str.size(); i++)
|
|
{
|
|
hash *= 131;
|
|
hash += str[i];
|
|
}
|
|
return hash;
|
|
}
|
|
};
|
|
|
|
struct HashStr2
|
|
{
|
|
// SDBM
|
|
size_t operator()(const std::string &str)
|
|
{
|
|
size_t hash = 0;
|
|
for (size_t i = 0; i < str.size(); i++)
|
|
{
|
|
hash *= 65599;
|
|
hash += str[i];
|
|
}
|
|
return hash;
|
|
}
|
|
};
|
|
|
|
struct HashStr3
|
|
{
|
|
// RS
|
|
size_t operator()(const std::string &str)
|
|
{
|
|
size_t hash = 0;
|
|
size_t magic = 63689; // 魔数
|
|
for (size_t i = 0; i < str.size(); i++)
|
|
{
|
|
hash *= magic;
|
|
hash += str[i];
|
|
magic *= 378551;
|
|
}
|
|
return hash;
|
|
}
|
|
};
|
|
|
|
// 布隆过滤器底层是个位图
|
|
template <class K = std::string, class Hash1 = HashStr1, class Hash2 = HashStr2, class Hash3 = HashStr3>
|
|
class BloomFilter
|
|
{
|
|
public:
|
|
BloomFilter(size_t num)
|
|
: _bs(5 * num), _size(5 * num)
|
|
{
|
|
}
|
|
|
|
// 插入元素
|
|
void insert(const K &key)
|
|
{
|
|
// 将 key 映射到三张位图上
|
|
size_t index1 = Hash1()(key) % _size; // Hash1() 是仿函数类型, Hash1()() 是仿函数匿名对象
|
|
size_t index2 = Hash2()(key) % _size;
|
|
size_t index3 = Hash3()(key) % _size;
|
|
|
|
// std::cout << index1 << std::endl;
|
|
// std::cout << index2 << std::endl;
|
|
// std::cout << index3 << std::endl;
|
|
|
|
_bs.set(index1);
|
|
_bs.set(index2);
|
|
_bs.set(index3);
|
|
}
|
|
|
|
// 查询元素是否存在
|
|
bool contains(const K &key)
|
|
{
|
|
size_t index1 = Hash1()(key) % _size;
|
|
if (_bs.test(index1) == false)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
size_t index2 = Hash2()(key) % _size;
|
|
if (_bs.test(index2) == false)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
size_t index3 = Hash3()(key) % _size;
|
|
if (_bs.test(index3) == false)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
return true; // 但是这里也不一定是真的在, 还是可能存在误判
|
|
// 判断在, 是不准确的, 可能存在误判, 判断不在是准确的
|
|
}
|
|
|
|
// 删除元素
|
|
void reset(const K &key)
|
|
{
|
|
// 将映射的位置给置零就可以?
|
|
// 不支持删除, 可能会存在误删。
|
|
// 一般布隆过滤器不支持删除操作。
|
|
|
|
// size_t index1 = Hash1()(key) % _size;
|
|
// _bs.reset(index1);
|
|
|
|
// size_t index2 = Hash2()(key) % _size;
|
|
// _bs.reset(index2);
|
|
|
|
// size_t index3 = Hash3()(key) % _size;
|
|
// _bs.reset(index3);
|
|
}
|
|
|
|
// 获取位数组的大小
|
|
size_t size() const
|
|
{
|
|
return _size;
|
|
}
|
|
|
|
private:
|
|
BitSet _bs; // 位图
|
|
size_t _size;
|
|
};
|
|
} |