#pragma once #include #include #include // unordered_set -> HashTable // unordered_map -> HashTable> namespace Lenyiin { template struct SetKeyOfT { const K &operator()(const K &key) { return key; } }; enum State { EMPTY, // 槽位为空 EXIST, // 槽位已经存在一个元素 DELETE // 槽位中元素被删除 }; template struct HashData { T _data; State _state; HashData() : _data(T()), _state(EMPTY) { } }; template class Close_HashTable { private: typedef struct HashData HashData; size_t SecondHash(const K &key, size_t table_size) { return 1 + (key % (table_size - 1)); } public: // 负载因子 = 表中数据/表的大小衡量哈希表满的程度 // 表越接近满, 插入数据越容易冲突, 冲突越多, 效率越低 // 哈希表并不是满了才增容, 开放定制法中, 一般负载因子到 0.7 左右就开始增容 // 负载因子越小, 冲突概率越低, 整体效率越高, 但是负载因子越小, 浪费的空间越大, 所以负载因子一般取一个折中的值 void CheckCapacity() { KeyOfT koft; // // version 1 // if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7) // { // // 增容 // // 1. 开 2倍大小的新表 // // 2. 遍历旧表的数据，重新计算在新表中位置 // // 3. 释放旧表 // std::vector newtables; // size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2; // newtables.resize(newsize); // for (size_t i = 0; i < _tables.size(); i++) // { // if (_tables[i]._state == EXIST) // { // // 计算在新表中的位置, 并处理冲突 // size_t index = koft(_tables[i]._data) % newtables.size(); // while (newtables[index]._state == EXIST) // { // ++index; // if (index == _tables.size()) // { // index = 0; // } // } // newtables[index] = _tables[i]; // } // } // _tables.swap(newtables); // } // // version 2 // if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7) // { // // 增容 // // 1. 开 2倍大小的新表 // // 2. 遍历旧表的数据，重新计算在新表中位置 // // 3. 释放旧表 // std::vector newtables; // size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2; // newtables.resize(newsize); // for (size_t i = 0; i < _tables.size(); i++) // { // if (_tables[i]._state == EXIST) // { // // 重新计算新表中的位置 // size_t index = koft(_tables[i]._data) % newtables.size(); // size_t step = SecondHash(koft(_tables[i]._data), newtables.size()); // // 处理冲突：双重哈希探测 // while (newtables[index]._state == EXIST) // { // index = (index + step) % newtables.size(); // } // // 插入元素到新表 // newtables[index] = _tables[i]; // } // } // _tables.swap(newtables); // } // version 3 // 另一种增容思路 if (_tables.size() == 0 || _num * 10 / _tables.size() >= 7) { Close_HashTable newht; size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2; newht._tables.resize(newsize); for (size_t i = 0; i < _tables.size(); i++) { if (_tables[i]._state == EXIST) { newht.Insert(_tables[i]._data); } } _tables.swap(newht._tables); } } bool Insert(const T &data) { KeyOfT koft; CheckCapacity(); // 闭散列中线性探测有什么问题? // 线性探测思路就是我的位置被占了, 我就挨着往后去占别人的位置, 可能会导致一片一片的冲突, 洪水效应 // version 1 // 线性探测 // 计算 data 中的 key 在表中映射的位置 // size_t index = koft(data) % _tables.size(); // while (_tables[index]._state == EXIST) // { // if (koft(_tables[index]._data) == koft(data)) // { // return false; // 已经存在 // } // ++index; // if (index == _tables.size()) // { // index = 0; // } // } // version 2 // 二次探测 // 计算 data 中的 key 在表中映射的位置 // size_t start = koft(data) % _tables.size(); // size_t index = start; // int i = 0; // while (_tables[index]._state == EXIST) // { // if (koft(_tables[index]._data) == koft(data)) // { // return false; // 已经存在 // } // index = start + i * i; // i++; // index %= _tables.size(); // } // version 3 // 双重哈希 size_t index = koft(data) % _tables.size(); size_t step = SecondHash(koft(data), _tables.size()); while (_tables[index]._state == EXIST) { if (koft(_tables[index]._data) == koft(data)) { return false; // 如果找到相同的 key，插入失败 } index = (index + step) % _tables.size(); // 使用双重哈希计算下一个位置 } _tables[index]._data = data; _tables[index]._state = EXIST; ++_num; // 我么可以看到闭散列-开放定制法不是一种好的解决方式, 因为它是一种我的位置被占了, 我就去抢占别人的位置的思路 // 也就是说他的哈希冲突会相互影响, 我冲突占你的, 你冲突占他的, 他冲突了... , 没完没了, 整体的效率都变低了 // 开散列的哈希桶可以解决上面的问题 return true; } // 线性探测 // HashData *Find(const K &key) // { // KeyOfT koft; // // 计算 data 中的 key 在表中映射的位置 // size_t index = key % _tables.size(); // while (_tables[index]._state != EMPTY) // { // if (koft(_tables[index]._data) == key) // { // if (_tables[index]._state == EXIST) // { // return &_tables[index]; // } // else if (_tables[index]._state == DELETE) // { // return nullptr; // } // } // ++index; // if (index == _tables.size()) // { // index = 0; // } // } // return nullptr; // } // 双重哈希 HashData *Find(const K &key) { KeyOfT koft; size_t index = key % _tables.size(); size_t step = SecondHash(key, _tables.size()); // 计算步长 while (_tables[index]._state != EMPTY) { if (koft(_tables[index]._data) == key) { if (_tables[index]._state == EXIST) { return &_tables[index]; } else if (_tables[index]._state == DELETE) { return nullptr; } } index = (index + step) % _tables.size(); // 使用双重哈希探测下一个位置 } return nullptr; } bool Erase(const K &key) { HashData *ret = Find(key); if (ret) { ret->_state = DELETE; --_num; return true; } else { return false; } } HashData &getHashData(int pos) { return _tables[pos]; } void Print() { int size = _tables.size(); for (int i = 0; i < size; i++) { std::cout << i << "\t"; } std::cout << std::endl; for (int i = 0; i < size; i++) { auto cur = _tables[i]; if (cur._state == EXIST) { std::cout << cur._data << "\t"; } else { std::cout << "*\t"; } } std::cout << "\n\n"; } private: std::vector _tables; size_t _num = 0; // 存储了几个有效数据 }; template struct HashNode { T _data; // 存储数据 HashNode *_next; // 存储下一个节点 // 如果想要实现迭代顺序为插入顺序, 可以加两个指针组成一个链表 // HashNode* _linknext; // HashNode* _linkprev; HashNode(const T &data) : _data(data), _next(nullptr) { } }; // 前置声明 template class Open_HashTable; // 哈希表只有单向迭代器, 只有 ++, 没有-- template struct __HashTableIterator { typedef __HashTableIterator Self; typedef Open_HashTable HT; typedef HashNode Node; Node *_node; HT *_pht; __HashTableIterator(Node *node, HT *pht) : _node(node), _pht(pht) { } T &operator*() { return _node->_data; } T *operator->() { return &_node->_data; } Self &operator++() { if (_node->_next) { _node = _node->_next; } else { // 如果一个桶走完了, 找到下一个桶继续便利 KeyOfT koft; size_t index = _pht->HashFunc(koft(_node->_data)) % _pht->_tables.size(); ++index; while (index < _pht->_tables.size()) { Node *cur = _pht->_tables[index]; if (cur) { _node = cur; return *this; } ++index; } _node = nullptr; } return *this; } Self operator++(int) { Self tmp(*this); ++*this; return tmp; } bool operator!=(const Self &s) { return _node != s._node; } bool operator==(const Self &s) { return _node == s._node; } }; template struct _Hash { const K &operator()(const K &key) { return key; } }; // 特化 template <> struct _Hash { size_t operator()(const std::string &key) { // BKDR Hash size_t hash = 0; for (size_t i = 0; i < key.size(); i++) { hash *= 131; hash += key[i]; } return hash; } }; struct _HashString { size_t operator()(const std::string &key) { // BKDR Hash size_t hash = 0; for (size_t i = 0; i < key.size(); i++) { hash *= 131; hash += key[i]; } return hash; } }; template // template > class Open_HashTable { private: typedef HashNode Node; public: friend struct __HashTableIterator; typedef __HashTableIterator iterator; iterator begin() { for (size_t i = 0; i < _tables.size(); i++) { if (_tables[i]) { return iterator(_tables[i], this); } } return end(); } iterator end() { return iterator(nullptr, this); } Open_HashTable() { } Open_HashTable(size_t bucket_count) : _tables(bucket_count), _num(0) { } ~Open_HashTable() { Clear(); } void Clear() { for (size_t i = 0; i < _tables.size(); i++) { Node *cur = _tables[i]; while (cur) { Node *next = cur->_next; delete cur; cur = next; } _tables[i] = nullptr; } } size_t HashFunc(const K &key) { Hash hash; return hash(key); } size_t GetNextPrime(size_t num) { const int PrimeSize = 28; static const unsigned long PrimeList[PrimeSize] = { 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}; for (size_t i = 0; i < PrimeSize; i++) { if (PrimeList[i] > num) { return PrimeList[i]; } } return PrimeList[PrimeSize - 1]; // 如果已经是最后一个数的, 则不增容 } // 重新哈希 void Rehash(size_t newsize) { KeyOfT koft; std::vector newtables; newtables.resize(newsize); for (size_t i = 0; i < _tables.size(); i++) { // 将旧表中的节点取下来, 重新计算在新表中的位置, 并插入进去 Node *cur = _tables[i]; while (cur) { Node *next = cur->_next; size_t index = HashFunc(koft(cur->_data)) % newtables.size(); cur->_next = newtables[index]; newtables[index] = cur; cur = next; } _tables[i] = nullptr; } _tables.swap(newtables); } // 插入操作 // 当大量的数据冲突, 这些哈希冲突的数据就会挂在同一个链式桶中, 查找时效率就会降低, 所以开散列-哈希桶也是要控制哈希冲突的。 // 如何控制呢? 通过控制负载因子, 不过这里就把空间利用率提高一些, 负载因子也可以高一些, 一般开散列把负载因子控制到1, 会比较好一点 std::pair Insert(const T &data) { KeyOfT koft; // 1. 检查负载因子 // 如果负载因子等于 1 , 则增容, 避免大量的哈希冲突 if (_tables.size() == _num) { // 1. 开两倍大小的新表（不一定是两倍） // 2. 遍历旧表的数据, 重新计算在新表中的位置 // 3. 释放旧表 size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2; // size_t newsize = GetNextPrime(_tables.size()); Rehash(newsize); } // 2. 计算数据在表中映射的位置 size_t index = HashFunc(koft(data)) % _tables.size(); // 3. 先查找这个值在不在表中, 是否有冲突 Node *cur = _tables[index]; while (cur) { if (HashFunc(koft(cur->_data)) == HashFunc(koft(data))) { // 如果已经存在该键，返回失败 return std::make_pair(iterator(cur, this), false); } else { // 查找下一个节点 cur = cur->_next; } } // 4. 头插挂到链表中(尾插也是可以的) Node *newnode = new Node(data); newnode->_next = _tables[index]; _tables[index] = newnode; ++_num; // 更新已存储元素数量 return std::make_pair(iterator(newnode, this), true); } // 查找操作 Node *Find(const K &key) { KeyOfT koft; // 1. 计算键在表中映射的位置 size_t index = HashFunc(key) % _tables.size(); Node *cur = _tables[index]; // 2. 遍历链表查找匹配的键 while (cur) { if (HashFunc(koft(cur->_data)) == HashFunc(key)) { // 如果找到匹配的元素，返回其指针 return cur; } // 继续查找下一个节点 cur = cur->_next; } // 如果未找到，返回空指针 return nullptr; } bool Erase(const K &key) { KeyOfT koft; // 1. 计算要删除元素的哈希值 size_t index = HashFunc(key) % _tables.size(); Node *prev = nullptr; Node *cur = _tables[index]; // 2. 遍历链表, 查找匹配的元素 while (cur) { if (HashFunc(koft(cur->_data)) == HashFunc(key)) { // 3. 找到元素后, 调整链表结构 if (prev == nullptr) { // 如果要删除的元素是链表的第一个节点, 直接让桶指向下一个节点 _tables[index] = cur->_next; } else { // 否则，将前一个节点的 next 指向当前节点的下一个节点 prev->_next = cur->_next; } // 4. 释放节点内存 delete cur; --_num; // 元素数量减少 return true; } else { // 继续遍历链表 prev = cur; cur = cur->_next; } } // 如果未找到该元素，返回 false return false; } void Print() const { KeyOfT koft; int size = _tables.size(); for (int i = 0; i < size; i++) { std::cout << i << "\t"; Node *cur = _tables[i]; while (cur) { std::cout << koft(cur->_data) << "\t"; cur = cur->_next; } std::cout << std::endl; } std::cout << std::endl; } private: std::vector _tables; // 哈希表存储桶 size_t _num; // 记录着存储的数据个数 }; }