I'm trying to create a program that opens a .txt file containing a speech and assigns each word to a space in the array/set based on the hash value. Collisions are accounted for using open addressing method. The program should be able to perform the following functions: add(), remove(), find(), count() which keeps count of the elements IN the array/set, and loadfactor(). A template header.h file was provided that required some filling in, but my unfamiliarity with that style of coding was making it difficult for me to understand it. Below I have provided the code I have so far, and everything seems to be working except the mCount. The speech contains about 300 words but when I run the code, the count output shows 17. I'm assuming the error is in my resizing function but I am unsure.
//hashset.h file
#pragma once
#include <cmath>
#include <functional>
#include <vector>
template <typename TValue, typename TFunc>
class HashSet
{
private:
// Unlike Java, the C++ hashtable array won't be full of null references.
// It will be full of "Entry" objects, each of which tracks its current state
// as EMPTY, FULL (with a value), or NIL (value was once here, but was removed).
class Entry
{
public:
enum EntryState { EMPTY, FULL, NIL };
TValue value;
EntryState state;
Entry() : value(), state(EMPTY) {}
Entry(const TValue& v) : value(v), state(EMPTY) {}
};
TFunc mHash;
std::vector<Entry> mTable;
std::size_t mCount;
public:
// Constructs a hashtable with the given size, using the given function for
// computing h(k).
// hash must be a callable object (function, functor, etc.) that takes a parameter
// of type TValue and returns std::size_t (an integer).
HashSet(int size, TFunc hash) : mHash(hash)
{
// initialize count
mCount = 0;
// hashtable array cannot be same data type as that of what is being stored (cannot be string)
// requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value
size = pow(2, (int(log(size - 1) / log(2)) | 1));
mTable.resize(size); // resizes the vector to have given size.
// Each element will be default-constructed to have state EMPTY.
}
void resize(int new_size) {
HashSet aux{ new_size, mHash }; //double the size, same hash function
for (const auto& entry : mTable)
if (entry.state == Entry::FULL && entry.state == Entry::EMPTY && entry.state == Entry::NIL) //there is an element
aux.add(entry.value); //insert it on the new set
*this = aux;
}
// Inserts the given value into the set.
void add(const TValue& value)
{
// Use the type std::size_t for working with hash table indices.
// Invoke the mHash function, passing the key to calculate h(k), as in
// size_t hashCode = mHash(value);
// Mod down to size.
// Go to the table at that index and do the insertion routine.
// Note, if table is full when trying to add an element, it should double in size
// to keep table size a power of 2
if (double(mCount) / mTable.size() > 0.8) // load factor comparison
this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition
size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index
if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
mTable[hashCode].value = value; // store value in vector index specified by hashCode
mCount++; // increment counter when word is added
}
else {
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing to find next open space
if (mTable[hashCode].state != Entry::EMPTY) {
hashCode = ((mHash(value) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
}
else if (mTable[hashCode].value == value) { // account for duplicates
break; // exit for-loop
}
else if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
mTable[hashCode].value = value; // store value in vector index specified by new hashCode
mCount++; // increment counter when word is added
break; // exit for-loop
}
else
break; // exit for-loop
}
}
}
// Returns true if the given value is present in the set.
bool find(const TValue& key)
{
size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
if (mTable[hashCode].value == key)
return true;
else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // check that set is not empty or has a NIL state
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing again to find key
if (mTable[hashCode].value != key)
hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
else if (mTable[hashCode].value == key) {
return true; // value found at speecified location
break; // exit for-loop as first instance of value has been found
}
//else if (i == mTable.size()) // end of table reached, element not in set
//return false;
}
}
else // end of table reached, element was not in set
return false;
}
// Removes the given value from the set.
void remove(const TValue& key)
{
size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
if (mTable[hashCode].value == key) {
mTable[hashCode].value = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element
mCount--; // decrement element counter
}
else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state != Entry::NIL) { // check that there is a value to be removed
for (std::size_t i = 1; i < mTable.size(); i++) {
// use open addressing again to find key
if (mTable[hashCode].value != key) {
hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
}
else {
mTable[hashCode].value = Entry::NIL; // if found after open addressing, replace with NIL
mCount--; // decrement element counter
}
}
}
}
int count() {
return mCount;
}
double loadFactor() {
double a = double(mCount) / mTable.size();
return a;
}
};
// main function
#include "hashset.h"
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
int main()
{
string testline;
vector<string> word;
HashSet<std::string, std::hash<std::string> > obj1{ 50, std::hash<std::string>{} };
ifstream Test("speech.txt");
if (!Test)
{
cout << "There was an error opening the file.\n";
return 0;
}
//store words in vector
while (Test >> testline) {
word.push_back(testline);
//obj1.add(testline);
}
//output whole vector with position numbers for each entry
cout << "Array contents:\n";
for (int i = 0; i < word.size(); i++) {
obj1.add(word[i]);
cout << word[i] << "(" << i << ")" << endl;
}
cout << "current count: " << obj1.count() << endl;
obj1.add("abcd"); // should hash to 4
if (obj1.find("abcd"))
cout << "abcd is in the set " << endl;
else
cout << "abcd is not in set " << endl;
obj1.add("adcb"); // should hash to 4 then 5 after probing
if (obj1.find("adcb"))
cout << "adcb is in the set " << endl;
else
cout << "adcb is not in set " << endl;
obj1.add("acde"); // should hash to 4 then 7 after probing
if (obj1.find("acde"))
cout << "acde is in the set " << endl;
else
cout << "acde is not in set " << endl;
obj1.remove("adcb"); // 5 should have NIL
if (obj1.find("adcb"))
cout << "adcb is in the set " << endl;
else
cout << "adcb is not in set " << endl;
if (obj1.find("acde"))
cout << "acde is still in the set " << endl;
else
cout << "acde is not in set " << endl;
cout << "final count: " << obj1.count() << endl;
system("pause");
exit(0);
}
}
The errors around NIL are because the enum defining NIL is part of the Entry class. You need to prefix NIL with the class name so the compile knows where the keyword comes from.
The HashSet variable declaration is complaining because you are passing the wrong types. HashSet constructor takes a size and and a hash function. You are passing it a size and a string. Note the comment above the HashSet constructor
This is your clue how to construct a HashSet object.