Incorrect count output / Having difficulty trying to create a HashTable/Set using open addressing

134 Views Asked by At

I'm trying to create a program that opens a .txt file containing a speech and assigns each word to a space in the array/set based on the hash value. Collisions are accounted for using open addressing method. The program should be able to perform the following functions: add(), remove(), find(), count() which keeps count of the elements IN the array/set, and loadfactor(). A template header.h file was provided that required some filling in, but my unfamiliarity with that style of coding was making it difficult for me to understand it. Below I have provided the code I have so far, and everything seems to be working except the mCount. The speech contains about 300 words but when I run the code, the count output shows 17. I'm assuming the error is in my resizing function but I am unsure.

//hashset.h file
#pragma once

#include <cmath>
#include <functional>
#include <vector>

template <typename TValue, typename TFunc>
class HashSet
{
private:
    // Unlike Java, the C++ hashtable array won't be full of null references.
    // It will be full of "Entry" objects, each of which tracks its current state
    // as EMPTY, FULL (with a value), or NIL (value was once here, but was removed).
    class Entry
    {
    public:
        enum EntryState { EMPTY, FULL, NIL };

        TValue value;
        EntryState state;

        Entry() : value(), state(EMPTY) {}
        Entry(const TValue& v) : value(v), state(EMPTY) {}
    };

    TFunc mHash;
    std::vector<Entry> mTable;
    std::size_t mCount;

public:
    // Constructs a hashtable with the given size, using the given function for
    // computing h(k).
    // hash must be a callable object (function, functor, etc.) that takes a parameter
    //    of type TValue and returns std::size_t (an integer).
    HashSet(int size, TFunc hash) : mHash(hash)
    {
        // initialize count
        mCount = 0;

        // hashtable array cannot be same data type as that of what is being stored (cannot be string) 
        // requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value
        size = pow(2, (int(log(size - 1) / log(2)) | 1));

        mTable.resize(size); // resizes the vector to have given size.
                             // Each element will be default-constructed to have state EMPTY.
    }

    void resize(int new_size) {
        HashSet aux{ new_size, mHash }; //double the size, same hash function
        for (const auto& entry : mTable)
            if (entry.state == Entry::FULL && entry.state == Entry::EMPTY && entry.state == Entry::NIL) //there is an element
                aux.add(entry.value); //insert it on the new set
        *this = aux;
    }

    // Inserts the given value into the set.
    void add(const TValue& value)
    {
        // Use the type std::size_t for working with hash table indices.
        // Invoke the mHash function, passing the key to calculate h(k), as in

        // size_t hashCode = mHash(value);

        // Mod down to size.
        // Go to the table at that index and do the insertion routine.

        // Note, if table is full when trying to add an element, it should double in size 
        // to keep table size a power of 2
        
        if (double(mCount) / mTable.size() > 0.8) // load factor comparison
            this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition 

        size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index

        if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
            mTable[hashCode].value = value; // store value in vector index specified by hashCode
            mCount++; // increment counter when word is added
        }

        else {
            for (std::size_t i = 1; i < mTable.size(); i++) {
                // use open addressing to find next open space
                if (mTable[hashCode].state != Entry::EMPTY) {
                    hashCode = ((mHash(value) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
                }

                else if (mTable[hashCode].value == value) { // account for duplicates 
                    break; // exit for-loop
                }

                else if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
                    mTable[hashCode].value = value; // store value in vector index specified by new hashCode
                    mCount++; // increment counter when word is added
                    break; // exit for-loop
                }

                else
                    break; // exit for-loop
            } 
        }
    }

    // Returns true if the given value is present in the set.
    bool find(const TValue& key)
    {
        size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
        if (mTable[hashCode].value == key)
            return true;

        else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // check that set is not empty or has a NIL state
            for (std::size_t i = 1; i < mTable.size(); i++) {
                // use open addressing again to find key
                if (mTable[hashCode].value != key)
                    hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();

                else if (mTable[hashCode].value == key) {
                    return true; // value found at speecified location
                    break; // exit for-loop as first instance of value has been found
                }

                //else if (i == mTable.size()) // end of table reached, element not in set
                    //return false;
            }
        }

        else // end of table reached, element was not in set
            return false;
    }

    // Removes the given value from the set.
    void remove(const TValue& key)
    {
        size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
        if (mTable[hashCode].value == key) {
            mTable[hashCode].value = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element 
            mCount--; // decrement element counter
        }

        else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state != Entry::NIL) { // check that there is a value to be removed
            for (std::size_t i = 1; i < mTable.size(); i++) {
                // use open addressing again to find key
                if (mTable[hashCode].value != key) {
                    hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
                }

                else {
                    mTable[hashCode].value = Entry::NIL; // if found after open addressing, replace with NIL
                    mCount--; // decrement element counter
                }
            }
        }
    }

    int count() {
        return mCount;
    }

    double loadFactor() {
        double a = double(mCount) / mTable.size();
        return a;
    }
};
// main function
#include "hashset.h"
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

int main()
{
    string testline;
    vector<string> word;

    HashSet<std::string, std::hash<std::string> > obj1{ 50, std::hash<std::string>{} };

    ifstream Test("speech.txt");

    if (!Test)
    {
        cout << "There was an error opening the file.\n";
        return 0;
    }

    //store words in vector
    while (Test >> testline) {
        word.push_back(testline);
        //obj1.add(testline);
    }

    //output whole vector with position numbers for each entry
    cout << "Array contents:\n";
    for (int i = 0; i < word.size(); i++) {
        obj1.add(word[i]);
        cout << word[i] << "(" << i << ")" << endl;
    }

    cout << "current count: " << obj1.count() << endl;

    obj1.add("abcd"); // should hash to 4
    if (obj1.find("abcd")) 
        cout << "abcd is in the set " << endl;
    else
        cout << "abcd is not in set " << endl;

    obj1.add("adcb"); // should hash to 4 then 5 after probing
    if (obj1.find("adcb"))
        cout << "adcb is in the set " << endl;
    else
        cout << "adcb is not in set " << endl;

    obj1.add("acde"); // should hash to 4 then 7 after probing
    if (obj1.find("acde"))
        cout << "acde is in the set " << endl;
    else
        cout << "acde is not in set " << endl;

    obj1.remove("adcb"); // 5 should have NIL
    if (obj1.find("adcb"))
        cout << "adcb is in the set " << endl;
    else
        cout << "adcb is not in set " << endl;

    if (obj1.find("acde"))
        cout << "acde is still in the set " << endl;
    else
        cout << "acde is not in set " << endl;

    cout << "final count: " << obj1.count() << endl;

    system("pause");
    exit(0);
}

}
1

There are 1 best solutions below

2
Durstann On

The errors around NIL are because the enum defining NIL is part of the Entry class. You need to prefix NIL with the class name so the compile knows where the keyword comes from.

else if (mTable[hashCode] != NULL || mTable == Entry::NIL) { // getting error NIL identifier not found

The HashSet variable declaration is complaining because you are passing the wrong types. HashSet constructor takes a size and and a hash function. You are passing it a size and a string. Note the comment above the HashSet constructor

    // hash must be a callable object (function, functor, etc.) that takes a parameter
    //    of type TValue and returns std::size_t (an integer).

This is your clue how to construct a HashSet object.