Trying to quick sort a suffix array

72 Views Asked by At

I am trying to apply quick sort to suffix array. The suffix array contains all the starting indexes of the string. The first partition works but not the others. I have reviewed many quick sort code examples and can't figure out why does not sort correctly. I am currently only sorting the array from 0 to 20 as a test.

Edit: Now uses vector for suffix array. No longer has unsigned ints.

main.cpp

#include <iostream>
#include <string>
#include <vector>

int compareSuffix(const std::string &text, int suffixIndexA, int suffixIndexB);
int suffixArrayPartition(std::string &text, std::vector<int> &suffixArray, int left, int right);
void suffixArrayQuickSort(const std::string &text, std::vector<int> &suffixArray, int left, int right);
void suffixArraySwap(std::vector<int> &suffixArray, int indexA, int indexB);

int main(int argc, char const *argv[])
{
    std::string pi = "141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067";
    std::string search = "209"; // index 3
    std::vector<int> suffixArray;
    for (unsigned int i = 0; i < pi.size(); i++)
    {
        suffixArray.push_back(i);
    }

    suffixArrayQuickSort(pi, suffixArray, 0, 20);

    for (int i = 0; i < 21; i++)
    {
        std::cout << suffixArray[i] << ":" << pi.substr(suffixArray[i], pi.size() - suffixArray[i]) << std::endl;
    }

    return 0;
}

int compareSuffix(const std::string &text, int suffixIndexA, int suffixIndexB)
{
    int length;
    if (suffixIndexA > suffixIndexB)
    {
        length = text.size() - suffixIndexA;
    }
    else
    {
        length = text.size() - suffixIndexB;
    }

    for (int i = 0; i < length; i++)
    {
        if (text[suffixIndexA + i] > text[suffixIndexB + i])
        {
            return 1;
        }
        else if (text[suffixIndexA + i] < text[suffixIndexB + i])
        {
            return -1;
        }
    }

    if (suffixIndexA < suffixIndexB)
    {
        return 1;
    }
    else if (suffixIndexA > suffixIndexB)
    {
        return -1;
    }
    else
    {
        return 0;
    }
}

int suffixArrayPartition(const std::string &text, std::vector<int> &suffixArray, int left, int right)
{
    int i = left - 1;

    for (int j = left; j < right; j++)
    {
        // std::cout << "jString: " << text.substr(j, 20) << std::endl;
        // std::cout << "rightString: " << text.substr(right, 20) << std::endl;
        if (compareSuffix(text, j, right) == -1)
        {
            i++;
            suffixArraySwap(suffixArray, j, i);
        }
    }

    suffixArraySwap(suffixArray, i + 1, right);

    return i + 1;
}

void suffixArrayQuickSort(const std::string &text, std::vector<int> &suffixArray, int left, int right)
{
    if (left >= right)
    {
        return;
    }
    unsigned int p = suffixArrayPartition(text, suffixArray, left, right);

    suffixArrayQuickSort(text, suffixArray, left, p - 1);
    suffixArrayQuickSort(text, suffixArray, p + 1, right);
}

void suffixArraySwap(std::vector<int> &suffixArray, int indexA, int indexB)
{
    unsigned int tmp = suffixArray[indexA];
    suffixArray[indexA] = suffixArray[indexB];
    suffixArray[indexB] = tmp;
}

output:

0:141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
15:238462643383279502884197169399375105820974944592307816406286208998628034825342117067
2:1592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
20:2643383279502884197169399375105820974944592307816406286208998628034825342117067
1:41592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
3:592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
4:92653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
5:2653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
6:653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
7:53589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
8:3589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
9:589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
10:89793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
11:9793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
12:793238462643383279502884197169399375105820974944592307816406286208998628034825342117067
13:93238462643383279502884197169399375105820974944592307816406286208998628034825342117067
14:3238462643383279502884197169399375105820974944592307816406286208998628034825342117067
16:38462643383279502884197169399375105820974944592307816406286208998628034825342117067
17:8462643383279502884197169399375105820974944592307816406286208998628034825342117067
18:462643383279502884197169399375105820974944592307816406286208998628034825342117067
19:62643383279502884197169399375105820974944592307816406286208998628034825342117067
0

There are 0 best solutions below