Find all the appearances of substring in the string using suffix tree

26 Views Asked by At

I have to print all appearances of the substring in string with help of suffix tree for instance:
for string

abcdabcab

For substring ab there are 3 appearances, so output must be:
0, 4, 7

it's important to be in increasing order

But I have no idea how to implement this with suffix tree, I thought about DFS but I could implement it,

here is my implementation of suffix tree

struct SuffixTreeNode {
    struct SuffixTreeNode *children[MAX_CHAR];
    struct SuffixTreeNode *suffixLink;
    int start;
    int *end;
    int suffixIndex;
};

typedef struct SuffixTreeNode Node;

char text[MAX_TEXT];
Node *root = NULL;
Node *lastNewNode = NULL;
Node *activeNode = NULL;
int activeEdge = -1;
int activeLength = 0;
int remainingSuffixCount = 0;
int leafEnd = -1;
int *rootEnd = NULL;
int *splitEnd = NULL;
int size = -1;

void countingSort(int arr[], int n) {
    int max = arr[0];
    int min = arr[0];

    for (int i = 1; i < n; i++) {
        if (arr[i] > max) {
            max = arr[i];
        }
        if (arr[i] < min) {
            min = arr[i];
        }
    }

    int range = max - min + 1;
    int *countArray = (int *) malloc(range * sizeof(int));

    for (int i = 0; i < range; i++) {
        countArray[i] = 0;
    }

    for (int i = 0; i < n; i++) {
        countArray[arr[i] - min]++;
    }

    int index = 0;
    for (int i = 0; i < range; i++) {
        while (countArray[i] > 0) {
            arr[index] = i + min;
            index++;
            countArray[i]--;
        }
    }

    free(countArray);
}

Node *newNode(int start, int *end) {
    Node *node = (Node *) malloc(sizeof(Node));
    int i;
    for (i = 0; i < MAX_CHAR; i++)
        node->children[i] = NULL;

    node->suffixLink = root;
    node->start = start;
    node->end = end;
    node->suffixIndex = -1;
    return node;
}

int edgeLength(Node *n) {
    if (n == root)
        return 0;
    return *(n->end) - (n->start) + 1;
}

int walkDown(Node *currNode) {
    if (activeLength >= edgeLength(currNode)) {
        activeEdge += edgeLength(currNode);
        activeLength -= edgeLength(currNode);
        activeNode = currNode;
        return 1;
    }
    return 0;
}

void extendSuffixTree(int pos) {
    leafEnd = pos;
    remainingSuffixCount++;
    lastNewNode = NULL;

    while (remainingSuffixCount > 0) {
        if (activeLength == 0)
            activeEdge = pos;

        if (activeNode->children[(int) text[activeEdge]] == NULL) {
            activeNode->children[(int) text[activeEdge]] = newNode(pos, &leafEnd);

            if (lastNewNode != NULL) {
                lastNewNode->suffixLink = activeNode;
                lastNewNode = NULL;
            }
        } else {
            Node *next = activeNode->children[(int) text[activeEdge]];
            if (walkDown(next)) {
                continue;
            }

            if (text[next->start + activeLength] == text[pos]) {
                if (lastNewNode != NULL && activeNode != root) {
                    lastNewNode->suffixLink = activeNode;
                    lastNewNode = NULL;
                }
                activeLength++;
                break;
            }

            splitEnd = (int *) malloc(sizeof(int));
            *splitEnd = next->start + activeLength - 1;
            Node *split = newNode(next->start, splitEnd);
            activeNode->children[(int) text[activeEdge]] = split;
            split->children[(int) text[pos]] = newNode(pos, &leafEnd);
            next->start += activeLength;
            split->children[(int) text[next->start]] = next;

            if (lastNewNode != NULL) {
                lastNewNode->suffixLink = split;
            }
            lastNewNode = split;
        }

        remainingSuffixCount--;
        if (activeNode == root && activeLength > 0) {
            activeLength--;
            activeEdge = pos - remainingSuffixCount + 1;
        } else if (activeNode != root) {
            activeNode = activeNode->suffixLink;
        }
    }
}


void setSuffixIndexByDFS(Node *n, int labelHeight) {
    if (n == NULL)
        return;
    int leaf = 1;
    int i;
    for (i = 0; i < MAX_CHAR; i++) {
        if (n->children[i] != NULL) {
            leaf = 0;
            setSuffixIndexByDFS(n->children[i], labelHeight + edgeLength(n->children[i]));
        }
    }
    if (leaf == 1) {
        n->suffixIndex = size - labelHeight;
    }
}

void buildSuffixTree() {
    size = strlen(text);
    int i;
    rootEnd = (int *) malloc(sizeof(int));
    *rootEnd = -1;
    root = newNode(-1, rootEnd);
    activeNode = root;
    for (i = 0; i < size; i++)
        extendSuffixTree(i);
    int labelHeight = 0;
    setSuffixIndexByDFS(root, labelHeight);
}

I tried to check DFS, but it didn't go well, maybe I did something wrong

0

There are 0 best solutions below