C: Binary Search and Binary Insertion

100 Views Asked by At

I'm currently working on solving the Distinct Powers Question (29) on Project Euler. The problem involves finding the number of distinct products for (a^b) where (2 < a < 100) and (2 < b < 100). I initially created an algorithm that successfully handles factorization and checks for duplications. However, as I attempted to enhance the program's performance using linked lists and binary search, I encountered issues leading to a core dump. Despite efforts to identify logical flaws, I couldn't resolve the problem.

Here's the code snippet I'm working with:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define A_MIN 2
#define A_MAX 100
#define B_MIN 2
#define B_MAX 100

struct Node {
    int *data;
    struct Node *next;
    struct Node *previous;
};

void primeFactor(int *arr, int num) {
    for (int k = 2; k <= num; ++k) {
        while (num % k == 0) {
            num /= k;
            arr[k] += 1;
        }
    }
}

int compareArrays(const int *arr1, const int *arr2, int size) {
    for (int k = 0; k < size; ++k) {
        if (arr2[k] < arr1[k]) {
            return -1; // arr2 is left of arr1
        } else if (arr1[k] < arr2[k]) {
            return 1; // arr2 is right of arr1
        }
    }
    return 0; // arr2 is arr1
}

void insertSorted(struct Node **head, const int *arr, int size, int *listLength) {
    struct Node *current = *head;

    int pos = 0;
    int left = 0;
    int right = *listLength;

    int comparison = -3;
    
    while (current != NULL && left < right) {
        int mid = (left + right) / 2;
        while (pos < mid) {
            current = current->next;
            pos++;
        }
        while (pos > mid) {
            current = current->previous;
            pos--;
        }
        comparison = compareArrays(current->data, arr, size);
        if (comparison == 1) { // arr2 is right of arr1
            left = mid + 1;
        } else if (comparison == -1) { // arr2 is left of arr1
            right = mid;
        } else {
            break;
        }
    }

    struct Node *prev = (current != NULL) ? current->previous : NULL;
    struct Node *nxt = (current != NULL) ? current->next : NULL;

    if (comparison != 0) {
        struct Node *newNode = (struct Node *)malloc(sizeof(struct Node));
        if (newNode == NULL) {
            perror("Memory allocation error");
            exit(EXIT_FAILURE);
        }

        newNode->data = (int *)malloc(size * sizeof(int));
        if (newNode->data == NULL) {
            perror("Memory allocation error");
            exit(EXIT_FAILURE);
        }

        memcpy(newNode->data, arr, size * sizeof(int));

        if (comparison == -1) { // perform insert left
            newNode->next = current;
            if (current != NULL) {
                current->previous = newNode;
            }
            if (prev == NULL) {
                *head = newNode;
            } else {
                prev->next = newNode;
                newNode->previous = prev;
            }
        } else if (comparison == 1) { // perform insert right
            newNode->next = nxt;
            if (current != NULL) {
                current->next = newNode;
            }
            if (nxt != NULL) {
                nxt->previous = newNode;
            }
        } else { // no node exist yet. make node the head.
            printf("lol");
            *head = newNode;
            
        }

        (*listLength)++;
    }
}

void freeLinkedList(struct Node *head) {
    while (head != NULL) {
        struct Node *temp = head;
        head = head->next;
        free(temp->data);
        free(temp);
    }
}

int main() {
    struct Node *link_list = NULL;
    int size = A_MAX;
    int listLength = 0;

    for (int i = A_MIN; i <= A_MAX; ++i) {
        int arr[A_MAX] = {0};
        primeFactor(arr, i);

        for (int j = B_MIN; j <= B_MAX; ++j) {
            int *arr2 = (int *)malloc(size * sizeof(int));
            if (arr2 == NULL) {
                perror("Memory allocation error");
                exit(EXIT_FAILURE);
            }

            for (int k = 0; k < size; ++k) {
                arr2[k] = arr[k] * j;
            }

            insertSorted(&link_list, arr2, size, &listLength);
        }
    }

    printf("%d\n", listLength);

    freeLinkedList(link_list);

    return 0;
}```
3

There are 3 best solutions below

1
John Omielan On BEST ANSWER

One logical flaw, which can definitely cause issues such as a core dump occurring, is with the following section of code in your insertSorted function:

        memcpy(newNode->data, arr, size * sizeof(int));

        if (comparison == -1) { // perform insert left
            newNode->next = current;
            if (current != NULL) {
                current->previous = newNode;
            }
            if (prev == NULL) {
                *head = newNode;
            } else {
                prev->next = newNode;
                newNode->previous = prev;
            }
        } else if (comparison == 1) { // perform insert right
            newNode->next = nxt;
            if (current != NULL) {
                current->next = newNode;
            }
            if (nxt != NULL) {
                nxt->previous = newNode;
            }
        } else { // no node exist yet. make node the head.
            printf("lol");
            *head = newNode;
            
        }

The code uses malloc to allocate memory for newNode but, as indicated in the malloc man page,

The memory is not initialized.

Since comparison can only be 0, -1 or 1, and this code is within the conditional if (comparison != 0) {, then the final else should never be executed. As such, the value of newNode->next is always set to a valid value. However, this is not always the case for newNode->previous. For example, if comparison == -1, then newNode->previous is only set if prev != NULL.

This means that newNode->previous may be uninitialized, thus using it such as with the current = current->previous; line earlier in insertSorted, can cause invalid memory to be accessed.

This issue can easily be fixed. Note that replacing your malloc call with calloc which "... initializes all bytes in the allocated storage to zero" is an option, but as Neil's comment states

Even calloc is not guaranteed to initialize null pointers; it works on most computers because null is very probably 0, (5.13).

Thus, to be more robust, it's better to instead add the line newNode->previous = NULL; just after the memcpy(newNode->data, arr, size * sizeof(int)); line in my specified section of your code.

Also, to make it more clear when casually reading the code that newNode->next is always initialized, and to help ensure later code changes don't break your code, I suggest adding the line newNode->next = NULL; initially as well.

0
John Bollinger On

General Comments

You use a binary search on your linked list to locate the insertion point for the next node. Binary search is efficient when you have random access to the items, but much less so when you don't. A linear search would be simpler to implement in your case, and possibly faster, too.

Even though your linked list does not use a dummy head node, you could provide one temporarily for the purposes of your linked list operations. That could provide for cleaner code by removing the need for special-case handling of an empty list.

You use an inefficient representation for your prime factorizations: an array of A_MAX exponents. There will be non-zero exponents only for primes, however, so you could easily get away with much shorter arrays, where each element corresponded to a prime.

The main issue

In many cases, your program fails to initialize the previous pointers of its dynamically allocated list nodes. In some cases, it fails to initialize the next pointer, too. malloc() does not initialize the allocated memory for you. calloc() initializes it to all-byte-zero, but C does not define what that means as a pointer representation, so even with calloc() you would need to explicitly assign values to the pointer members of your allocated nodes.

Additional considerations

You don't need to store or compare factorizations at all, though I would still compute them. You can determine directly from each factorization of an ab whether it will have been counted also as an a'b' where a' < a:

  • compute the greatest common denominator, d(a), of all the non-zero exponents in the prime factorization of a. This will often be 1.

  • There is then a positive integer a0 such that a0d(a) = a. There is no smaller positive integer r such that a is an integer power of r.

  • ab is, of course, also an integer power of a0: a0d(a)b.

  • Let x(a) = d(a)b. You will avoid counting any duplicates if you count only those ab where b is the smallest divisor of x(a) such that x(a) / b <= bmax. Where that is not the case, there is some power of a0 less than a for which ab will have been counted.

0
fish_brain On

this is what my code looks like with John Omielan's suggestions! Fail to initialize the attribute of the node is the root cause of the core dump.

void insertSorted(struct Node **head, const int *arr, int size, int *listLength) {
    int comparison = -1;
    // unchanged code goes here.
    if (comparison != 0) {
        // more unchanges code goes here.
        if (comparison == -1) { // perform insert left
            newNode->next = current;
            if (current == NULL) {
                newNode->next = NULL;
            }else{
                newNode->next = current;                
                current->previous = newNode;
            }
            if (prev == NULL) {
                newNode->previous = NULL;
                *head = newNode;
            } else {
                newNode->previous = prev;
                prev->next = newNode;
            }
        } else if (comparison == 1) { // perform insert right
            newNode->next = nxt;
            if (current == NULL) {
                newNode->previous = NULL;
                *head = newNode;
            }else{
                newNode->previous = current;
                current->next = newNode;
            }

            if (nxt == NULL) {
                newNode->next = NULL;
            }else{
                newNode->next = nxt;
                nxt->previous = newNode;
            }
        }

        (*listLength)++;
    }
}