Union vs void pointer performance

94 Views Asked by At

TL;DR: Is a union better than a void pointer in terms of performance?

While searching for a performance comparison of unions and void pointers, I went through the question "Union versus void pointer". Many answers suggested using a union, but none of them did so for performance reasons. My question is does void pointer take more time than union since we need to type cast it again and again.

I wrote the below code to test the performance and found that union is much better.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Forward typedefs: they let the function-pointer types below refer to the
// two container structs before the struct bodies are defined.
typedef struct union_test union_test;
typedef struct void_test void_test;
// Callback type taking a pointer to a union_test container.
typedef void (*PrintUnionFunction)(union_test *);
// Callback type taking a pointer to a void_test container.
typedef void (*PrintVoidFunction)(void_test *);

// Tag stored in a container's elemType field to name its element kind.
// The explicit values are the ones the compiler assigns implicitly anyway.
typedef enum ELEM_TYPE
{
    INT_TYPE = 0,
    STRING_TYPE = 1,
    FLOAT_TYPE = 2
} ELEM_TYPE;

// Container variant under test: the element array is reached through an
// anonymous union, so the typed member is used directly without a cast.
struct union_test
{
    ELEM_TYPE elemType; // element-kind tag; presumably names the union member in use
    // All three members share the same storage; only one is meaningful at a time.
    union {
        int **intElems;     // pointer to an array of int pointers
        float **floatElems; // pointer to an array of float pointers
        char **stringElems; // pointer to an array of char pointers
    };
    size_t elemCount; // number of entries in the element array (used as the loop bound)
    PrintUnionFunction printFunction; // callback called with a pointer to this container
};

// Container variant under test: the element array is type-erased, so each
// entry is a void pointer that the user converts to the real element type.
struct void_test
{
    ELEM_TYPE elemType; // element-kind tag; presumably names the type the entries point to
    void **elems;       // pointer to an array of untyped element pointers
    size_t elemCount;   // number of entries in elems (used as the loop bound)
    PrintVoidFunction printFunction; // callback called with a pointer to this container
};

// Benchmark body for the union variant. Despite the name nothing is printed:
// every int referenced through fpTest->intElems is incremented by one.
void printUnionInt(union_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        // The union member is already typed as int **, so no cast is needed.
        *fpTest->intElems[idx] += 1;
        ++idx;
    }
}

// Benchmark body for the void-pointer variant. Despite the name nothing is
// printed: every entry of fpTest->elems is treated as a pointer to int and
// the int it refers to is incremented by one.
void printVoidPointerInt(void_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        // void * converts to int * implicitly in C; this is the "cast" being measured.
        int *slot = fpTest->elems[idx];
        ++*slot;
        ++idx;
    }
}

// Benchmark driver: builds one union_test and one void_test container, each
// holding elemCount individually heap-allocated ints, then times a single pass
// of the respective callback over each container and prints both durations.
//
// Returns EXIT_SUCCESS after printing the timings, or EXIT_FAILURE (with a
// message on stderr) if any allocation fails. Everything that was allocated
// is released before returning.
int main(void)
{
    // 2^30 elements. Note that every element is a separate allocation, so the
    // program needs many gigabytes of memory; allocation failure is therefore
    // a realistic outcome and is handled below instead of dereferencing NULL.
    const size_t elemCount = (size_t)1024 * 1024 * 1024;
    clock_t start_time_union, end_time_union;
    clock_t start_time_void, end_time_void;
    union_test *fp = NULL;
    void_test *void_fp = NULL;
    size_t unionFilled = 0; // number of fp->intElems entries that own memory
    size_t voidFilled = 0;  // number of void_fp->elems entries that own memory
    int status = EXIT_FAILURE;

    fp = malloc(sizeof *fp);
    if (fp == NULL)
    {
        fprintf(stderr, "out of memory allocating union_test\n");
        goto cleanup;
    }
    fp->elemCount = elemCount;
    fp->elemType = INT_TYPE;
    fp->printFunction = printUnionInt;
    // calloc rejects a count * size product that would overflow size_t
    // (possible here when size_t is 32 bits wide).
    fp->intElems = calloc(fp->elemCount, sizeof *fp->intElems);
    if (fp->intElems == NULL)
    {
        fprintf(stderr, "out of memory allocating union element array\n");
        goto cleanup;
    }

    for (; unionFilled < fp->elemCount; unionFilled++)
    {
        int *elem = malloc(sizeof *elem);
        if (elem == NULL)
        {
            fprintf(stderr, "out of memory allocating union element %zu\n", unionFilled);
            goto cleanup;
        }
        // Plain conversion instead of memcpy from a size_t: copying the first
        // sizeof(int) bytes of a wider object is endian-dependent.
        *elem = (int)unionFilled;
        fp->intElems[unionFilled] = elem;
    }

    // Sized from the pointer being assigned, not from the other struct type.
    void_fp = malloc(sizeof *void_fp);
    if (void_fp == NULL)
    {
        fprintf(stderr, "out of memory allocating void_test\n");
        goto cleanup;
    }
    void_fp->elemCount = elemCount;
    void_fp->elemType = INT_TYPE;
    void_fp->printFunction = printVoidPointerInt;
    void_fp->elems = calloc(void_fp->elemCount, sizeof *void_fp->elems);
    if (void_fp->elems == NULL)
    {
        fprintf(stderr, "out of memory allocating void element array\n");
        goto cleanup;
    }

    for (; voidFilled < void_fp->elemCount; voidFilled++)
    {
        int *elem = malloc(sizeof *elem);
        if (elem == NULL)
        {
            fprintf(stderr, "out of memory allocating void element %zu\n", voidFilled);
            goto cleanup;
        }
        *elem = (int)voidFilled;
        void_fp->elems[voidFilled] = elem;
    }

    start_time_union = clock();
    fp->printFunction(fp);
    end_time_union = clock();

    start_time_void = clock();
    void_fp->printFunction(void_fp);
    end_time_void = clock();

    printf("\n\nunion execution time: %f seconds\n", (double)(end_time_union - start_time_union) / CLOCKS_PER_SEC);
    printf("void pointer execution time: %f seconds\n", (double)(end_time_void - start_time_void) / CLOCKS_PER_SEC);

    status = EXIT_SUCCESS;

cleanup:
    // Only the first *Filled entries own memory; the counters stay at 0 when
    // the element array itself could not be allocated, so the loops are safe.
    if (void_fp != NULL)
    {
        for (size_t i = 0; i < voidFilled; i++)
        {
            free(void_fp->elems[i]);
        }
        free(void_fp->elems);
        free(void_fp);
    }
    if (fp != NULL)
    {
        for (size_t i = 0; i < unionFilled; i++)
        {
            free(fp->intElems[i]);
        }
        free(fp->intElems);
        free(fp);
    }

    return status;
}

I got the below results.

union execution time: 8.237730 seconds
void pointer execution time: 8.647505 seconds

I have seen somewhere that with -O3 the compiler does not care about pointer types and treats everything as just bytes of memory, but I couldn't find any performance improvement even with the -O3 flag.

Note: I am only concerned about performance not readability. I know that using union when we have limited number of types to deal with is a good approach for improving readability.

1

There is 1 best solution below

4
KamilCuk On

Your code generates on godbolt with gcc13.2 and -O3 the following assembly:

# Compiler output for the union variant. rdi is presumably the fpTest
# argument (first integer argument under the System V x86-64 convention).
printUnionInt:
        # offset 16 is consistent with the elemCount field on a 64-bit target
        mov     rdx, QWORD PTR [rdi+16]
        # nothing to do when the count is zero
        test    rdx, rdx
        je      .L1
        # offset 8 is consistent with the union holding the element array pointer
        mov     rax, QWORD PTR [rdi+8]
        # rcx = one-past-the-end address of the pointer array (8 bytes per entry)
        lea     rcx, [rax+rdx*8]
.L3:
        # load the current entry, advance to the next, increment the 32-bit value it points to
        mov     rdx, QWORD PTR [rax]
        add     rax, 8
        add     DWORD PTR [rdx], 1
        # repeat until the end address is reached
        cmp     rcx, rax
        jne     .L3
.L1:
        ret
# Compiler output for the void-pointer variant. rdi is presumably the fpTest
# argument (first integer argument under the System V x86-64 convention).
# No instruction in this listing corresponds to the (int *) cast in the C source.
printVoidPointerInt:
        # offset 16 is consistent with the elemCount field on a 64-bit target
        mov     rdx, QWORD PTR [rdi+16]
        # nothing to do when the count is zero
        test    rdx, rdx
        je      .L9
        # offset 8 is consistent with the elems pointer field
        mov     rax, QWORD PTR [rdi+8]
        # rcx = one-past-the-end address of the pointer array (8 bytes per entry)
        lea     rcx, [rax+rdx*8]
.L11:
        # load the current entry, advance to the next, increment the 32-bit value it points to
        mov     rdx, QWORD PTR [rax]
        add     rax, 8
        add     DWORD PTR [rdx], 1
        # repeat until the end address is reached
        cmp     rcx, rax
        jne     .L11
.L9:
        ret

I took a ruler and measured each function length, and it is exactly the same. There is no difference.

You may be interested in https://quick-bench.com/ .

My question is does void pointer take more time than union since we need to type cast it again and again.

In assembly there are no types; a register is a register. Casting a pointer is something that exists only in the C world: in the generated assembly it is a no-op, and nothing happens.