I'm trying to allocate memory to an array variable of a struct defined in C++ through CFFI.
In NVCC/C++, I have:
struct array_struct {
int ndim;
int len;
int* shape;
float* array;
};
extern "C" void allocate(
struct array_struct* host_array,
struct array_struct* device_array
) {
device_array->ndim = host_array->ndim;
device_array->len = 1;
device_array->shape = new int[host_array->ndim];
printf("Printing device_array info:\n");
printf("device_array->ndim = %d\n", device_array->ndim);
printf("device_array->shape = (");
for (int i = 0; i < host_array->ndim; ++i) {
device_array->shape[i] = host_array->shape[i];
device_array->len *= host_array->shape[i];
printf("%d, ", device_array->shape[i]);
}
printf(")\n");
printf("device_array->len = %d\n", device_array->len);
}
In Python, I define the ffi.cdefs and try to allocate memory to the shape and array fields of the array_struct, and copy the numpy array information across:
ffi.cdef(
"""
struct array_struct {
int ndim;
int len;
int* shape;
float* array;
};
""")
ffi.cdef(
"""
void allocate(
struct array_struct *host_array,
struct array_struct *device_array
);
""")
lib = ffi.dlopen("./cupid/src/libAlg.so")
numpy_array = np.zeros((5,6),dtype=float)
host_array_struct = ffi.new("struct array_struct *")
host_array_struct.ndim = numpy_array.ndim
host_array_struct.len = numpy_array.size
host_array_struct.shape = ffi.new(f"int[{numpy_array.ndim}]")
contiguous_shape_array = np.ascontiguousarray(numpy_array.shape)
ffi.memmove(host_array_struct.shape, ffi.cast("int*", ffi.from_buffer(contiguous_shape_array)), numpy_array.ndim * ffi.sizeof("int"))
host_array_struct.array = ffi.new(f"float[{host_array_struct.len}]")
contiguous_numpy_array = np.ascontiguousarray(numpy_array)
ffi.memmove(host_array_struct.array, ffi.cast("float*", ffi.from_buffer(contiguous_numpy_array)), host_array_struct.len * ffi.sizeof("float"))
device_array_struct = ffi.new("struct array_struct *")
lib.allocate(host_array_struct, device_array_struct)
However, not only is it not giving the correct device_array->shape, it's also producing a memory corruption issue in the output. I wondered whether this was to do with the device_array not having memory allocated to it, but that hasn't helped either.
Printing device_array info:
device_array->ndim = 2
device_array->shape = (5, 0, )
device_array->len = 0
*** Error in `python': corrupted size vs. prev_size: 0x00005619fc092850 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x7f7c4)[0x2ad30b6bd7c4]
/lib64/libc.so.6(+0x818bb)[0x2ad30b6bf8bb]
python(PyObject_Free+0x199)[0x5619fa064969]
...
Thanks in advance.
I managed to fix this issue by specifying the data types more explicitly. I'd forgotten that C++ float != Python float, so needed to specify np.float32 when creating (or casting) the np.array: