Merging multiple .npz files into single .npz file

388 Views Asked by At

I have multiple .npz files of the same kind in a folder, and I want to combine all of them into a single .npz file in a given folder.

I have tried the code below, but it does not seem to combine the multiple .npz files into a single .npz file. Here is the code:

import numpy as np

file_list = ['image-embeddings\img-emb-1.npz', 'image-embeddings\img-emb-2.npz']

# Open every archive up front, in the order given by file_list.
data_all = []
for fname in file_list:
    data_all.append(np.load(fname))

# Fold the arrays of all archives into one dict keyed by array name.
# NOTE: when a name occurs in more than one archive, the array from the
# archive read last replaces the earlier one.
merged_data = {}
for archive in data_all:
    for name, array in archive.items():
        merged_data[name] = array

# Write the combined arrays out as a single archive.
np.savez('new_file.npz', **merged_data)

Here img-emb-1.npz and img-emb-2.npz contain different values.

1

There is 1 best solution below

2
paime On

Maybe try the following to construct merged_data:

# Load every per-array archive and flatten all (name, array) pairs into a
# single dict. `chain`, `file` and `arrays` come from the full example below.
# If two archives share a name, the pair read last replaces the earlier one.
arrays_read = dict(
    chain.from_iterable(np.load(file(arr_name)).items() for arr_name in arrays.keys())
)

Full example:

from itertools import chain
from pathlib import Path

import numpy as np


def file(name):
    """Return the path of the .npz archive called *name* inside ``arrays/``."""
    return f"arrays/{name}.npz"


def _load_npz(path):
    """Read every array stored in the archive at *path* into a plain dict.

    The archive is opened in a ``with`` block so its file handle is closed
    as soon as the arrays have been read into memory.
    """
    with np.load(path) as npz:
        return dict(npz.items())


# np.savez does not create missing directories, so make sure the target
# directory exists before anything is written into it.
Path("arrays").mkdir(parents=True, exist_ok=True)

# Create data
arrays = {f"arr{i:02d}": np.random.randn(10, 20) for i in range(10)}

# Save data in separate files
for arr_name, arr in arrays.items():
    np.savez(file(arr_name), **{arr_name: arr})

# Read all files into a dict.
# If two archives share a name, the pair read last replaces the earlier one.
arrays_read = dict(
    chain.from_iterable(_load_npz(file(arr_name)).items() for arr_name in arrays)
)

# Save into a single file
np.savez(file("arrays"), **arrays_read)

# Load to compare
arrays_read_single = _load_npz(file("arrays"))

assert arrays_read.keys() == arrays_read_single.keys()
for k in arrays_read:
    assert np.array_equal(arrays_read[k], arrays_read_single[k])