I have three lists like this:
# Example inputs; every `...` stands for the elided remaining items.
path = ['path/to/file-1', 'path/to/file-2', 'path/to/file-3', ...]
sentence = ['some text-1', 'some text-2', 'some text-3', ...]
# Each audio entry bundles the file path, the decoded waveform and its
# sampling rate in Hz.
audio = [
    {
        'path': 'path/to/file-1',
        'array': array([-0.00036621, ..., 0.00015259]),
        'sampling_rate': 16000,
    }, ...
]
I want to create a datasets.arrow_dataset.Dataset from these lists, so I use this code:
import datasets

# Decode the first ten files. sr=None asks librosa to keep each file's native
# sampling rate, so that rate is recorded per clip instead of being assumed.
audio_arrays = []
sampling_rates = []
for audio_file in tqdm(audio_paths[0:10]):
    samples, sample_rate = librosa.load(audio_file, sr=None)
    audio_arrays.append(np.array(samples, dtype=np.float32))
    sampling_rates.append(sample_rate)

# One {'path', 'array', 'sampling_rate'} record per decoded clip. zip() stops
# at the shortest input, so only the clips that were actually loaded are used.
audio_dict = [
    {
        'path': clip_path,
        'array': waveform,
        'sampling_rate': clip_rate,
    }
    for clip_path, waveform, clip_rate in tqdm(
        zip(audio_paths, audio_arrays, sampling_rates),
        total=len(audio_arrays),
    )
]

# NOTE(review): all three columns must have the same number of rows; ok_path
# and ok_sentence are defined outside this snippet -- confirm their length
# matches audio_dict.
data = {"path": ok_path, 'audio': audio_dict, "sentence": ok_sentence}
# NOTE(review): Dataset.from_dict converts the columns to Arrow storage, so
# NumPy arrays presumably come back as plain Python lists on indexing unless
# an output format (e.g. dataset.with_format("numpy")) is set -- confirm
# against the datasets documentation.
dataset = datasets.Dataset.from_dict(data)
But when I check the type of dataset[0]['audio']['array'], I get <class 'list'>:
# Inspect the Python type of the stored waveform (not its contents).
print(type(dataset[0]['audio']['array']))  # output is <class 'list'>