I'm running this code and got stuck on this error for too long. I don't know why it's happening, could you help?
Traceback (most recent call last): File "C:\Users\Catarina Oliveira\PycharmProjects\SER_challenge\main.py", line 12, in <module> for audio_batch, label_batch, meta_data_batch, b_index in dataloader: File "C:\Users\Catarina Oliveira\PycharmProjects\SER_challenge\.venv\lib\site-packages\torch\utils\data\dataloader.py", line 631, in __next__ data = self._next_data() File "C:\Users\Catarina Oliveira\PycharmProjects\SER_challenge\.venv\lib\site-packages\torch\utils\data\dataloader.py", line 1346, in _next_data return self._process_data(data) File "C:\Users\Catarina Oliveira\PycharmProjects\SER_challenge\.venv\lib\site-packages\torch\utils\data\dataloader.py", line 1372, in _process_data data.reraise() File "C:\Users\Catarina Oliveira\PycharmProjects\SER_challenge\.venv\lib\site-packages\torch\_utils.py", line 722, in reraise raise exception soundfile.LibsndfileError: <exception str() failed>
My code:
This is the AudioDataset script:
import torch
import torchaudio
from torch.utils.data import Dataset
import os
import pandas as pd
class AudioDataset(Dataset):
    """Dataset that yields *pre-batched* audio: each item is a whole batch.

    Every item consists of ``batch_size`` waveforms zero-padded to the length
    of the longest waveform in that batch, plus their labels and CSV metadata.
    Intended to be used with ``DataLoader(..., batch_size=None)``.
    """

    def __init__(self, data_csv, data_path, batch_size):
        """
        Args:
            data_csv: CSV filename (relative to ``data_path``). Column 0 must
                hold audio filenames, column 1 the labels.
            data_path: Root directory containing the CSV and an ``Audios`` folder.
            batch_size: Number of samples packed into each returned batch.
        """
        self.data_path = data_path
        # os.path.join handles the separator consistently; the original mixed
        # `data_path+data_csv` (needs trailing slash) with `data_path+'/Audios'`.
        self.data_csv = pd.read_csv(os.path.join(data_path, data_csv))
        self.batch_size = batch_size
        self.audio_files = [os.path.join(self.data_path, 'Audios', file)
                            for file in self.data_csv.iloc[:, 0]]

    def __len__(self):
        # Number of *full* batches; a trailing partial batch is dropped.
        return len(self.data_csv) // self.batch_size

    def __getitem__(self, idx):
        """Return (waveforms, labels, metadata, batch_index) for batch ``idx``."""
        start_idx = idx * self.batch_size
        end_idx = start_idx + self.batch_size
        batch_waveforms = []
        batch_labels = []
        batch_meta_data = []
        # BUG FIX: the loop variable used to be named `idx`, shadowing the
        # method parameter, so the returned batch index was `end_idx - 1`.
        for i in range(start_idx, end_idx):
            audio_file = os.path.join(self.data_path, 'Audios',
                                      self.data_csv.iloc[i, 0])
            try:
                waveform, sample_rate = torchaudio.load(audio_file)
            except Exception as e:
                # libsndfile errors raised inside DataLoader workers can lose
                # their message ("<exception str() failed>"); re-raise with the
                # offending path so the bad/missing file is identifiable.
                raise RuntimeError(f"Failed to load audio file: {audio_file}") from e
            batch_waveforms.append(waveform)
            batch_labels.append(self.data_csv.iloc[i, 1])
            batch_meta_data.append(self.data_csv.iloc[i])
        # Zero-pad on the right so all waveforms share the batch-max length
        # and can be stacked into a single tensor.
        max_length = max(sample.shape[1] for sample in batch_waveforms)
        batch_waveforms = [torch.nn.functional.pad(w, (0, max_length - w.shape[1]))
                           for w in batch_waveforms]
        return torch.stack(batch_waveforms), batch_labels, batch_meta_data, idx
This is the main script. I would also like to write the program without the `if __name__ == '__main__':` guard, but I don't know how.
from torch.utils.data import DataLoader
from AudioDataset import AudioDataset
from torchaudio import transforms
if __name__ == '__main__':
    # NOTE: this guard CANNOT be removed while num_workers > 0. DataLoader
    # workers are started via spawn (the default on Windows), which re-imports
    # this module in each worker process; without the guard every worker would
    # re-execute this block and try to spawn workers of its own.
    bs = 32
    file_path = ''
    train_dataset = AudioDataset(data_csv='Train_data.csv', data_path=file_path,
                                 batch_size=bs)
    # batch_size=None because the dataset already returns whole batches.
    dataloader = DataLoader(train_dataset, batch_size=None, shuffle=True,
                            num_workers=16, prefetch_factor=2)
    # Build the transform once, outside the loop — it is stateless across
    # batches, so re-creating it per iteration was wasted work.
    transform = transforms.MelSpectrogram(n_fft=400, win_length=400,
                                          hop_length=160, n_mels=10)
    for audio_batch, label_batch, meta_data_batch, b_index in dataloader:
        mel_specgram = transform(audio_batch)