How can I remove noise from a .wav audio file given the noise file as a .wav audio using python

1.1k Views Asked by At

As mentioned, I want to create a program to remove noise from an audio clip given that I have the noise as a seperate audio file. I know it has something to do with FFT so I have applied it on both the audio file and the noise file. These are what I got Audio file: Audio file Noise file: Noise file Would something like subtracting the amplitudes of both for each frequency and then inverse FFTing work and if it would how would I go about programming it?

Here's what I have:

from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
import numpy as np
from matplotlib import pyplot as plt

fs_rate, signal = wavfile.read("sound.wav")
print ("Frequency sampling", fs_rate)
l_audio = len(signal.shape)
print ("Channels", l_audio)
if l_audio == 2:
    signal = signal.sum(axis=1) / 2
N = signal.shape[0]
print ("Complete Samplings N", N)
secs = N / float(fs_rate)
print ("secs", secs)
Ts = 1.0/fs_rate # sampling interval in time
print ("Timestep between samples Ts", Ts)
t = scipy.arange(0, secs, Ts) # time vector as scipy arange field / numpy.ndarray
FFT = abs(scipy.fft.fft(signal))
FFT_side = FFT[range(N//2)] # one side FFT range
freqs = scipy.fftpack.fftfreq(signal.size, t[1]-t[0])
fft_freqs = np.array(freqs)
freqs_side = freqs[range(N//2)] # one side frequency range
fft_freqs_side = np.array(freqs_side)
plt.subplot(311)
p1 = plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.subplot(312)
p2 = plt.plot(freqs, FFT, "r") # plotting the complete fft spectrum
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count dbl-sided')
plt.subplot(313)
p3 = plt.plot(freqs_side, abs(FFT_side), "b") # plotting the positive fft spectrum
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')
plt.show()
from __future__ import print_function
import scipy.io.wavfile as wavfile
import scipy
import scipy.fftpack
import numpy as np
from matplotlib import pyplot as plt

fs_rate, signal = wavfile.read("noise.wav")
fs_rate, signal2 = wavfile.read("noise.wav")
print ("Frequency sampling", fs_rate)
l_audio = len(signal.shape)
print ("Channels", l_audio)
if l_audio == 2:
    signal = signal.sum(axis=1) / 2
N = signal.shape[0]
print ("Complete Samplings N", N)
secs = N / float(fs_rate)
print ("secs", secs)
Ts = 1.0/fs_rate # sampling interval in time
print ("Timestep between samples Ts", Ts)
t = scipy.arange(0, secs, Ts) # time vector as scipy arange field / numpy.ndarray
FFT = abs(scipy.fft.fft(signal))
FFT_side = FFT[range(N//2)] # one side FFT range
freqs = scipy.fftpack.fftfreq(signal.size, t[1]-t[0])
fft_freqs = np.array(freqs)
freqs_side = freqs[range(N//2)] # one side frequency range
fft_freqs_side = np.array(freqs_side)
plt.subplot(311)
p1 = plt.plot(t, signal, "g") # plotting the signal
plt.xlabel('Time')
plt.ylabel('Amplitude')
plt.subplot(312)
p2 = plt.plot(freqs, FFT, "r") # plotting the complete fft spectrum
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count dbl-sided')
plt.subplot(313)
p3 = plt.plot(freqs_side, abs(FFT_side), "b") # plotting the positive fft spectrum
plt.xlabel('Frequency (Hz)')
plt.ylabel('Count single-sided')
plt.show()

EDIT:

import numpy as np 
import matplotlib.pyplot as plt
from scipy import signal as sig
rng = np.random.default_rng()
fs, x = wavfile.read("sound.wav")
f, t, Zxx = sig.stft(x, fs, nperseg=10)

## Take STFT of noise
f_n, noise = wavfile.read("noise.wav")
noise = noise[:len(x)]
f_n, t_n, Zxx_n = sig.stft(noise, fs, nperseg=10)

## Subtract noise from noisy signal and perform inverse STFT
Zxx_denoise = Zxx - Zxx_n
_, denoised_signal = sig.istft(Zxx_denoise, fs)
fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(6,3))
ax[0].pcolormesh(t, f, np.abs(Zxx), vmin=0, vmax=1, shading='gouraud')
ax[0].set_title('Noisy signal')
ax[0].set_ylabel('Frequency [Hz]')
ax[0].set_xlabel('Time [sec]')

ax[1].pcolormesh(t, f, np.abs(Zxx_denoise), vmin=0, vmax=1, shading='gouraud')
ax[1].set_title('Denoised signal')
ax[1].set_ylabel('Frequency [Hz]')
ax[1].set_xlabel('Time [sec]')

Error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[30], line 18
     16 _, denoised_signal = sig.istft(Zxx_denoise, fs)
     17 fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(6,3))
---> 18 ax[0].pcolormesh(t, f, np.abs(Zxx).T, vmin=0, vmax=1, shading='gouraud')
     19 ax[0].set_title('Noisy signal')
     20 ax[0].set_ylabel('Frequency [Hz]')

File c:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\__init__.py:1433, in _preprocess_data..inner(ax, data, *args, **kwargs)
   1430 @functools.wraps(func)
   1431 def inner(ax, *args, data=None, **kwargs):
   1432     if data is None:
-> 1433         return func(ax, *map(sanitize_sequence, args), **kwargs)
   1435     bound = new_sig.bind(ax, *args, **kwargs)
   1436     auto_label = (bound.arguments.get(label_namer)
   1437                   or bound.kwargs.get(label_namer))

File c:\Users\User\AppData\Local\Programs\Python\Python39\lib\site-packages\matplotlib\axes\_axes.py:6169, in Axes.pcolormesh(self, alpha, norm, cmap, vmin, vmax, shading, antialiased, *args, **kwargs)
   6166 shading = shading.lower()
   6167 kwargs.setdefault('edgecolors', 'none')
-> 6169 X, Y, C, shading = self._pcolorargs('pcolormesh', *args,
   6170                                     shading=shading, kwargs=kwargs)
   6171 coords = np.stack([X, Y], axis=-1)
   6172 # convert to one dimensional array
...
   5673 else:
   5674     raise TypeError(f'{funcname}() takes 1 or 3 positional arguments '
   5675                     f'but {len(args)} were given')

ValueError: too many values to unpack (expected 2)

MORE EDIT: The results of subtracting just the FFT is quite impressive: Result This signal seems clear. However, converting it back to .wav, the sound has more disturbance than the orignal.

1

There are 1 best solutions below

3
astroChance On

Since your signal and your noise are non-stationary, a simple FFT won't do the trick. You could try using the Short Time Fourier Transform function from SciPy, here is an example just modifying the example in their docs. Take the STFT of your noisy signal as well as your noise, then subtract the noise from the noisy signal in the frequency domain, then perform the inverse STFT to get your denoised signal. If this doesn't give you the results you want, you may need to look at slightly more involved solutions like this.

import numpy as np 
import matplotlib.pyplot as plt
from scipy import signal
rng = np.random.default_rng()

## Generate test signal per SciPy docs
fs = 10e3
N = 1e5
amp = 2 * np.sqrt(2)
noise_power = 0.01 * fs / 2
time = np.arange(N) / float(fs)
mod = 500*np.cos(2*np.pi*0.25*time)
carrier = amp * np.sin(2*np.pi*3e3*time + mod)
noise = rng.normal(scale=np.sqrt(noise_power),
                   size=time.shape)
noise *= np.exp(-time/5)
x = carrier + noise

## Take STFT of noisy signal
f, t, Zxx = signal.stft(x, fs, nperseg=1000)

## Take STFT of noise
f_n, t_n, Zxx_n = signal.stft(noise, fs, nperseg=1000)

## Subtract noise from noisy signal and perform inverse STFT
Zxx_denoise = Zxx - Zxx_n
_, denoised_signal = signal.istft(Zxx_denoise, fs)


fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(6,3))
ax[0].pcolormesh(t, f, np.abs(Zxx), vmin=0, vmax=1, shading='gouraud')
ax[0].set_title('Noisy signal')
ax[0].set_ylabel('Frequency [Hz]')
ax[0].set_xlabel('Time [sec]')

ax[1].pcolormesh(t, f, np.abs(Zxx_denoise), vmin=0, vmax=1, shading='gouraud')
ax[1].set_title('Denoised signal')
ax[1].set_ylabel('Frequency [Hz]')
ax[1].set_xlabel('Time [sec]')

plt.tight_layout()
plt.show()

Produces:

enter image description here