The goal of this program is to create a database of spectrograms from audio files, and then identify a specific song based on a provided audio fragment. The current code only correctly identifies if the provided snippet starts exactly at the beginning of the database song. However, if the snippet does not start from the beginning, it does not identify the song correctly.
generaDB.py
import os
import numpy as np
import soundfile as sf
from scipy.signal import spectrogram
from utilidades import generate_database
def generate_spectrogram(audio_file, target_length):
audio_data, sampling_frequency = sf.read(audio_file)
_, _, Sxx = spectrogram(audio_data, fs=sampling_frequency)
if Sxx.shape[1] < target_length:
pad_width = target_length - Sxx.shape[1]
Sxx = np.pad(Sxx, pad_width=((0, 0), (0, pad_width)))
elif Sxx.shape[1] > target_length:
Sxx = Sxx[:, :target_length]
return Sxx
if __name__ == "__main__":
folder_path = "Canciones/Canciones" # Utiliza '/' para rutas (compatible con sistemas operativos diferentes)
generate_database(folder_path)
identifica.py
import os
import numpy as np
from utilidades import identify_song
def load_database():
try:
data = np.load("EspectrogramaBD.npz")
spectrogram_db = data['spectrogram_db']
song_info = np.load("info.out.npy", allow_pickle=True)
return spectrogram_db, song_info
except FileNotFoundError:
print("Error: No se encontraron los archivos de la base de datos.")
return None, None
except Exception as e:
print(f"Error al cargar la base de datos: {e}")
return None, None
def main():
spectrogram_db, song_info = load_database()
if spectrogram_db is not None and song_info is not None:
print("Base de datos cargada exitosamente.")
else:
print("Error al cargar la base de datos.")
return
while True:
audio_file = input("Ingrese la ruta del archivo WAV: ")
if os.path.exists(audio_file):
break
else:
print("Error: El archivo no existe. Por favor, verifique la ruta.")
target_length = 500 # Puedes ajustar esto según tus necesidades
identification_result = identify_song(audio_file, target_length, spectrogram_db, song_info)
if identification_result:
print(f"La canción fue identificada como: {identification_result}")
else:
print("No se pudo identificar la canción.")
if __name__ == "__main__":
main()
utiidades.py
# utilidades.py
import os
import numpy as np
import soundfile as sf
from scipy.signal import spectrogram
def generate_spectrogram(audio_file, target_length):
audio_data, sampling_frequency = sf.read(audio_file)
_, _, Sxx = spectrogram(audio_data, fs=sampling_frequency)
if Sxx.shape[1] < target_length:
pad_width = target_length - Sxx.shape[1]
Sxx = np.pad(Sxx, pad_width=((0, 0), (0, pad_width)))
elif Sxx.shape[1] > target_length:
Sxx = Sxx[:, :target_length]
return (Sxx - np.mean(Sxx)) / np.std(Sxx) # Normalizar espectrograma
def generate_database(folder_path, target_length=500):
spectrogram_db = []
song_info = []
for root, dirs, files in os.walk(folder_path):
for file in files:
if file.lower().endswith(".wav"):
audio_file = os.path.join(root, file)
spectrogram = generate_spectrogram(audio_file, target_length)
spectrogram_db.append(spectrogram)
song_info.append({"file": audio_file, "duration": spectrogram.shape[1]})
np.savez("EspectrogramaBD.npz", spectrogram_db=spectrogram_db)
np.save("info.out", song_info)
def identify_song(audio_file, target_length, spectrogram_db, song_info):
try:
input_spectrogram = generate_spectrogram(audio_file, target_length)
except FileNotFoundError:
return "Error: El archivo de audio no existe."
best_match = None
best_match_difference = float('inf')
best_match_start = 0
best_match_end = target_length
identification_threshold = 2.0 # Ajusta según sea necesario
for idx, db_spectrogram in enumerate(spectrogram_db):
# Asumiendo que los espectrogramas son del mismo tamaño
db_spectrogram = db_spectrogram[:, :target_length]
for start_col in range(db_spectrogram.shape[1] - target_length + 1):
end_col = start_col + target_length
# Comparar submatrices
submatrix_db = db_spectrogram[:, start_col:end_col]
submatrix_input = input_spectrogram[:, start_col:end_col]
difference = np.linalg.norm(submatrix_db - submatrix_input)
if difference < best_match_difference:
best_match_difference = difference
best_match = idx
best_match_start = start_col
best_match_end = end_col
if best_match is not None and best_match_difference < identification_threshold:
identified_song = song_info[best_match]["file"]
return f"Canción identificada: {identified_song}, Diferencia mínima: {best_match_difference}, Inicio: {best_match_start}, Fin: {best_match_end}"
else:
return f"No se pudo identificar la canción. Diferencia mínima: {best_match_difference}"
I've tried every possible combination of knowledge that I have but cant find the answer to why it does not identify the song if its not starting from the beggining. I'm expecting the code to identify the song if it does not start at second 0 (beggining). I'am nearly 100% sure that the problem is at utilidades.py in the function identify_song. If anyone could help I would greatly appreciate it.