Python KNN Classifier Reports Missing / Constant Accuracy
The KNN classifier is producing inaccurate results on the test split, and while refactoring the code I lost the accuracy reporting. The code also needs to live in functions rather than inline Python so it can be deployed.
Here is the output of the inaccurate reporting. The accuracy appears stuck at 60.71% for every K and L combination, as if only one KNN distance were ever used; the data should support a range of accuracies, but the prediction function fails to produce them.
K L Accuracy
0 2 10 60.714286
1 4 20 60.714286
2 6 40 60.714286
3 8 60 60.714286
4 10 80 60.714286
5 12 10 60.714286
6 14 20 60.714286
7 16 40 60.714286
8 18 60 60.714286
9 2 80 60.714286
10 4 10 60.714286
11 6 20 60.714286
12 8 40 60.714286
13 10 60 60.714286
Code containing the logic error:
import heapq
from heapq import heappush

import numpy as np
import pandas as pd


def knn_classifier(train_data, test_data, y_train, y_test, k):
    y_pred = []
    for i in range(test_data.shape[1]):
        # Collect (distance, label) pairs for the i-th test column against every training column.
        distance_heap = []
        for j in range(train_data.shape[1]):
            distance = kNN_hamming_distance(test_data[:, i], train_data[:, j])
            heappush(distance_heap, (distance, y_train[j]))
        # Take the k nearest neighbours and predict by majority vote over their labels.
        nearest_neighbors = heapq.nsmallest(k, distance_heap)
        kNN_labels = [neighbor[1] for neighbor in nearest_neighbors]
        prediction = max(set(kNN_labels), key=kNN_labels.count)
        y_pred.append(prediction)
    accuracy = np.mean(y_test == y_pred)
    return accuracy
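For reference, this is a minimal smoke test of knn_classifier on synthetic binary data (made-up shapes and labels, not my real EEG arrays); it assumes kNN_hamming_distance, defined further down, is already in scope:

# Hypothetical smoke test: 6 binary features, 20 train columns, 8 test columns, two classes.
rng = np.random.default_rng(0)
toy_train = rng.integers(0, 2, size=(6, 20))
toy_test = rng.integers(0, 2, size=(6, 8))
toy_y_train = rng.integers(0, 2, size=20)
toy_y_test = rng.integers(0, 2, size=8)
for toy_k in (1, 3, 5):
    print(toy_k, knn_classifier(toy_train, toy_test, toy_y_train, toy_y_test, toy_k))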
def stft_matrix_combined(signal, n_fft=64, hop_length=48):
    # Short-time Fourier transform with a Blackman window; keeps only the non-negative frequency bins.
    window = np.blackman(n_fft)
    num_samples = len(signal)
    num_frames = 1 + (num_samples - n_fft) // hop_length
    stft_matrix = np.empty((n_fft // 2 + 1, num_frames), dtype=complex)
    for t in range(num_frames):
        start = t * hop_length
        end = start + n_fft
        # Zero-pad a short final frame up to n_fft, then apply the window.
        frame = np.pad(signal[start:end], (0, n_fft - len(signal[start:end])), 'constant') * window
        stft_matrix[:, t] = np.fft.fft(frame)[:n_fft // 2 + 1]
    return stft_matrix
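As a quick shape check (the 512-sample signal is just an illustrative assumption), the defaults n_fft=64 and hop_length=48 give 1 + (512 - 64) // 48 = 10 frames and 64 // 2 + 1 = 33 frequency bins:

# Hypothetical single-channel signal of 512 samples.
demo_signal = np.sin(np.linspace(0, 20 * np.pi, 512))
demo_stft = stft_matrix_combined(demo_signal)
print(demo_stft.shape)  # expected (33, 10)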
def stft_matrix_creation(x_train, x_test, trainDim, testDim):
    # Build a (trainDim, 255) feature matrix: 3 channels x 85 STFT frames per sample, flattened and truncated.
    matrix = np.full((trainDim, 255), 0, dtype=complex)
    for i in range(trainDim):
        samples = [stft_matrix_combined(x_train[:, j, i]) for j in range(3)]
        max_len = max(sample.shape[1] for sample in samples)
        padded_samples = [np.pad(sample, ((0, 0), (0, max_len - sample.shape[1])), 'constant') for sample in samples]
        concatenated_samples = np.hstack([sample[:, :85] for sample in padded_samples])
        matrix[i] = concatenated_samples.reshape(-1)[:255]
    matrix_train = np.transpose(matrix)

    # Same construction for the test split.
    matrix_test = np.full((testDim, 255), 0, dtype=complex)
    for i in range(testDim):
        samples_test = [stft_matrix_combined(x_test[:, j, i]) for j in range(3)]
        max_len_test = max(sample_test.shape[1] for sample_test in samples_test)
        padded_samples_test = [np.pad(sample_test, ((0, 0), (0, max_len_test - sample_test.shape[1])), 'constant') for sample_test in samples_test]
        concatenated_samples_test = np.hstack([sample_test[:, :85] for sample_test in padded_samples_test])
        matrix_test[i] = concatenated_samples_test.reshape(-1)[:255]
    matrix_test = np.transpose(matrix_test)

    return matrix_train, matrix_test
def generate_random_projection_matrix(M, L):
    projection = np.random.normal(0, 1, (L, M))
    projection = projection / np.linalg.norm(projection, axis=1)[:, np.newaxis]  # Normalize row vectors to unit length
    return projection
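A quick check (arbitrary 255 x 10 size, matching the 255-dimensional features used later) that the rows really come out as unit vectors:

demo_projection = generate_random_projection_matrix(255, 10)
print(demo_projection.shape)  # (10, 255)
print(np.allclose(np.linalg.norm(demo_projection, axis=1), 1.0))  # True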
def binary_random_projection(X_stft_matrix, project_matrix):
    # Project the (features x samples) matrix onto L random directions, then binarize each entry.
    Y = np.dot(project_matrix, X_stft_matrix)
    # Entries with |Y| > 0 become 1, everything else 0.
    Y_binary = np.where(np.abs(Y) > 0, 1, 0)
    return Y_binary.astype(int)
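To show what the binarization produces in practice, here is a small diagnostic on random complex input (arbitrary sizes, not my real features); it prints the fraction of ones in the resulting binary codes:

# Hypothetical 255-dimensional complex features for 5 samples.
rng = np.random.default_rng(1)
demo_features = rng.normal(size=(255, 5)) + 1j * rng.normal(size=(255, 5))
demo_binary = binary_random_projection(demo_features, generate_random_projection_matrix(255, 10))
print(demo_binary.shape, demo_binary.mean())  # shape and fraction of ones in the binary codes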
def kNN_hamming_distance(x1, x2):
    # Hamming distance between two binary vectors: count the positions where the bits differ.
    return np.sum(np.bitwise_xor(x1, x2))
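For example, two 4-bit codes that differ in two positions give a distance of 2:

print(kNN_hamming_distance(np.array([1, 0, 1, 1]), np.array([1, 1, 0, 1])))  # 2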
Main Python KNN classifier controller code.
First invoke stft_matrix_creation() to build the train and test STFT matrices. Then use generate_random_projection_matrix() and binary_random_projection() to produce X_train_binary and x_test_binary, reducing the EEG signals to short binary codes. Finally run knn_classifier(X_train_binary, x_test_binary, y_train_, y_test_, k).
# 'eeg' is assumed to be loaded earlier (e.g., a dict/.mat file with keys 'x_train', 'y_train', 'x_test', 'y_test').
x_train = np.array(eeg['x_train'])
y_train = np.array(eeg['y_train'])
x_test = np.array(eeg['x_test'])
y_test = np.array(eeg['y_test'])
y_train_ = np.array([x[0] for x in y_train])
y_test_ = np.array([x[0] for x in y_test])

matrix_train, matrix_test = stft_matrix_creation(x_train, x_test, trainDim=112, testDim=28)

accuracies = []
k_values = []
l_values = []
l_list = [10, 20, 40, 60, 80]
for l in l_list:
    binary_projection_matrix = generate_random_projection_matrix(255, l)
    X_train_binary = binary_random_projection(matrix_train, binary_projection_matrix)
    x_test_binary = binary_random_projection(matrix_test, binary_projection_matrix)
    for k in range(2, 20, 2):
        accuracy = knn_classifier(X_train_binary, x_test_binary, y_train_, y_test_, k)
        accuracies.append(accuracy * 100)
        k_values.append(k)
        l_values.append(l)

# Record K and L per run so the columns line up with the accuracies.
accuracy_df = pd.DataFrame({'K': k_values, 'L': l_values, 'Accuracy': accuracies})
print(accuracy_df)
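For clearer reporting once the accuracies differ, the same frame (the accuracy_df built above) can be pivoted so there is one row per K and one column per L:

# One accuracy cell per (K, L) combination, easier to scan than the long format.
print(accuracy_df.pivot(index='K', columns='L', values='Accuracy'))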
Expected output: accuracies from the KNN classifier that vary with K and L, for example:
K L Accuracy
0 2 10 xx.01
1 4 20 yy.02
2 6 40 zz.06
3 8 60 aa.04