I have been trying to compute Dunn's index using the Armadillo library as part of a larger algorithm I'm working on. Whenever I run the code, I get the output "Dunn's Index: -nan(ind)" together with an error saying that the indices are out of bounds. Below I provide the code and the main function I'm using to test it. The code also contains several ad-hoc checks that I added while trying to troubleshoot the issue.
#include <iostream>
#include <armadillo>
using namespace std;
using namespace arma;
double dunns(int clusters_number, const mat& distM, const uvec& ind) {
// Determine the number of unique clusters
int i = max(ind);
vec denominator;
for (int i2 = 1; i2 <= i; ++i2) {
uvec indi = find(ind == i2);
uvec indj = find(ind != i2);
// Check if indi and indj are not empty
if (!indi.is_empty() && !indj.is_empty()) {
mat temp;
// Check if indices are within bounds before submatrix extraction
if (indi.max() < distM.n_rows && indj.max() < distM.n_cols) {
temp = distM.submat(indi, indj);
denominator = join_cols(denominator, vectorise(temp));
}
else {
// Debugging: Print indices that caused the error
cout << "Error: Indices out of bounds for Cluster " << i2 << endl;
}
}
}
double num = 0.0; // Initialize num to 0.0
// Check if denominator is not empty before finding the minimum
if (!denominator.is_empty()) {
num = min(denominator);
}
mat neg_obs = zeros<mat>(distM.n_rows, distM.n_cols);
for (int ix = 1; ix <= i; ++ix) {
uvec indxs = find(ind == ix);
// Check if indxs is not empty
if (!indxs.is_empty()) {
// Check if indices are within bounds before setting elements
if (indxs.max() < distM.n_rows) {
neg_obs.submat(indxs, indxs).fill(1.0);
}
}
}
// Print intermediate values
cout << "Intermediate Values:" << endl;
cout << "Denominator: " << denominator << endl;
cout << "num: " << num << endl;
mat dem = neg_obs % distM;
double max_dem = max(max(dem));
// Print max_dem
cout << "max_dem: " << max_dem << endl;
double DI = num / max_dem;
return DI;
}
int main() {
// New inputs for testing
int clusters_number = 2;
// Modified dissimilarity matrix (4x4)
mat distM(4, 4);
distM << 0.0 << 1.0 << 2.0 << 3.0
<< 1.0 << 0.0 << 1.0 << 2.0
<< 2.0 << 1.0 << 0.0 << 1.0
<< 3.0 << 2.0 << 1.0 << 0.0;
// Modified cluster indices (4x1)
arma::uvec ind;
ind << 1 << 1 << 2 << 2;
// Print the input dissimilarity matrix
cout << "Dissimilarity Matrix:" << endl;
cout << distM << endl;
// Print the cluster indices
cout << "Cluster Indices:" << endl;
cout << ind << endl;
double DI = dunns(clusters_number, distM, ind);
cout << "Dunn's Index: " << DI << endl;
return 0;
}
The data format seems correct. I'm using double for the dissimilarity matrix and arma::uvec for cluster indices, which is appropriate.
Data alignment: The alignment of data points in the dissimilarity matrix and cluster indices appears to be correct. Each data point in the matrix corresponds to an entry in the cluster indices.
There don't seem to be any empty clusters or missing data points in the dissimilarity matrix. The data appears to be complete.
Given that the data seems correctly aligned and there are no obvious issues with empty clusters or missing data, it's perplexing that I'm still encountering "Indices out of bounds" errors during submatrix extraction.