Error calculating Dunn's index in C++ using the Armadillo library

52 Views Asked by At

I have been trying to compute Dunn's index using the Armadillo library as part of a larger algorithm I'm working on. Whenever I run the code, I get the output "Dunn's Index: -nan(ind)" together with an error saying the indices are out of bounds. The code and the main function I'm using to test it are below. The code also contains some ad-hoc checks I added while trying to troubleshoot the issue.

#include <iostream>
#include <armadillo>

using namespace std;
using namespace arma;

double dunns(int clusters_number, const mat& distM, const uvec& ind) {
    // Determine the number of unique clusters
    int i = max(ind);
    vec denominator;

    for (int i2 = 1; i2 <= i; ++i2) {
        uvec indi = find(ind == i2);
        uvec indj = find(ind != i2);

        // Check if indi and indj are not empty
        if (!indi.is_empty() && !indj.is_empty()) {
            mat temp;

            // Check if indices are within bounds before submatrix extraction
            if (indi.max() < distM.n_rows && indj.max() < distM.n_cols) {
                temp = distM.submat(indi, indj);
                denominator = join_cols(denominator, vectorise(temp));
            }
            else {
                // Debugging: Print indices that caused the error
                cout << "Error: Indices out of bounds for Cluster " << i2 << endl;
            }
        }
    }

    double num = 0.0;  // Initialize num to 0.0

    // Check if denominator is not empty before finding the minimum
    if (!denominator.is_empty()) {
        num = min(denominator);
    }

    mat neg_obs = zeros<mat>(distM.n_rows, distM.n_cols);

    for (int ix = 1; ix <= i; ++ix) {
        uvec indxs = find(ind == ix);

        // Check if indxs is not empty
        if (!indxs.is_empty()) {
            // Check if indices are within bounds before setting elements
            if (indxs.max() < distM.n_rows) {
                neg_obs.submat(indxs, indxs).fill(1.0);
            }
        }
    }

    // Print intermediate values
    cout << "Intermediate Values:" << endl;
    cout << "Denominator: " << denominator << endl;
    cout << "num: " << num << endl;

    mat dem = neg_obs % distM;
    double max_dem = max(max(dem));

    // Print max_dem
    cout << "max_dem: " << max_dem << endl;

    double DI = num / max_dem;
    return DI;
}

int main() {
    // New inputs for testing
    int clusters_number = 2;

    // Modified dissimilarity matrix (4x4)
    mat distM(4, 4);
    distM << 0.0 << 1.0 << 2.0 << 3.0
        << 1.0 << 0.0 << 1.0 << 2.0
        << 2.0 << 1.0 << 0.0 << 1.0
        << 3.0 << 2.0 << 1.0 << 0.0;

    // Modified cluster indices (4x1)
    arma::uvec ind;
    ind << 1 << 1 << 2 << 2;

    // Print the input dissimilarity matrix
    cout << "Dissimilarity Matrix:" << endl;
    cout << distM << endl;

    // Print the cluster indices
    cout << "Cluster Indices:" << endl;
    cout << ind << endl;

    double DI = dunns(clusters_number, distM, ind);

    cout << "Dunn's Index: " << DI << endl;

    return 0;
}

Data format: this seems correct. I'm using double for the dissimilarity matrix and arma::uvec for the cluster indices, which is appropriate.

Data alignment: the alignment of data points between the dissimilarity matrix and the cluster indices appears to be correct. Each data point in the matrix corresponds to an entry in the cluster indices.

There don't seem to be any empty clusters or missing data points in the dissimilarity matrix. The data appears to be complete.

Given that the data seems correctly aligned and there are no obvious issues with empty clusters or missing data, it's perplexing that I'm still encountering "Indices out of bounds" errors during submatrix extraction.

0

There are 0 best solutions below