ValueError: Length mismatch: Expected axis has 60 elements, new values have 300 elements

24 Views Asked by At

I got an error with the following code. The code can be used directly for debugging as I am using it as a small example for my bigger code.

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from itertools import chain, combinations
from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten

# Create LSTM model
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, input_shape=input_shape))
    model.add(Dense(3, activation='softmax'))  # Use 3 units for multiclass classification
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Create 1D CNN model
def create_1d_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(3, activation='softmax'))  # Use 3 units for multiclass classification
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Assuming each heatmap is represented by 10 features
data = np.random.rand(300, 26)  # 10 heatmaps with 10 features each
# Classes are represented as 0 for 'Up', 1 for 'Down', and 2 for 'Random'
classes = np.random.choice([0, 1, 2], 300)  # Randomly assign classes to each heatmap

df = pd.DataFrame(data, columns=[f'Week{i+1}' for i in range(26)])
df['Class'] = classes
df.index = ['HM'+str(i+1) for i in range(300)]  # Heatmap names as index

# Initialize your models
models = {
    "Logistic Regression": LogisticRegression(),
    "LSTM": create_lstm_model((26, 1)),  # assuming each input has 26 timesteps and 1 feature
    "1D CNN": create_1d_cnn_model((26, 1)),  # same as above
}

# Split your data into features and target
X = df.drop('Class', axis=1)
y = df['Class']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the input data for LSTM and 1D CNN models
X_train_lstm = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_lstm = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)

# Create all combinations of models
model_combinations = list(chain(*[combinations(models.keys(), i) for i in range(1, len(models) + 1)]))

class_to_index = {'Up': 0, 'Down': 1, 'Random': 2}  # Map classes to indices

for comb in model_combinations:
    comb = list(comb)  # convert tuple to list
    accuracies = {}
    probabilities = {}
    combined_accuracies = {}
    overall_combined_accuracies = {}

    for name in comb:
        model = models[name]

        if name == "LSTM" or name == "1D CNN":
            model.fit(X_train_lstm, y_train, epochs=10, verbose=0)
            y_pred_labels = np.argmax(model.predict(X_test_lstm), axis=-1)
        else:
            model.fit(X_train, y_train)
            y_pred_labels = model.predict(X_test)

        accuracies[name] = accuracy_score(y_test, y_pred_labels, normalize=True)
        probabilities[name] = model.predict_proba(X_test)

    # Calculate weighted accuracies
    for c in ['Up', 'Down', 'Random']:
        combined_accuracies[c] = sum(
            accuracies[name] * probabilities[name][:, class_to_index[c]] for name in comb
        )
    combined_accuracies_df = pd.DataFrame(combined_accuracies)
    combined_accuracies_df.index = df.index  # Set the index to the same index as the original DataFrame
  # set index to HM1, HM2, ..., HM300
    # Calculate overall weighted accuracies
    for c in ['Up', 'Down', 'Random']:
        overall_combined_accuracies[c] = sum(
            accuracies[name] * probabilities[name][:, class_to_index[c]].mean() for name in comb
        )

    print(f"\n{' & '.join(comb)} Model:")
    print("\nHeatmap Specific Combined_accuracies :")
    print(pd.DataFrame(combined_accuracies_df))
    print("\nOverall Combined_accuracies :")
    print(pd.Series(overall_combined_accuracies))

The error is the following

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-115-3c495cd79e3a> in <module>
     85         )
     86     combined_accuracies_df = pd.DataFrame(combined_accuracies)
---> 87     combined_accuracies_df.index = df.index  # Set the index to the same index as the original DataFrame
     88   # set index to HM1, HM2, ..., HM300
     89     # Calculate overall weighted accuracies

/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/generic.py in __setattr__(self, name, value)
   5152         try:
   5153             object.__getattribute__(self, name)
-> 5154             return object.__setattr__(self, name, value)
   5155         except AttributeError:
   5156             pass

pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__set__()

/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/generic.py in _set_axis(self, axis, labels)
    562     def _set_axis(self, axis: int, labels: Index) -> None:
    563         labels = ensure_index(labels)
--> 564         self._mgr.set_axis(axis, labels)
    565         self._clear_item_cache()
    566 

/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/internals/managers.py in set_axis(self, axis, new_labels)
    225         if new_len != old_len:
    226             raise ValueError(
--> 227                 f"Length mismatch: Expected axis has {old_len} elements, new "
    228                 f"values have {new_len} elements"
    229             )

ValueError: Length mismatch: Expected axis has 60 elements, new values have 300 elements

I used to have combined_accuracies_df.index = ['HM'+str(i+1) for i in range(300)] # set index to HM1, HM2, ..., HM300 einstead but this was also triggering another error ...

0

There are 0 best solutions below