I got an error with the following code. The code can be used directly for debugging as I am using it as a small example for my bigger code.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from itertools import chain, combinations
from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, MaxPooling1D, Flatten
# Create LSTM model
def create_lstm_model(input_shape):
model = Sequential()
model.add(LSTM(50, input_shape=input_shape))
model.add(Dense(3, activation='softmax')) # Use 3 units for multiclass classification
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
# Create 1D CNN model
def create_1d_cnn_model(input_shape):
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=input_shape))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(3, activation='softmax')) # Use 3 units for multiclass classification
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
return model
# Assuming each heatmap is represented by 10 features
data = np.random.rand(300, 26) # 10 heatmaps with 10 features each
# Classes are represented as 0 for 'Up', 1 for 'Down', and 2 for 'Random'
classes = np.random.choice([0, 1, 2], 300) # Randomly assign classes to each heatmap
df = pd.DataFrame(data, columns=[f'Week{i+1}' for i in range(26)])
df['Class'] = classes
df.index = ['HM'+str(i+1) for i in range(300)] # Heatmap names as index
# Initialize your models
models = {
"Logistic Regression": LogisticRegression(),
"LSTM": create_lstm_model((26, 1)), # assuming each input has 26 timesteps and 1 feature
"1D CNN": create_1d_cnn_model((26, 1)), # same as above
}
# Split your data into features and target
X = df.drop('Class', axis=1)
y = df['Class']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Reshape the input data for LSTM and 1D CNN models
X_train_lstm = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_lstm = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)
# Create all combinations of models
model_combinations = list(chain(*[combinations(models.keys(), i) for i in range(1, len(models) + 1)]))
class_to_index = {'Up': 0, 'Down': 1, 'Random': 2} # Map classes to indices
for comb in model_combinations:
comb = list(comb) # convert tuple to list
accuracies = {}
probabilities = {}
combined_accuracies = {}
overall_combined_accuracies = {}
for name in comb:
model = models[name]
if name == "LSTM" or name == "1D CNN":
model.fit(X_train_lstm, y_train, epochs=10, verbose=0)
y_pred_labels = np.argmax(model.predict(X_test_lstm), axis=-1)
else:
model.fit(X_train, y_train)
y_pred_labels = model.predict(X_test)
accuracies[name] = accuracy_score(y_test, y_pred_labels, normalize=True)
probabilities[name] = model.predict_proba(X_test)
# Calculate weighted accuracies
for c in ['Up', 'Down', 'Random']:
combined_accuracies[c] = sum(
accuracies[name] * probabilities[name][:, class_to_index[c]] for name in comb
)
combined_accuracies_df = pd.DataFrame(combined_accuracies)
combined_accuracies_df.index = df.index # Set the index to the same index as the original DataFrame
# set index to HM1, HM2, ..., HM300
# Calculate overall weighted accuracies
for c in ['Up', 'Down', 'Random']:
overall_combined_accuracies[c] = sum(
accuracies[name] * probabilities[name][:, class_to_index[c]].mean() for name in comb
)
print(f"\n{' & '.join(comb)} Model:")
print("\nHeatmap Specific Combined_accuracies :")
print(pd.DataFrame(combined_accuracies_df))
print("\nOverall Combined_accuracies :")
print(pd.Series(overall_combined_accuracies))
The error is the following
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-115-3c495cd79e3a> in <module>
85 )
86 combined_accuracies_df = pd.DataFrame(combined_accuracies)
---> 87 combined_accuracies_df.index = df.index # Set the index to the same index as the original DataFrame
88 # set index to HM1, HM2, ..., HM300
89 # Calculate overall weighted accuracies
/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/generic.py in __setattr__(self, name, value)
5152 try:
5153 object.__getattribute__(self, name)
-> 5154 return object.__setattr__(self, name, value)
5155 except AttributeError:
5156 pass
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__set__()
/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/generic.py in _set_axis(self, axis, labels)
562 def _set_axis(self, axis: int, labels: Index) -> None:
563 labels = ensure_index(labels)
--> 564 self._mgr.set_axis(axis, labels)
565 self._clear_item_cache()
566
/home/ml_in_pp/virtualenvs/AnacondaHendrik.gpu/lib/python3.6/site-packages/pandas/core/internals/managers.py in set_axis(self, axis, new_labels)
225 if new_len != old_len:
226 raise ValueError(
--> 227 f"Length mismatch: Expected axis has {old_len} elements, new "
228 f"values have {new_len} elements"
229 )
ValueError: Length mismatch: Expected axis has 60 elements, new values have 300 elements
I used to have combined_accuracies_df.index = ['HM'+str(i+1) for i in range(300)] # set index to HM1, HM2, ..., HM300 einstead but this was also triggering another error ...