I am trying to train the model below on the Indian Pines dataset, but I get the following error.
Model:
def ResNet50(input_shape, classes=16):
    """Build and compile a ResNet50-style Keras classifier.

    The network is a zero-padded 7x7 convolutional stem followed by four
    stages. Each stage is one ``convolutional_block`` and then 2, 3, 5 and 2
    ``identity_block`` calls respectively. The result is average-pooled,
    flattened and fed to a softmax ``Dense`` head.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
            The layers are configured for channels-last data
            (``data_format='channels_last'``, ``BatchNormalization(axis=3)``).
        classes: Number of units in the softmax output layer. The one-hot
            label width passed to ``fit`` has to equal this value.
            NOTE(review): if the label map contains a background value 0 in
            addition to 16 classes, one-hot encoding it yields 17 columns;
            pass ``classes=17`` in that case (or drop the background first).

    Returns:
        The model, compiled with the Adam optimizer, categorical
        cross-entropy loss and the ``categorical_accuracy`` metric.
        Intermediate shapes and ``model.summary()`` are printed as a side
        effect.
    """
    X_input = Input(shape=input_shape)

    # Stem: pad 3 pixels per side, then 7x7 stride-2 conv -> BN -> ReLU -> max-pool.
    X = ZeroPadding2D((3, 3), data_format='channels_last')(X_input)
    X = Conv2D(64, (7, 7), strides=(2, 2))(X)
    print('==================', X.shape)
    X = BatchNormalization(axis=3)(X)
    print('BN', X.shape)
    X = Activation('relu')(X)
    print('relu', X.shape)
    X = MaxPooling2D((3, 3), strides=(1, 1))(X)
    print('max', X.shape)

    # Stages 2-5 as (filters, stride of the convolutional block, number of
    # identity blocks). Blocks are created in the same order as when written
    # out by hand.
    stages = (
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    )
    for stage_filters, stride, n_identity in stages:
        X = convolutional_block(X, f=3, filters=stage_filters, s=stride)
        for _ in range(n_identity):
            X = identity_block(X, 3, stage_filters)

    X = AveragePooling2D(pool_size=(2, 2), padding='same')(X)

    # Classification head. Its width comes from `classes` so that the
    # parameter actually controls the number of outputs.
    X = Flatten()(X)
    X = Dense(classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0))(X)
    print("=================",X.shape)

    model = Model(inputs=X_input, outputs=X, name='ResNet50')
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.summary()
    return model
Identity Block
def identity_block(X, f, filters):
    """Residual bottleneck block whose shortcut is the unmodified input.

    Args:
        X: Input tensor; batch normalization is applied along axis 3.
        f: Side length of the square kernel used by the middle convolution.
        filters: Three filter counts, one for each convolution in order.
            The last one should match the channel count of ``X`` so the
            residual addition lines up.

    Returns:
        ReLU applied to the sum of the main-path output and the input.
    """
    n_first, n_middle, n_last = filters

    # Keep a handle on the input for the skip connection.
    skip = X

    # 1x1 convolution -> batch norm -> ReLU
    out = Conv2D(n_first, kernel_size=(1, 1), strides=(1, 1), padding='valid')(X)
    out = BatchNormalization(axis=3)(out)
    out = Activation('relu')(out)

    # f x f convolution ('same' padding) -> batch norm -> ReLU
    out = Conv2D(n_middle, kernel_size=(f, f), strides=(1, 1), padding='same')(out)
    out = BatchNormalization(axis=3)(out)
    out = Activation('relu')(out)

    # 1x1 convolution -> batch norm; the activation comes after the addition
    out = Conv2D(n_last, kernel_size=(1, 1), strides=(1, 1), padding='valid')(out)
    out = BatchNormalization(axis=3)(out)

    # Residual addition followed by the final ReLU
    out = Add()([out, skip])
    return Activation('relu')(out)
Convolutional Block
def convolutional_block(X, f, filters, s):
    """Residual bottleneck block with a projected (convolutional) shortcut.

    Args:
        X: Input tensor; batch normalization is applied along axis 3.
        f: Side length of the square kernel used by the middle convolution.
        filters: Three filter counts, one for each main-path convolution.
        s: Stride used by the first main-path convolution and by the
            shortcut convolution.

    Returns:
        ReLU applied to the sum of the main path and the projected shortcut.
    """
    n_first, n_middle, n_last = filters

    # Keep a handle on the input; it is projected for the skip connection below.
    skip = X

    # Main path, step 1: strided 1x1 convolution -> batch norm -> ReLU
    out = Conv2D(n_first, kernel_size=(1, 1), strides=(s, s), padding='valid')(X)
    out = BatchNormalization(axis=3)(out)
    out = Activation('relu')(out)

    # Main path, step 2: f x f convolution ('same' padding) -> batch norm -> ReLU
    out = Conv2D(n_middle, kernel_size=(f, f), strides=(1, 1), padding='same')(out)
    out = BatchNormalization(axis=3)(out)
    out = Activation('relu')(out)

    # Main path, step 3: 1x1 convolution -> batch norm (no activation yet)
    out = Conv2D(n_last, kernel_size=(1, 1), strides=(1, 1), padding='valid')(out)
    out = BatchNormalization(axis=3)(out)

    # Shortcut: strided 1x1 convolution to n_last channels, then batch norm,
    # using the same stride as the first main-path convolution.
    skip = Conv2D(n_last, kernel_size=(1, 1), strides=(s, s), padding='valid')(skip)
    skip = BatchNormalization(axis=3)(skip)

    # Residual addition followed by the final ReLU
    out = Add()([out, skip])
    return Activation('relu')(out)
Main:
def load_hsi():
    """Load the Indian Pines arrays from the ``dataset`` directory.

    Reads the ``indian_pines_corrected`` variable from
    ``dataset/Indian_pines_corrected.mat`` and the ``indian_pines_gt``
    variable from ``dataset/Indian_pines_gt.mat``, prints both shapes and a
    separator line, and returns the two arrays.

    Returns:
        Tuple ``(X, y)`` of the two loaded arrays, in that order.
    """
    data_file = 'dataset/Indian_pines_corrected.mat'
    label_file = 'dataset/Indian_pines_gt.mat'

    X = loadmat(data_file)['indian_pines_corrected']
    y = loadmat(label_file)['indian_pines_gt']

    print(f"X shape: {X.shape}\ny shape: {y.shape}")
    print("===========================================")
    return X, y
# Training script: load the data, split it, one-hot encode the labels and fit.
X, y = load_hsi()
# train_test_split divides X and y along their first axis.
# NOTE(review): with a single (rows, cols, bands) scene this separates image
# rows rather than individual pixels; the (None, 97, 145, 200) model input in
# the reported summary is consistent with that. Confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# One-hot encode the label map, then flatten it to one row per pixel:
# (rows, cols, n_classes) -> (cols, rows, n_classes) -> (rows * cols, n_classes).
y_train = to_categorical(y_train)
y_train = y_train.transpose(1, 0, 2).reshape(-1, y_train.shape[2])
y_test = to_categorical(y_test)
y_test = y_test.transpose(1, 0, 2).reshape(-1, y_test.shape[2])
# Add a leading axis to X_train / X_test so each becomes a 4D batch holding
# a single sample.
X_train = np.expand_dims(X_train, axis=0)
X_test = np.expand_dims(X_test, axis=0)
# NOTE(review): the labels receive a leading axis as well, giving the
# (None, 14065, 17) target shape in the reported traceback, while the model
# outputs (None, 17). This is the mismatch behind the ValueError. Removing
# these two lines alone would still leave 1 input sample against 14065 label
# rows, so the way samples are formed needs rethinking.
y_train = np.expand_dims(y_train, axis=0)
y_test = np.expand_dims(y_test, axis=0)
# NOTE(review): `model` is not created in the visible code; presumably it
# comes from `ResNet50(...)`. Confirm against the full script.
history = model.fit(X_train, y_train, epochs=100)
I get the following error: File "C:\Users-\PycharmProjects\pythonProject\venv\lib\site-packages\keras\backend.py", line 5119, in categorical_crossentropy target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None, 14065, 17) and (None, 17) are incompatible
I think my data has the wrong shape. Could someone tell me what I'm doing wrong?
#Model summary
Model: "ResNet50"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 97, 145, 20 0 []
0)]
zero_padding2d (ZeroPadding2D) (None, 103, 151, 20 0 ['input_1[0][0]']
0)
conv2d (Conv2D) (None, 49, 73, 64) 627264 ['zero_padding2d[0][0]']
batch_normalization (BatchNorm (None, 49, 73, 64) 256 ['conv2d[0][0]']
alization)
activation (Activation) (None, 49, 73, 64) 0 ['batch_normalization[0][0]']
max_pooling2d (MaxPooling2D) (None, 47, 71, 64) 0 ['activation[0][0]']
conv2d_1 (Conv2D) (None, 47, 71, 64) 4160 ['max_pooling2d[0][0]']
batch_normalization_1 (BatchNo (None, 47, 71, 64) 256 ['conv2d_1[0][0]']
rmalization)
.....
activation_45 (Activation) (None, 6, 9, 2048) 0 ['add_14[0][0]']
conv2d_50 (Conv2D) (None, 6, 9, 512) 1049088 ['activation_45[0][0]']
batch_normalization_50 (BatchN (None, 6, 9, 512) 2048 ['conv2d_50[0][0]']
ormalization)
activation_46 (Activation) (None, 6, 9, 512) 0 ['batch_normalization_50[0][0]']
conv2d_51 (Conv2D) (None, 6, 9, 512) 2359808 ['activation_46[0][0]']
batch_normalization_51 (BatchN (None, 6, 9, 512) 2048 ['conv2d_51[0][0]']
ormalization)
activation_47 (Activation) (None, 6, 9, 512) 0 ['batch_normalization_51[0][0]']
conv2d_52 (Conv2D) (None, 6, 9, 2048) 1050624 ['activation_47[0][0]']
batch_normalization_52 (BatchN (None, 6, 9, 2048) 8192 ['conv2d_52[0][0]']
ormalization)
add_15 (Add) (None, 6, 9, 2048) 0 ['batch_normalization_52[0][0]',
'activation_45[0][0]']
activation_48 (Activation) (None, 6, 9, 2048) 0 ['add_15[0][0]']
average_pooling2d (AveragePool (None, 3, 5, 2048) 0 ['activation_48[0][0]']
ing2D)
flatten (Flatten) (None, 30720) 0 ['average_pooling2d[0][0]']
dense (Dense) (None, 17) 522257 ['flatten[0][0]']
==================================================================================================
Total params: 24,727,761
Trainable params: 24,674,641
Non-trainable params: 53,120
__________________________________________________________________________________________________
So the main issue is that the shape of your labels is `(1, 14065, 17)`, whereas your model outputs `(None, 17)`. Also, if you have 16 classes you need 16 units on the last dense layer, not 17.
For the labels, there is no need to add a dimension for the channel. They are labels, meaning either indexes or one-hot encodings, therefore the correct shape is `(14065, 17)` and not `(1, 14065, 17)`.
Another issue is how your images are shaped. For your images, the dimensions need to be `(n_images, dim1, dim2, n_channels)`, where `n_images` is the number of images, `dim1` and `dim2` are the dimensions of each image, and `n_channels` is the number of channels of each image.
EDIT:
"Yes that is the size of my X (145,145,200) that is what X.shape shows me after i load the data"
Then you simply do
`np.expand_dims(X, axis=0)` or `X.reshape((1, 145, 145, 200))` to get the shape `(1, 145, 145, 200)`.