I am working on a CycleGAN implementation in which I convert images to the LAB color space. The Tanh at the final layer of the generator and discriminator squashes outputs into [-1, 1], which does not match the LAB channel ranges, so I added the correction module below to rescale the output. However, when I call my generator (which has the correction on its final layer), I get a runtime error. Here is the generator code:
import torch
import torch.nn as nn

# LAB channel bounds referenced below; they are defined elsewhere in my
# models.py, shown here with standard LAB ranges so the snippet is self-contained
A_L, B_L = 0.0, 100.0        # L (luminance) range
A_AB, B_AB = -128.0, 127.0   # a/b (chrominance) range

class TanhCorrection(nn.Module):
    def __init__(self, steepness=4):
        super(TanhCorrection, self).__init__()
        self.lumi_offset = nn.Parameter(torch.tensor([1.]))
        self.steepness = steepness

    def steep_sig(self, x):
        return 1 / (1 + torch.exp(-self.steepness * x))

    def forward(self, x) -> torch.Tensor:
        x_l = x[:, 0:1, :, :]   # luminance channel
        x_ab = x[:, 1:, :, :]   # chrominance channels
        # map the tanh output from [-1, 1] into the LAB ranges
        x_l = (B_L - A_L) * (x_l + 1) / 2 + A_L
        x_ab = (B_AB - A_AB) * (x_ab + 1) / 2 + A_AB
        return torch.cat((x_l * self.steep_sig(self.lumi_offset), x_ab), dim=1)
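As a sanity check, the correction module runs without error when called in isolation on a dummy tensor (the 3-channel 64x64 shape is arbitrary), so the module itself seems fine:

# TanhCorrection alone handles a fake tanh output without error
corr = TanhCorrection()
dummy = torch.tanh(torch.randn(1, 3, 64, 64))  # values in [-1, 1], like the generator's Tanh
print(corr(dummy).shape)  # torch.Size([1, 3, 64, 64])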
class GlobalGenerator(nn.Module):
    def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d,
                 padding_type='reflect'):
        assert (n_blocks >= 0)
        super(GlobalGenerator, self).__init__()
        model = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0), norm_layer(ngf),
                 nn.ReLU()]
        # downsample
        for i in range(n_downsampling):
            mult = 2 ** i
            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1),
                      norm_layer(ngf * mult * 2), nn.ReLU()]
        # resnet blocks
        mult = 2 ** n_downsampling
        for i in range(n_blocks):
            model += [ResnetBlock(ngf * mult, padding_type=padding_type, activation=nn.GELU(),
                                  norm_layer=norm_layer)]
        # upsample
        for i in range(n_downsampling):
            mult = 2 ** (n_downsampling - i)
            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=2, padding=1,
                                         output_padding=1),
                      norm_layer(int(ngf * mult / 2)), nn.ELU()]
        model += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0), nn.Tanh()]
        model += [TanhCorrection()]
        self.model = nn.Sequential(*model)

    def forward(self, x):
        return self.model(x)
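Calling GlobalGenerator on its own also runs without error, since TanhCorrection sits after the final convolution there (a sketch; sizes are arbitrary and ResnetBlock is the usual pix2pixHD residual block, omitted here):

# the standalone global generator works; the failure only appears with LocalEnhancer
gen = GlobalGenerator(input_nc=3, output_nc=3)
x = torch.randn(1, 3, 256, 256)
print(gen(x).shape)  # torch.Size([1, 3, 256, 256])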
class LocalEnhancer(nn.Module):
    def __init__(self, input_nc, output_nc, ngf=32, n_downsample_global=3, n_blocks_global=9,
                 n_local_enhancers=1, n_blocks_local=3, norm_layer=nn.BatchNorm2d, padding_type='reflect'):
        super(LocalEnhancer, self).__init__()
        self.n_local_enhancers = n_local_enhancers

        # global generator model
        ngf_global = ngf * (2 ** n_local_enhancers)
        model_global = GlobalGenerator(input_nc, output_nc, ngf_global, n_downsample_global, n_blocks_global,
                                       norm_layer).model
        model_global = [model_global[i] for i in
                        range(len(model_global) - 3)]  # get rid of final convolution layers
        self.model = nn.Sequential(*model_global)

        # local enhancer layers
        for n in range(1, n_local_enhancers + 1):
            # downsample
            ngf_global = ngf * (2 ** (n_local_enhancers - n))
            model_downsample = [nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf_global, kernel_size=7, padding=0),
                                norm_layer(ngf_global), nn.ReLU(True),
                                nn.Conv2d(ngf_global, ngf_global * 2, kernel_size=3, stride=2, padding=1),
                                norm_layer(ngf_global * 2), nn.ReLU(True)]
            # residual blocks
            model_upsample = []
            for i in range(n_blocks_local):
                model_upsample += [ResnetBlock(ngf_global * 2, padding_type=padding_type, norm_layer=norm_layer)]
            # upsample
            model_upsample += [
                nn.ConvTranspose2d(ngf_global * 2, ngf_global, kernel_size=3, stride=2, padding=1,
                                   output_padding=1),
                norm_layer(ngf_global), nn.ELU()]
            # final convolution
            if n == n_local_enhancers:
                model_upsample += [nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
                                   nn.Tanh()]
            setattr(self, 'model' + str(n) + '_1', nn.Sequential(*model_downsample))
            setattr(self, 'model' + str(n) + '_2', nn.Sequential(*model_upsample))
        self.downsample = nn.AvgPool2d(3, stride=2, padding=(1, 1), count_include_pad=False)

    def forward(self, x):
        # create input pyramid
        input_downsampled = [x]
        for i in range(self.n_local_enhancers):
            input_downsampled.append(self.downsample(input_downsampled[-1]))
        # output at coarsest level
        output_prev = self.model(input_downsampled[-1])
        # build up one layer at a time
        for n_local_enhancers in range(1, self.n_local_enhancers + 1):
            model_downsample = getattr(self, 'model' + str(n_local_enhancers) + '_1')
            model_upsample = getattr(self, 'model' + str(n_local_enhancers) + '_2')
            input_i = input_downsampled[self.n_local_enhancers - n_local_enhancers]
            output_prev = model_upsample(model_downsample(input_i) + output_prev)
        return output_prev
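For context, the generator is constructed and called roughly like this in train.py (a sketch; the 256x256 input size is an assumption on my part, but it reproduces the exact 128 vs 134 sizes in the traceback):

# simplified from train.py: build the LAB-output generator and run a batch through it
gen_ba = LocalEnhancer(input_nc=3, output_nc=3)
real_b = torch.randn(1, 3, 256, 256)  # stand-in for a batch of LAB images
fake_a = gen_ba(real_b)               # raises the RuntimeError below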
Here is the error message:
Traceback (most recent call last):
File "C:\Users\phill\PycharmProjects\pythonProject\hd\train.py", line 125, in <module>
train()
File "C:\Users\phill\PycharmProjects\pythonProject\hd\train.py", line 74, in train
fake_a = gen_ba(real_b)
^^^^^^^^^^^^^^
File "C:\Users\phill\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\phill\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\phill\PycharmProjects\pythonProject\hd\models.py", line 162, in forward
output_prev = model_upsample(model_downsample(input_i) + output_prev)
~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~
RuntimeError: The size of tensor a (128) must match the size of tensor b (134) at non-singleton dimension 3
Process finished with exit code 1