How do the training and test routines work when applying a spatial transformer network?

32 Views Asked by zakaria14 At 24 May 2022 at 19:32

I'm just starting a new project and I'm having some problems understanding how the training and testing routines work in a neural network. So first I started with this tutorial provided by PyTorch.

STN pytorch tutorial

I copied the same layers and all the parameters; I didn't really change anything. I was able to understand the tutorial, but I wanted to go further, so I wanted to create my own dataset and apply it to the same convolutional neural network. My idea was as follows: I take an image from the FashionMNIST dataset instead of the MNIST digit dataset. Let's take this example:

item-1

item-2

Then I thought about rotating the images by random angles to create, for example, 100 images per item. After that, my idea was to create a dataset from these rotated images, assigning index 0 to item-1 and index 1 to item-2 as the classification ID, because I found that torch.utils.data.DataLoader needs both the images and the classification index for the training and testing parts. After creating the dataset, I want to split the 200 images into two parts: the first part is the training dataset, which contains 160 images, and the rest of the images (40 images) form the test dataset. My expectation is that the STN layers will give me an image that is aligned, i.e. not rotated, once I use the visualize_stn method as in the tutorial. Can anyone tell me whether my ideas are correct or whether I am thinking about this wrongly?

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

plt.ion()  # put matplotlib in interactive mode

# NOTE(review): the Normalize constants (0.1307, 0.3081) are kept exactly as
# in the original script; they may not be the FashionMNIST statistics --
# confirm before relying on the normalisation.

# Training split: fetched into the current directory if it is not there yet.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.FashionMNIST(
        root='.',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

# Test split: same preprocessing, but no download flag is passed here.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.FashionMNIST(
        root='.',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

class Net(nn.Module):
    """Small CNN classifier preceded by a spatial transformer (STN).

    The STN predicts one 2x3 affine matrix per input image and resamples the
    image with it before the classifier sees it.  The hard-coded flatten
    sizes (``10 * 3 * 3`` and ``320``) correspond to single-channel 28x28
    inputs; other input sizes would need different values.
    """

    def __init__(self):
        super().__init__()
        # Classification branch.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

        # Spatial transformer localization-network
        self.localization = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=7),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
            nn.Conv2d(8, 10, kernel_size=5),
            nn.MaxPool2d(2, stride=2),
            nn.ReLU(True),
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(True),
            nn.Linear(32, 3 * 2),
        )

        # Initialize the weights/bias with identity transformation: with zero
        # weights the last layer outputs its bias, i.e. [[1, 0, 0], [0, 1, 0]],
        # so the STN starts out as a no-op.
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(
            torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

    def stn(self, x):
        """Spatial transformer forward pass.

        Args:
            x: input batch of images.

        Returns:
            A tensor of the same size as ``x`` containing the input resampled
            under the affine transform predicted for each image.
        """
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        # align_corners=False is the library default; passing it explicitly
        # keeps the behaviour and avoids the "default changed" warning.  The
        # same value must be used in both calls.
        grid = F.affine_grid(theta, x.size(), align_corners=False)
        x = F.grid_sample(x, grid, align_corners=False)

        return x

    def forward(self, x):
        """Return per-class log-probabilities (10 classes) for the batch ``x``."""
        # transform the input
        x = self.stn(x)

        # Perform the usual forward pass
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


  # `device` is used here and by the training/evaluation code but was never
  # defined: use the GPU when one is available, otherwise the CPU.
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  model = Net().to(device)
  optimizer = optim.SGD(model.parameters(), lr=0.01)


  def train(epoch):
      """Train the global ``model`` for one pass over ``train_loader``.

      Performs one optimizer step per batch and prints the current loss
      every 500 batches.

      Args:
          epoch: epoch number, used only in the progress message.
      """
      model.train()
      for batch_idx, (data, target) in enumerate(train_loader):
          data, target = data.to(device), target.to(device)

          # The optimisation step has to run once per batch, i.e. inside the
          # loop; outside it only the last batch would ever be used.
          optimizer.zero_grad()
          output = model(data)
          loss = F.nll_loss(output, target)
          loss.backward()
          optimizer.step()

          if batch_idx % 500 == 0:
              print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                  epoch, batch_idx * len(data), len(train_loader.dataset),
                  100. * batch_idx / len(train_loader), loss.item()))



  def test():
      """Evaluate the global ``model`` on ``test_loader`` and print a summary.

      Prints the average negative log-likelihood per sample and the number
      (and percentage) of correctly classified samples.
      """
      with torch.no_grad():
          model.eval()
          test_loss = 0
          correct = 0
          for data, target in test_loader:
              data, target = data.to(device), target.to(device)
              # Forward pass and accumulation must happen for every batch,
              # so they live inside the loop.
              output = model(data)

              # sum up batch loss (reduction='sum' replaces the deprecated
              # size_average=False)
              test_loss += F.nll_loss(output, target, reduction='sum').item()
              # get the index of the max log-probability
              pred = output.max(1, keepdim=True)[1]
              correct += pred.eq(target.view_as(pred)).sum().item()

          test_loss /= len(test_loader.dataset)
          print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
                .format(test_loss, correct, len(test_loader.dataset),
                        100. * correct / len(test_loader.dataset)))
   def convert_image_np(inp, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
       """Convert a Tensor to numpy image.

       The tensor is moved from channel-first to channel-last layout,
       de-normalised as ``std * inp + mean`` and clipped to ``[0, 1]``.

       Args:
           inp: CPU tensor with three axes, channels first.
           mean: per-channel offset added back after scaling.  The default is
               the value hard-coded in the original script.
           std: per-channel scale.  The default is the value hard-coded in
               the original script.

       Returns:
           A numpy array with the channel axis last and values in ``[0, 1]``.

       NOTE(review): the defaults differ from the constants passed to
       ``transforms.Normalize`` for the data loaders, so displayed
       intensities are not an exact inverse of that normalisation --
       pass ``mean``/``std`` explicitly if that matters.
       """
       inp = inp.numpy().transpose((1, 2, 0))
       mean = np.asarray(mean, dtype=float)
       std = np.asarray(std, dtype=float)
       inp = std * inp + mean
       inp = np.clip(inp, 0, 1)
       return inp

def visualize_stn():
    """Show one test batch next to the same batch after the STN.

    Takes the first batch yielded by ``test_loader``, passes it through
    ``model.stn`` and draws both image grids side by side.
    """
    with torch.no_grad():
        # Only the images of the batch are needed; the labels are ignored.
        first_batch = next(iter(test_loader))
        images = first_batch[0].to(device)

        originals = images.cpu()
        warped = model.stn(images).cpu()

        in_grid = convert_image_np(torchvision.utils.make_grid(originals))
        out_grid = convert_image_np(torchvision.utils.make_grid(warped))

        # Left panel: inputs as loaded; right panel: STN output.
        _, axes = plt.subplots(1, 2)
        panels = (
            (in_grid, 'Dataset Images'),
            (out_grid, 'Transformed Images'),
        )
        for axis, (grid_image, title) in zip(axes, panels):
            axis.imshow(grid_image)
            axis.set_title(title)

for epoch in range(1, 20 + 1):
    # One training pass followed by a full test-set evaluation per epoch.
    train(epoch)
    test()

# Visualize the STN transformation on some input batch.  This runs once,
# after training has finished; inside the loop it would plot (and, with
# plt.show(), potentially block) on every epoch.
visualize_stn()

plt.ioff()
plt.show()

Original Q&A

How do the training and test routines work when applying a spatial transformer network?

There are 0 best solutions below

Related Questions in PYTHON

Related Questions in MACHINE-LEARNING

Related Questions in PYTORCH

Related Questions in COMPUTER-VISION

Related Questions in SPATIAL-TRANSFORMER-NETWORK

Trending Questions

Popular # Hashtags

Popular Questions