I am trying to work with the ISIC 2017 dataset. I have the code from a paper and I am trying to make it run locally or on Colab (limited resources). On a proper server it works fine, even with fewer training images (originally there are 2000; on a server I tested with 1000 and it worked). Locally I tried with as few as ~20 images, but the RAM (8 GB) still collapses. Also, my local set-up has no CUDA device (yeah, it sucks), and on Colab, sadly, the RAM just explodes too.
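To see where the memory actually jumps, this is a minimal sketch of the kind of check one can drop between steps (assuming psutil is installed; the call sites are illustrative):

import os
import psutil

def log_rss(tag):
    """Print this process's resident memory in MiB."""
    rss = psutil.Process(os.getpid()).memory_info().rss
    print(f"[{tag}] RSS = {rss / 2**20:.0f} MiB")

# illustrative call sites:
# log_rss("after dataset init")
# log_rss("after first batch")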
I modified something from the original paper, so now I have the following dataset_isic.py, where the loading of images should be progressive (one image per __getitem__ call):
dataset_isic.py
from torch.utils.data import Dataset
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import random
import torch
import os
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
import pandas as pd
class ISIC2017(Dataset):
    # mask = label; the CSV only lists the image file names
    def __init__(self, csv, imgs_path, labels_path, transform, training=True):
        self.transform = transform
        self.df = pd.read_csv(csv)
        if training:
            print("getting train image/mask paths")
        else:
            print("getting val image/mask paths")
        # only the paths are stored here; pixels are read lazily in __getitem__
        self.images = [os.path.join(imgs_path, i) for i in self.df['image_name']]
        self.masks = [os.path.join(labels_path, i.replace('.jpg', '_segmentation.png'))
                      for i in self.df['image_name']]

    def __getitem__(self, index):
        # exactly one image/mask pair is decoded per call
        img = cv2.cvtColor(cv2.imread(self.images[index]), cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE)
        if img.shape[:2] != mask.shape[:2]:
            # nearest-neighbour keeps the mask values binary
            mask = cv2.resize(mask, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)
        if self.transform:
            augmented = self.transform(image=img, mask=mask)
            img = augmented['image']
            mask = augmented['mask']
        return img, mask

    def __len__(self):
        return len(self.images)
def for_train_transform():
    desired_size = 512
    train_transform = A.Compose([
        A.Resize(width=desired_size, height=desired_size),
        A.RandomRotate90(),
        A.Flip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0, scale_limit=(-0.2, 0.1), rotate_limit=40, p=0.5),
        A.RandomBrightnessContrast(
            brightness_limit=0.5,
            contrast_limit=0.1,
            p=0.5
        ),
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=100, val_shift_limit=80),
        A.GaussNoise(),
        A.OneOf([
            A.ElasticTransform(),
            A.GridDistortion(),
            A.OpticalDistortion(distort_limit=0.5, shift_limit=0)
        ]),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        ToTensorV2()], p=1.)
    return train_transform

test_transform = A.Compose([
    A.Resize(width=512, height=512),  # needed: stacking val images of different sizes into a batch fails
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0
    ),
    ToTensorV2()], p=1.)
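Just to double-check that the loading really is progressive, a quick sanity check (a minimal sketch; train_csv, train_imgs, and train_masks stand for the actual paths) builds the dataset and decodes a single pair:

# constructing the dataset should only build the path lists;
# each indexing operation decodes exactly one image/mask pair
ds = ISIC2017(train_csv, train_imgs, train_masks, for_train_transform())
print(len(ds))        # number of path pairs, no pixels in RAM yet
img, mask = ds[0]     # first pair decoded on demand
print(img.shape, mask.shape)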
And this, instead, is the training file. I will post only the train method, because everything works just fine before entering the actual training:
train.py
def train(model, save_name):
    model_savedir = args.checkpoint + save_name + '/'
    save_name = model_savedir + 'ckpt'
    print(model_savedir)
    if not os.path.exists(model_savedir):
        os.makedirs(model_savedir)
    train_ds = ISIC2017(train_csv, train_imgs, train_masks, train_transform)
    val_ds = ISIC2017(df_val, val_imgs, val_masks, test_transform, training=False)
    # train_ds = Mydataset(imgs_train, masks_train, train_transform)
    # val_ds = Mydataset(imgs_val, masks_val, test_transform)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    CosineLR = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-8)
    # num_workers=0 keeps everything in one process; each worker > 0 would
    # hold its own copy of the dataset object
    # train_dl = DataLoader(train_ds, shuffle=True, batch_size=args.batch_size, pin_memory=False,
    #                       num_workers=0, drop_last=True)
    # val_dl = DataLoader(val_ds, batch_size=args.batch_size, pin_memory=False, num_workers=0)
    train_dl = DataLoader(train_ds, shuffle=True, batch_size=16, pin_memory=False, num_workers=0, drop_last=True)
    val_dl = DataLoader(val_ds, batch_size=16, pin_memory=False, num_workers=0)
    best_acc = 0
    print("Start inside train function")
    with tqdm(total=epochs, ncols=60) as t:
        for epoch in range(epochs):
            epoch_loss, epoch_iou, epoch_val_loss, epoch_val_iou = \
                fit(epoch, epochs, model, train_dl, val_dl, device, criterion, optimizer, CosineLR)
            with open(model_savedir + 'log.txt', 'a') as f:
                f.write('epoch' + str(epoch) +
                        ' _train_loss' + str(epoch_loss) + ' _val_loss' + str(epoch_val_loss) +
                        ' _epoch_iou' + str(epoch_iou) + ' _val_iou' + str(epoch_val_iou) + '\n')
                if epoch_val_iou > best_acc:
                    f.write('\nhere\n')
                    best_model_wts = copy.deepcopy(model.state_dict())
                    best_acc = epoch_val_iou
                    torch.save(best_model_wts, save_name + '.pth')
            t.update(1)
    write_options(model_savedir, args, best_acc)
    print('Done!')
Now, what I want to achieve is to run locally even with just 50 or 70 training images. The validation set originally has 150 images, but I reduced it to 10; the reason, of course, is that I figured that with fewer images my device would not have any problem. Sadly, it does.
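If the loading really is per batch, the number of images in the CSV should barely matter; rough arithmetic for a single batch (my own back-of-the-envelope numbers, not from the paper):

# memory for ONE input batch, independent of dataset size
batch, channels, h, w = 16, 3, 512, 512
batch_mib = batch * channels * h * w * 4 / 2**20   # float32 = 4 bytes
print(f"input batch: {batch_mib:.0f} MiB")         # ~48 MiB

So the inputs themselves are cheap; what grows much faster are the intermediate activations of the network, which scale with batch size and resolution.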
Initially, this:
if training:
    self.df = pd.read_csv(csv)
    self.images, self.masks = imgs_path, labels_path
    print("getting images")
    self.images = [''.join([self.images, '/', i.replace('.jpg', '.jpg')]) for i in self.df['image_name']]
    self.masks = [''.join([self.masks, '/', i.replace('.jpg', '_segmentation.png')]) for i in self.df['image_name']]
else:
    print("taking val imgs and masks path")
    self.df = pd.read_csv(csv)
    self.images, self.masks = imgs_path, labels_path
    print("getting val")
    self.images = [''.join([self.images, '/', i]) for i in self.df['image_name']]
    self.masks = [''.join([self.masks, '/', i.replace('.jpg', '_segmentation.png')]) for i in self.df['image_name']]
and this:
img = cv2.imread(self.images[index])[:, :, ::-1]
mask = cv2.imread(self.masks[index], cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (512, 512))
if img.shape[:2] != mask.shape[:2]:
    mask = cv2.resize(mask, (img.shape[1], img.shape[0]))
if self.transform:
    augmented = self.transform(image=img, mask=mask)
    img = augmented['image']
    mask = augmented['mask']
were both in the training file, and that took a huge amount of memory, since all the images were loaded at once. I am kinda stuck right now, so I would like to understand where my mistake is, and whether it is actually possible to run this code locally or only on a server.
The dataset has a different size for each image, which is why I resize. But I don't think that could be the problem.
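One thing that might matter here: the raw ISIC images are large, so each cv2.imread decodes the full-resolution file before any resize happens. A one-off preprocessing pass that writes 512x512 copies to disk (a sketch with made-up folder names) would keep the per-sample decode cost small:

import os
import cv2

src_dir, dst_dir = 'ISIC2017/train_images', 'ISIC2017/train_images_512'  # hypothetical paths
os.makedirs(dst_dir, exist_ok=True)
for name in os.listdir(src_dir):
    img = cv2.imread(os.path.join(src_dir, name))
    if img is None:   # skip non-image files
        continue
    cv2.imwrite(os.path.join(dst_dir, name),
                cv2.resize(img, (512, 512), interpolation=cv2.INTER_AREA))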
EDIT: Now I am starting to think it is a problem of the model. It has 2M parameters, and maybe they are too many for my local set-up. With a model that has 0.15M parameters, though, even if the RAM gets saturated, this is what I obtain (error screenshot attached).
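For scale, 2M float32 parameters are only about 8 MB of weights, so the parameters alone should not explain 8 GB; a quick check (a sketch, assuming model is the instantiated network) makes the point:

def param_mib(model):
    """Rough weight memory in MiB (float32 only, no gradients or optimizer state)."""
    return sum(p.numel() for p in model.parameters()) * 4 / 2**20

# ~7.6 MiB for 2M params; gradients add one more copy and AdamW's moment
# buffers two more, still nowhere near 8 GB. The usual culprit is the
# activations, which scale with batch size and input resolution.
print(param_mib(model))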