I am trying to get results from a pretrained model using coco2017 dataset, but I get the error below in my training loop:-
Exception has occurred: IndexError
too many indices for tensor of dimension 0
File "C:\Users\manoj\Downloads\FYP\OBJECT_DETECTION_V2.py", line 323, in <module>
output = model(images,valid_targets)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: too many indices for tensor of dimension 0
my getitem code where i make the targets:-
def __getitem__(self, idx):
annotation = self.annotations[idx].copy()
image_id = annotation['image_id']
image_filename = "{:012d}.jpg".format(image_id)
image_path = os.path.join(self.image_dir, image_filename)
#print("Original target:", annotation)
if not os.path.exists(image_path):
print(f"Warning: Image not found - {image_path}. Skipping.")
return None
image = Image.open(image_path).convert('RGB')
bbox = annotation['bbox']
#print(bbox)
if bbox[0] == 0 or bbox[1] == 0 or bbox[2] == 0 or bbox[3] == 0:
print(f"Warning: Bounding box with x_min or y_min or width or height as 0 found. Skipping image.")
return None
if bbox is None:
print(f"Warning: 'bbox' key not found in annotation {annotation}. Skipping.")
return None
category_id = annotation['category_id']
#print(category_id)
if category_id is None:
print(f"Warning: 'category_id' key not found in annotation {annotation}. Skipping.")
return None
if self.transform:
image, bbox = self.transform(image,bbox)
else:
image = transforms.ToTensor()(image)
print("Keys in annotation before assertion:", annotation.keys())
extra_keys = set(annotation.keys()) - {'image_id', 'bbox', 'category_id'}
for key in extra_keys:
del annotation[key]
print("Keys in annotation after assertion:", annotation.keys())
#annotation['label'] = label
category_name = self.category_id_to_name.get(category_id)
if category_name is not None:
#annotation['labels'] = category_name
category_id = [category['id'] for category in categories_info if category['name'] == category_name][0]
else:
print(f"Category name not found for category ID: {category_id}")
#labels_tensor = torch.tensor(annotation['labels'], dtype=torch.int64)
#annotation['labels'] = labels_tensor
assert set(annotation.keys()) == {'image_id', 'bbox', 'category_id'}, "Target keys modified during loading/transformation"
# Check data types of target elements
assert isinstance(annotation['bbox'], list), "Bounding box data type modified"
assert isinstance(annotation['category_id'], int), "Category ID data type modified"
bbox[2] += bbox[0]
bbox[3] += bbox[1]
bbox = torch.tensor(bbox, dtype=torch.float32)
if bbox.dim() == 1:
bbox = bbox.view(1, -1)
# Convert category ID to tensor
category_id_tensor = torch.tensor(category_id, dtype=torch.int64)
category_id = torch.tensor(category_id, dtype=torch.int64)
image_id = torch.tensor(image_id, dtype=torch.int64)
target = {
'boxes': bbox,
'category_id': category_id,
'image_id':image_id,
'labels': category_id_tensor # annotation['labels']
}
return image, target
and my training loop:-
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
model.train()
print(f"Epoch {epoch+1}/{num_epochs}")
for batch_idx, (images, targets) in enumerate(train_loader):
print(f"Batch {batch_idx}, Number of images: {len(images)}, Number of targets: {len(targets)}")
for i, target in enumerate(targets):
print(f"Target {i}: {target}")
for j, bbox in enumerate(target['boxes']):
print(f"Bounding box {j+1}: {bbox}")
if len(targets) == 0 or len(images) == 0:
print("Warning: Empty targets list. Skipping batch.")
continue
print(targets)
print(type(targets))
for t in targets:
if isinstance(t, dict):
print("dictionary target detected:", t)
print(type(targets))
print(len(targets))
images = [image.to(device) for image in images]
#targets = [{k: v.to(device) for k, v in t.items()} if isinstance(t, dict) else t for t in targets]
# Move targets to device
targets = [{k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in t.items()} if isinstance(t, dict) else t for t in targets]
print("Targets after conversion to device:", targets)
for i, target in enumerate(targets):
bbox = target['boxes'][0]
print(f"Bounding box {i+1}: {bbox}")
valid_targets = []
for target_idx, target in enumerate(targets):
valid_boxes = []
for box_idx, box in enumerate(target['boxes']):
# Check if width and height are positive
if box[0] > 0 and box[1] > 0 and box[2] > 0 and box[3] > 0:
valid_boxes.append(box)
#print(f"target index {target_idx}, box index {box_idx}: {box}, width: {box[2]}, height: {box[3]}")
else:
print(f"Invalid box at target index {target_idx}, box index {box_idx}: {box}, width: {box[2]}, height: {box[3]}")
# Only add targets with valid boxes
if valid_boxes:
target['boxes'] = torch.stack(valid_boxes)
valid_targets.append(target)
print(f"valid targets: {valid_targets}")
#valid_indices = []
#for i, item in enumerate(valid_targets):
# if 'boxes' in item and len(item['boxes']) == 1: # Assuming each item has only one box
## box = item['boxes'][0]
# if len(box) == 4:
# x_min, y_min, width, height = box
# if width > 0 and height > 0:
# valid_indices.append(i)
# else:
# print(f"Invalid bounding box at index {i}: {box}")
# else:
# print(f"Invalid bounding box at index {i}: {box}")
# else:
# print(f"No valid bounding box found at index {i}")
#valid_targets = [valid_targets[i] for i in valid_indices]
if not valid_targets:
print("Warning: Empty valid targets list. Skipping batch.")
continue
# Forward pass
output = model(images,valid_targets)
# Extract predicted bounding boxes from the output
pred_boxes = output['boxes']
# Extract predicted labels from the output
pred_labels = output['labels']
# Extract target bounding boxes from the targets
target_boxes = [t['boxes'] for t in valid_targets]
# Extract target labels from the targets
target_labels = [t['labels'] for t in valid_targets]
# Calculate IoU loss
iou_loss_value = iou_loss(pred_boxes, target_boxes)
# Calculate cross-entropy loss for labels
labels_loss = torch.nn.functional.cross_entropy(pred_labels, torch.cat(target_labels))
# Total loss (you may want to combine it with other losses)
total_loss = iou_loss_value + labels_loss
optimizer.zero_grad()
total_loss.backward()
optimizer.step()
print(f"Batch {batch_idx}, Loss: {total_loss.item()}")
lr_scheduler.step()
print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss.item()}")
What am I doing wrong?