I have completed the following steps:
- Installed tensorflow-macos and tensorflow-metal, loaded the "meta-llama/Llama-2-7b-hf" model after validating my Hugging Face token, and moved it to the device with model.to(device).
- Set device = torch.device("mps"). The machine reports that the GPU is active on the Mac (the check I used is sketched below), but when I run the model on my dataset it is very slow and appears to use the CPU instead of the GPU. Please suggest a solution; I have been struggling with this for the last 15 days.
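This is roughly how I confirmed that the MPS backend is visible (a minimal sketch of my own; torch.backends.mps.is_built() and is_available() are the standard probes in PyTorch 1.12+, as far as I know):

```python
import torch

# Was this PyTorch build compiled with MPS support, and is the
# Apple-silicon GPU actually reachable at runtime?
print("MPS built:    ", torch.backends.mps.is_built())
print("MPS available:", torch.backends.mps.is_available())

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using device:", device)
```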
I am pasting my training code below.
```python
# Load the pre-trained model and tokenizer
import time

import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"

# Load the model tokenizer with the user authentication token
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
# Llama has no padding token, so reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token

# model = AutoModelForCausalLM.from_pretrained(model_name, num_labels=len(label_encoder.classes_))
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# The classification head needs to know the padding token id
model.config.pad_token_id = tokenizer.pad_token_id

device = torch.device("mps")
model.to(device)

# Tokenize and encode the text data
train_encodings1 = tokenizer(list(X_train), truncation=True, padding=True, max_length=5000)
test_encodings1 = tokenizer(list(X_test), truncation=True, padding=True, max_length=5000)

# Create DataLoaders for the training and testing data
train_dataset1 = TensorDataset(torch.tensor(train_encodings1['input_ids']),
                               torch.tensor(train_encodings1['attention_mask']),
                               torch.tensor(y_train))
test_dataset1 = TensorDataset(torch.tensor(test_encodings1['input_ids']),
                              torch.tensor(test_encodings1['attention_mask']),
                              torch.tensor(y_test))
train_loader1 = DataLoader(train_dataset1, batch_size=1, shuffle=True)
test_loader1 = DataLoader(test_dataset1, batch_size=1, shuffle=False)

# Set up the optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = torch.nn.CrossEntropyLoss()
# Wait for any queued MPS work to finish before timing
torch.mps.synchronize()

# Training
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    print(time.ctime())
    start = time.time()
    print("Train")
    if device.type == 'mps':
        total_loss = 0.0
        for i, (input_ids, attention_mask, labels) in enumerate(train_loader1):
            print(f"Starting of loop: {i + 1}")
            # Move the batch onto the MPS device
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            print(f"Middle of loop: {i + 1}")
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            print(f"End of loop: {i + 1}")
        average_loss = total_loss / len(train_loader1)
        print(f"Epoch {epoch + 1}, Average Loss: {average_loss}")
        stop = time.time()
        print(f"Training time: {stop - start}s")
    else:
        print("exit")
        exit()
```
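For what it is worth, this is the small check (my own sketch) I can run after model.to(device) to see where the weights and a batch actually end up:

```python
# Sanity check: where do the model weights actually live after .to(device)?
print(next(model.parameters()).device)   # I expect this to print mps:0

# And where does a batch land after .to(device)?
input_ids, attention_mask, labels = next(iter(train_loader1))
print(input_ids.to(device).device)       # likewise mps:0
```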
How can I speed up training so that it actually runs on the GPU of my MacBook Pro M2? I am also getting the following error after a few loop iterations:
```
RuntimeError: MPS backend out of memory (MPS allocated: 35.79 GB, other allocations: 508.66 MB, max allowed: 36.27 GB). Tried to allocate 291.25 KB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).
```
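For completeness, these are the memory-saving knobs I am considering (a sketch under my own assumptions: torch.mps.empty_cache() needs a recent PyTorch, gradient_checkpointing_enable() is the Transformers API as I understand it, and max_length=512 is just my guess at a cheaper sequence length):

```python
import torch

# 1. Trade compute for memory: recompute activations during backward
#    instead of keeping them all alive at once.
model.gradient_checkpointing_enable()

# 2. Tokenize to a shorter sequence length; activation memory grows with
#    sequence length, and Llama-2 was trained with a 4096-token context,
#    so max_length=5000 may be both slow and wasteful.
train_encodings1 = tokenizer(list(X_train), truncation=True,
                             padding=True, max_length=512)

# 3. Release cached MPS allocations between steps.
torch.mps.empty_cache()
```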