I'm trying to implement federated learning to forecast solar photovoltaic generation using the LADPU dataset.
After preprocessing the dataset, I partitioned it by METER_FID to simulate a federated environment, so that each of 10 clients receives a disjoint subset of meters.
The next step involved creating sequences from these partitions as LSTM model inputs.
I used a create_sequences_efficiently function for this, which builds sequences from the START_READ and END_READ features to predict INTERVAL_READ.
However, when I split these into training and validation sets and set up PyTorch DataLoader objects (plus a separate test_loader for evaluation, not shown below), my Colab session consistently crashes after exhausting all available RAM.
I tried reducing the batch size and limiting the input to 6 CSV files, but the issue persists.
I am very new to this and have been following the tutorials and documentation at https://flower.ai for my use case. I can't figure out the problem, or maybe I am missing something. Can anyone please help?
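For context, the loading step looks roughly like this (a simplified sketch; the paths are placeholders, and the float32 dtype map is just my attempt to keep memory down):

import glob
import pandas as pd

# Placeholder paths; in the notebook these point at the actual LADPU CSV files
csv_files = sorted(glob.glob('data/ladpu/*.csv'))[:6]  # currently limited to 6 files

# Reading the numeric columns as float32 halves their memory vs. the default float64
dtype_map = {'START_READ': 'float32', 'END_READ': 'float32', 'INTERVAL_READ': 'float32'}
df = pd.concat(
    (pd.read_csv(f, dtype=dtype_map, parse_dates=['INTERVAL_TIME']) for f in csv_files),
    ignore_index=True,
)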
# For partitioning I did this
import numpy as np

NUM_CLIENTS = 10

def partition_data(df, num_clients):
    # Shuffle the unique meter IDs, then split them into num_clients groups
    np.random.seed(42)
    unique_ids = df['METER_FID'].unique()
    np.random.shuffle(unique_ids)
    partitions = np.array_split(unique_ids, num_clients)
    # One DataFrame per client, holding only that client's meters
    partitioned_dfs = [df[df['METER_FID'].isin(ids)] for ids in partitions]
    return partitioned_dfs

partitioned_dfs = partition_data(df, NUM_CLIENTS)
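As a quick sanity check (just for illustration), the partition sizes look reasonable:

# Rows and distinct meters per simulated client
for i, part in enumerate(partitioned_dfs):
    print(f"client {i}: {len(part)} rows, {part['METER_FID'].nunique()} meters")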
# Then created sequences from each partition to serve as input for an LSTM model
def create_sequences_efficiently(df, sequence_length=5):
    sequences, targets = [], []
    df = df.sort_values('INTERVAL_TIME')
    # Extract the column arrays once, as float32, instead of re-materializing
    # `.values` on every loop iteration (which also defaults to float64)
    features = df[['START_READ', 'END_READ']].to_numpy(dtype=np.float32)
    reads = df['INTERVAL_READ'].to_numpy(dtype=np.float32)
    for i in range(sequence_length, len(df)):
        sequences.append(features[i - sequence_length:i])
        targets.append(reads[i])
    return np.array(sequences), np.array(targets)
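For a rough sense of where the RAM goes (the row count below is hypothetical, not measured from the real data), the materialized sequence array scales linearly with rows:

# Back-of-the-envelope memory estimate for the sequence array (illustrative only)
n_rows = 5_000_000           # hypothetical total rows across all partitions
seq_len, n_feats = 5, 2
bytes_per_val = 4            # float32; the default float64 would double this
print(f"~{n_rows * seq_len * n_feats * bytes_per_val / 1e9:.1f} GB")  # ~0.2 GB

On top of that, torch.tensor() copies its input array, so the peak usage is at least double unless torch.from_numpy is used to share memory instead.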
# Then split into training/validation sets and wrap them in DataLoaders
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

def create_loader(partition, sequence_length=5, batch_size=32):
    sequences, targets = create_sequences_efficiently(partition, sequence_length)
    # from_numpy shares memory with the arrays; torch.tensor would copy them
    sequence_tensor = torch.from_numpy(sequences)
    target_tensor = torch.from_numpy(targets)
    # Create a TensorDataset and wrap it in a DataLoader
    dataset = TensorDataset(sequence_tensor, target_tensor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Create the loaders for each partition
train_loaders, val_loaders = [], []
for partition_df in partitioned_dfs:
    # Split each client's data into training and validation sets
    train_df, val_df = train_test_split(partition_df, test_size=0.2, random_state=42)
    # Create loaders for the training and validation sets
    train_loader = create_loader(train_df, sequence_length=5, batch_size=16)
    val_loader = create_loader(val_df, sequence_length=5, batch_size=16)
    train_loaders.append(train_loader)
    val_loaders.append(val_loader)
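In case materializing every sequence up front (and then copying it into tensors) is what exhausts RAM, one alternative I am considering is a Dataset that slices windows on the fly from a single shared array per partition. This is an untested sketch, not code from my notebook:

import numpy as np
import torch
from torch.utils.data import Dataset

class SequenceDataset(Dataset):
    """Builds (sequence, target) pairs lazily instead of storing them all."""
    def __init__(self, df, sequence_length=5):
        df = df.sort_values('INTERVAL_TIME')
        self.seq_len = sequence_length
        # One float32 copy of the features/targets per partition, nothing else
        self.features = torch.from_numpy(
            df[['START_READ', 'END_READ']].to_numpy(dtype=np.float32))
        self.targets = torch.from_numpy(
            df['INTERVAL_READ'].to_numpy(dtype=np.float32))

    def __len__(self):
        return max(len(self.targets) - self.seq_len, 0)

    def __getitem__(self, idx):
        # Slice a window on demand; only the batch being collated is copied
        return self.features[idx:idx + self.seq_len], self.targets[idx + self.seq_len]

It would drop into the loop above as DataLoader(SequenceDataset(train_df), batch_size=16, shuffle=True) in place of create_loader. Would that be the right direction, or is something else eating the RAM?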