So I have a workbook, opened using openxlsx, called 1111_copy_master.xlsx (fake data used) and I've written a code that will open this keeping all the colour formatting etc, and filter out a list of of specific rows based on a few things (each row represents a sample aliquot in a freezer location) such as sample thaws, if the sample has already been taken out, and sample volume. If a sample has been taken, it will have a code starting SE followed by any number in the 'Subproject column' if it hasn't been used, it will have a code starting RE in that column. My problem is that once ive pulled out the list of samples I want and written that in a new xlsx file(i.e. rows i want based on the filtering requirements), I can't seem to be able to update the row in the original workbook to have an SE code in the subproject column to now show it has been taken. Ideally it would make the whole row red too - but i can easily do this in excel if I can get the SE code to enter into the subproject column for the rows I've taken.
Can anyone fix this? It's under #6. in the code notes
#code for pulling sample list (dummy data)
install.packages("openxlsx")
library(openxlsx)
install.packages("dplyr")
library(dplyr)
#2. Read the Excel file - including all the formatting
wb <- loadWorkbook("1111_copy_master.xlsx")
sheet <- 1 # specify the sheet number
data <- read.xlsx("1111_copy_master.xlsx", sheet = sheet)
# Hypothetical color mapping based on row numbers
color_mapping <- c("red", "blue", "green", "yellow") # Example colors
# Create a column for colors based on row numbers
data$color <- color_mapping[((1:nrow(data)) %% length(color_mapping)) + 1]
saveWorkbook(wb, "testing_colour_formatting.xlsx", TRUE) #this file will confirm formatting is correct
#3. Now read second excel file - the list of samples requested in 2 columns (Sample_Original_ID and ItemType)
df2 <- read.xlsx("1111_samplelist.xlsx")
#4. Filtering. Filter df2 based on the master data's Sample_Original_ID and ItemType
df2_filtered <- df2 %>%
semi_join(data, by = c("Sample_Original_ID", "ItemType"))
# Filter rows based on matching Sample_Original_ID and ItemType
filtered_data <- data[data$Sample_Original_ID %in% df2_filtered$Sample_Original_ID & data$ItemType %in% df2_filtered$ItemType, ]
# Further filter based on additional conditions
filtered_data <- filtered_data[!grepl("^SE", filtered_data$Subproject) & filtered_data$Sample_Thaws == 0 & filtered_data$Sample_Volume > 400, ]
# Remove rows with already existing SE code in Subproject column
filtered_data <- filtered_data[!grepl("^SE", filtered_data$Subproject), ]
# Group by Sample_Original_ID and select row with lowest Sample_Part within each group
selected_rows <- filtered_data %>%
group_by(Sample_Original_ID, ItemType) %>%
slice(which.min(as.numeric(gsub("\\D", "", Sample_Part))))
# Update Subproject column for the selected rows
selected_rows$Subproject <- "SE275"
print(selected_rows)
# Custom function to convert date columns in selected_rows dataframe (gives correct date info in the SE275.xlsx)
convert_dates <- function(date_column) {
# Check if the date column contains "/"
if (any(grepl("/", date_column))) {
# Convert dates in "dd/mm/yyyy" format
return(as.Date(date_column, format = "%d/%m/%Y"))
} else {
# Convert Excel date serial format
return(as.Date(as.numeric(date_column), origin = "1899-12-30"))
}
}
# Apply the function to specified columns, correcting the date/string issue
selected_rows <- selected_rows %>%
mutate(across(c(Date_Of_Import, Extraction_Date, Date_Sent, Date_Received), convert_dates))
#5. Export selected rows to new Excel file - this is your location list
write.xlsx(selected_rows, "SE275_example.xlsx")
#6.Update the existing rows in the workbook and save the updated workbook
for (i in 1:nrow(selected_rows)) {
row_index <- which(data$Sample_Original_ID == selected_rows[i, "Sample_Original_ID"] & data$ItemType == selected_rows[i, "ItemType"])
if (length(row_index) > 0) {
data[row_index, "Subproject"] <- "SE275"
# Define date format (adjust as per your date format)
date_format <- "yyyy-mm-dd"
writeDataTable(wb, sheet = sheet, x = data[row_index, ], startRow = min(row_index), startCol = 1, colNames = TRUE, rowNames = FALSE, withFilter = FALSE, dateTimeFormatting = list("Date" = date_format))
}
}
#7. Save the updated workbook (updated master sheet)
saveWorkbook(wb, "1111_copy_master_updated.xlsx", overwrite = TRUE)
#still doesnt update old workbook
dput(head(data))
structure(list(Sample_ID = c("1111_193001-20180130 RE33 ", "1111_193001-20180130 RE33 1 of 15","1111_193001-20180130 RE33 2 of 15", "1111_193001-20180130 RE33 3 of 15","1111_193001-20180130 RE33 4 of 15", "1111_193001-20180130 RE33 5 of 15"), Sample_Project_Code = c("1111\_", "1111\_", "1111\_", "1111\_","1111\_", "1111\_"), Sample_Original_ID = c("193001-20180130","193001-20180130", "193001-20180130", "193001-20180130", "193001-20180130","193001-20180130"), Sample_Part = c(NA, "1 of 15", "2 of 15","3 of 15", "4 of 15", "5 of 15"), Original_Batch_Code = c("RE33","RE33", "RE33", "RE33", "RE33", "RE33"), Subproject = c("RE33","RE33", "RE33", "RE33", "RE33", "RE33"), Last_Action_Type = c(NA_character\_,NA_character\_, NA_character\_, NA_character\_, NA_character\_, NA_character\_), Date_Of_Import = c("43130", "43130", "43130", "43130", "43130","43130"), ItemType = c("Card ", "P ", "P ", "C", "S", "S"), Sample_Container_Type = c("DMPK-C Card","Sarstedt 2mL", "Sarstedt 2mL", "Sarstedt 2mL", "Sarstedt 2mL","Sarstedt 2mL"), Sample_Thaws = c("0", "0", "1", "0", "0", "0"), Subject_Age = c(NA_character\_, NA_character\_, NA_character\_,NA_character\_, NA_character\_, NA_character\_), Subject_Sex = c("M","M", "M", "M", "M", "M"), PPL_Resident_PI = c("Joe", "Joe", "Joe","Joe", "Joe", "Joe"), PPL_Researcher = c("Wendy", "Wendy", "Wendy","Wendy", "Wendy", "Wendy"), Extracted_By = c("Diana", "Diana","Diana", "Diana", "Diana", "Diana"), Identifier = c("ABC", "ABC","ABC", "ABC", "ABC", "ABC"), PPL_Sender = c("Wendy", "Wendy","Wendy", "Wendy", "Wendy", "Wendy"), Extraction_Date = c("43130","43130", "43130", "43130", "43130", "43130"), Sample_PLPI = c("340","65", "65", "65", "65", "65"), Date_Sent = c("43130", "43130","43130", "43130", "43130", "43130"), Date_Received = c("43130","43130", "43130", "43130", "43130", "43130"), Diagnosis = c(NA_character\_,NA_character\_, NA_character\_, NA_character\_, NA_character\_, NA_character\_), Info_Code = c("1/L/44", "1/L/44", "1/L/44", "1/L/44", "1/L/44","1/L/44"), Sample_Volume = c(NA, "500", "500", "500", "500","500"), Extraction_Method = c(NA_character\_, NA_character\_, NA_character\_,NA_character\_, NA_character\_, NA_character\_), Sample_Comments = c(NA_character\_,NA_character\_, NA_character\_, NA_character\_, NA_character\_, NA_character\_), Row = c("20", "34", "34", "35", "35", "35"), Col = c("A","K", "L", "A", "B", "C"), Level5Name = c("Book 18", "Shelf 14","Shelf 14", "Shelf 14", "Shelf 14", "Shelf 14"), Level4Name = c("Compartment B","Compartment E", "Compartment E", "Compartment E", "Compartment E","Compartment E"), Level3Name = c("Freezer 8", "Freezer 7", "Freezer 7","Freezer 7", "Freezer 7", "Freezer 7"), Level2Name = c("Ground Floor","Ground Floor", "Ground Floor", "Ground Floor", "Ground Floor","Ground Floor"), Building = c("King's Round Bungalow ", "King's Round Bungalow ","King's Round Bungalow ", "King's Round Bungalow ", "King's Round Bungalow ","King's Round Bungalow "), color = c("blue", "green", "yellow","red", "blue", "green")), row.names = c(NA, 6L), class = "data.frame")