Replacing elements of column names with associated strings

74 Views Asked by At

I have a data.table with automatically generated column names. The names take the form:

N.x.y.z

Where N is a character (literally N), and the remaining variables are integers.

I also have a .csv which associates iterations of x with meaningful strings. As in:

X Name
1 Model
3 Mileage

What I would like to do is regenerate the column names in the format:

N.Name.y.z

I've tried to work along the lines of extracting the column names first, like

thefile = fread('filepath')

xx <- colnames(thefile)

colindex <- read.csv('the other file path')
colindex[,1] <- paste0('N.', colindex[,1], '.') #Converting x to N.x.

I messed around with grepl, replace_at, splitting the string up by '.'

4

There are 4 best solutions below

1
M-- On BEST ANSWER
read.table(text = "X    Name
                   1    Model
                   3    Mileage", 
           header = T, stringsAsFactor = FALSE) -> colindex


df1 <- data.frame(`N.1.2.3` = c(1,2), `N.3.1.2` = c(6,5), 
                  `N.1.3.1` = c(3, 4), `N.3.2.2` = c(8, 7))

df1
#>   N.1.2.3 N.3.1.2 N.1.3.1 N.3.2.2
#> 1       1       6       3       8
#> 2       2       5       4       7

names_split <-  as.data.frame(strsplit(names(df1), "\\."))
names_split[2,] <-  colindex[match(names_split[2,], colindex$X), "Name"]
names(df1) <- apply(names_split, 2, paste, collapse = ".")

df1
#>   N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
#> 1           1             6           3             8
#> 2           2             5           4             7

Created on 2024-03-18 with reprex v2.0.2

4
r2evans On

Another option (using @M--'s data) is to use stringr:

stringr::str_replace_all(
  names(df1),
  setNames(paste0("N.", colindex$Name), paste0("N\\.", colindex$X)))
# [1] "N.Model.2.3"   "N.Mileage.1.2" "N.Model.3.1"   "N.Mileage.2.2"

(and reassigning those back with names(df1) <-)

This approach is a bit specific to having what you want be exactly next to the literal N.. It can be adapted to other positions if need be, with slight mods to the pattern.

A base-R version could be:

gre <- gregexpr("(?<=N[.])([0-9]+)(?=[.])", names(df1), perl = TRUE)
regmatches(names(df1), gre) <- colindex$Name[match(unlist(regmatches(names(df1), gre)), colindex$X)]
df1
#   N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
# 1           1             6           3             8
# 2           2             5           4             7
0
zx8754 On

Using data.table::setnames, split column names on ".", then paste it back together replacing the 2nd item from the lookup:

library(data.table)

lookup <- fread(text = "X    Name
                   1    Model
                   3    Mileage", 
           header = T, stringsAsFactor = FALSE)
# convert to named vector
lookup <- setNames(lookup$Name, lookup$X)

dt <- data.table(`N.1.2.3` = c(1,2), `N.3.1.2` = c(6,5), 
                 `N.1.3.1` = c(3, 4), `N.3.2.2` = c(8, 7))

setnames(dt, 
         new = sapply(strsplit(colnames(dt), ".", fixed = TRUE), function(i){
           paste(i[ 1 ], lookup[ i[ 2 ] ], i[ 3 ], i[ 4 ], sep = ".") } ))
dt

#      N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
#           <num>         <num>       <num>         <num>
# 1:           1             6           3             8
# 2:           2             5           4             7
0
Onyambu On
library(tidyverse)
rename_with(df1, ~str_replace(.x, "\\d", ~deframe(colindex)[.x]))

  N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
1           1             6           3             8
2           2             5           4             7

Other options:

rename_with(df1,~str_replace_all(.x,deframe(map(colindex, ~str_c('N.', .x)))))

  N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
1           1             6           3             8
2           2             5           4             7

base R:

fn <- \(x,y)sub(sprintf("(?<=N.)%s", y[1]), y[2], x, perl = TRUE)
setNames(df1, Reduce(fn, asplit(colindex, 1), names(df1)))

  N.Model.2.3 N.Mileage.1.2 N.Model.3.1 N.Mileage.2.2
1           1             6           3             8
2           2             5           4             7

Data:

colindex <- read.table(text = "X    Name
                   1    Model
                   3    Mileage", 
           header = T, stringsAsFactor = FALSE) 


df1 <- data.frame(`N.1.2.3` = c(1,2), `N.3.1.2` = c(6,5), 
                  `N.1.3.1` = c(3, 4), `N.3.2.2` = c(8, 7))