How can I split a large file into pieces without reading it all into memory at once? Say I have 30,000,000 records and I want 30 files with 1,000,000 records each. Currently I'm using the skip and nrows arguments of read.csv(), but some ambiguous data is coming up.
# Output connections, opened in append mode
fileConn1 <- file("D:\\Vikas\\x1.csv", open = "a")
fileConn2 <- file("D:\\Vikas\\x2.csv", open = "a")
fileConn3 <- file("D:\\Vikas\\x3.csv", open = "a")
fileConn4 <- file("D:\\Vikas\\x4.csv", open = "a")

# Read the source file in 100,000-row windows using skip/nrows
df <- read.csv("abc.csv", skip = 200000, nrows = 100000)
counter <- 3
while (nrow(df) != 0) {
  colnames(df) <- c("X", "X_raw")
  m <- 100000 * counter
  temp1 <- df
  temp1$X_raw <- gsub("\"", "", temp1$X_raw)   # strip embedded quotes
  temp1$X_raw <- as.character(temp1$X_raw)

  # Route each record to an output file based on how many
  # comma-separated fields it contains
  for (i in 1:nrow(temp1)) {
    test_data <- strsplit(temp1$X_raw[i], ",")
    if (length(test_data[[1]]) == 6) {
      writeLines(temp1$X_raw[i], fileConn1)
    } else if (length(test_data[[1]]) == 12) {
      writeLines(temp1$X_raw[i], fileConn2)
    } else if (length(test_data[[1]]) == 18) {
      writeLines(temp1$X_raw[i], fileConn3)
    } else {
      writeLines(temp1$X_raw[i], fileConn4)
    }
  }

  # Advance the window and read the next chunk
  df <- read.csv("abc.csv", skip = m, nrows = 100000)
  counter <- counter + 1
}

close(fileConn1)
close(fileConn2)
close(fileConn3)
close(fileConn4)
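
To be clear about what I'm after, the idea is something like this: keep one connection open, read a fixed number of lines at a time with readLines(), and write each chunk to its own file, so the whole file is never held in memory at once. This is only a rough sketch, not working code; it assumes the file has no header row, and the paths, part-file naming, and chunk size are placeholders.

# Rough sketch: stream the file through a single open connection,
# 1,000,000 lines per chunk, writing each chunk to its own part file.
# (Assumes no header row; paths and chunk size are placeholders.)
in_conn <- file("D:\\Vikas\\abc.csv", open = "r")
chunk_size <- 1000000
part <- 1
repeat {
  chunk <- readLines(in_conn, n = chunk_size)
  if (length(chunk) == 0) break                      # reached end of file
  writeLines(chunk, sprintf("D:\\Vikas\\part_%02d.csv", part))
  part <- part + 1
}
close(in_conn)

Is something along these lines the right way to do it, or is there a better approach than looping with skip/nrows?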
Thanks in advance :)